Class: Lumix::LookupSearch

Inherits:
Object
  • Object
show all
Defined in:
lib/lumix/lookup_search.rb,
lib/lumix/proto/lookup_search.rb

Constant Summary collapse

TAGGED =

Xxx|YYY

/([^\s\|]+)\|(\S+)/m

Instance Method Summary collapse

Constructor Details

#initialize(db, progress) ⇒ LookupSearch

Returns a new instance of LookupSearch.



11
12
13
14
# File 'lib/lumix/lookup_search.rb', line 11

# Sets up the search with a Lookup built from +db+.
#
# @param db handed unchanged to Lookup.new
# @param progress kept in @progress; none of the methods visible in this
#   listing read it (presumably a progress reporter - confirm with callers)
def initialize(db, progress)
  @progress = progress
  @lookup = Lookup.new(db)
end

Instance Method Details

#concurrent_link? ⇒ Boolean

Returns:

  • (Boolean)


16
17
18
# File 'lib/lumix/lookup_search.rb', line 16

# Always answers true.
# NOTE(review): presumably tells callers that #link_text may be invoked
# concurrently - confirm against the code that queries this flag.
#
# @return [Boolean] always true
def concurrent_link?; true; end

#create_filter(f, &block) ⇒ Object



68
69
70
# File 'lib/lumix/lookup_search.rb', line 68

# Builds a Lumix::LookupFilter bound to this search's lookup.
#
# @param f filter specification, passed through to LookupFilter.new
# @param block optional block, forwarded to LookupFilter.new
# @return [Lumix::LookupFilter] the newly created filter
def create_filter(f, &block)
  filter_class = Lumix::LookupFilter
  filter_class.new(@lookup, f, &block)
end

#find(*filters, &block) ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/lumix/lookup_search.rb', line 72

# Applies every given filter to the lookup and pushes each hit back into the
# filter that produced it, as a pair of snippets: one over the plain text and
# one over the tagged text.
#
# Each filter is scheduled as its own job on a Pool created with argument 4.
# Within a job the TaggedText record is only re-fetched when the text id of a
# hit differs from the previous hit's id.
#
# @param filters filters (nested arrays are flattened); each must respond to
#   #apply(lookup) yielding (text_id, s_begin, s_end, t_begin, t_end) and to #<<
# @param block accepted but not used by this method
# @return the result of Pool#shutdown
def find(*filters, &block)
  pool = Pool.new(4)
  filters.flatten.each do |filter|
    pool.schedule do
      current_id = -1
      record = nil
      filter.apply(@lookup) do |text_id, s_begin, s_end, t_begin, t_end|
        # Only hit the store again when the hit belongs to a different text.
        unless text_id == current_id
          record = TaggedText[text_id]
          current_id = text_id
        end

        basename = File.basename(record.filename)
        filter << [
          Lumix::TextSnippet.new(basename, record.text, s_begin, s_end),
          Lumix::TextSnippet.new(basename, record.tagged, t_begin, t_end)
        ]
      end
    end
  end
  pool.shutdown
end


#link_text(id) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/lumix/lookup_search.rb', line 24

# Aligns the tagged version of text +id+ with its plain text and records one
# token per "word|tag" pair on the document yielded by @lookup.process.
#
# The tagged text is scanned with TAGGED. Each word is searched for in the
# plain text starting where the previous word ended, so repeated words are
# matched in order. An underscore in a tagged word stands for one or more
# whitespace characters in the plain text.
#
# When a word cannot be found, a diagnostic goes to STDERR, one line is
# appended to "unlinked.lst" in the current directory, and scanning stops.
# When @simulate is set, no tokens are added but everything else still runs.
#
# @param id key used to fetch the record via TaggedText[] and passed on to
#   @lookup.process and doc.add_token
# @return whatever @lookup.process returns; the block given to it evaluates
#   to true when every token was located and to nil after the first miss
# @raise re-raises any StandardError after printing it and its backtrace
def link_text(id)
  ds = TaggedText[id]
  @lookup.process id do |doc|
    result = true

    file, text, tagged = ds.filename, ds.text, ds.tagged

    puts "Linking text #{file}"

    txt_pos = 0
    position = 0
    tagged.scan(TAGGED) do |word, tag|
      # Capture the scan match before running any other regexp.
      token_match = Regexp.last_match
      tagged_begin = token_match.begin(0)
      tagged_end = token_match.end(0)

      # expand "x_y_z" notation so each "_" matches a run of whitespace
      word_re = Regexp.new(Regexp.escape(word).gsub('_') { '\s+' })
      # Search from txt_pos without copying the rest of the text; the
      # resulting offsets are already absolute positions in +text+.
      src_match = text.match(word_re, txt_pos)
      if src_match
        src_begin = src_match.begin(0)
        src_end = src_match.end(0)
        txt_pos = src_end

        unless @simulate
          doc.add_token(id, position, word, tag, src_begin, src_end, tagged_begin, tagged_end)
        end
      else
        STDERR.puts "Could not find match for '#{word}' in text #{file}"
        # Clamp at 0: a negative start would index from the end of the string.
        context_start = [txt_pos - 10, 0].max
        STDERR.puts text[context_start..(txt_pos + word.size + 10)]
        # Append directly instead of shelling out to `echo`, so quotes or
        # shell metacharacters in the file name or word cannot break the
        # command or be executed.
        File.open('unlinked.lst', 'a') do |log|
          log.puts %(#{file}:#{txt_pos}:#{tagged_begin} unmatched "#{word}")
        end
        result = nil
        break
      end
      position += 1
    end
    result
  end
rescue => e
  # TODO: drop this diagnostic dump once callers report failures themselves.
  STDERR.puts e
  STDERR.puts e.backtrace
  raise
end

#simulate! ⇒ Object



20
21
22
# File 'lib/lumix/lookup_search.rb', line 20

# Turns on simulation mode by setting @simulate. While it is set, #link_text
# skips the doc.add_token call but still performs the matching.
#
# @return [true] the value assigned to @simulate
def simulate!; @simulate = true; end