Class: Lumix::SlowSearch
- Inherits:
-
Object
- Object
- Lumix::SlowSearch
- Defined in:
- lib/lumix/slow_search.rb
Constant Summary collapse
- TAGGED =
Xxx|YYY
/([^\s\|]+)\|(\S+)/m
Instance Method Summary collapse
- #concurrent_link? ⇒ Boolean
- #create_filter ⇒ Object
- #find(filter, &block) ⇒ Object
- #find_range(t_id, t_begin, t_end, process_original) ⇒ Object
- #initialize(db, progress) ⇒ SlowSearch (constructor): a new instance of SlowSearch.
- #link_text(id) ⇒ Object
Constructor Details
#initialize(db, progress) ⇒ SlowSearch
Returns a new instance of SlowSearch.
6 7 8 9 |
# File 'lib/lumix/slow_search.rb', line 6
# Remembers the two collaborators the other methods rely on.
#
# @param db       handle indexed as db[:assoc] by find_range and link_text
#                 (presumably a Sequel database - confirm against the caller)
# @param progress callable invoked via progress[...] by find to report
#                 search progress
def initialize(db, progress)
  @db, @progress = db, progress
end
Instance Method Details
#concurrent_link? ⇒ Boolean
11 12 13 |
# File 'lib/lumix/slow_search.rb', line 11
# Always answers true.
# NOTE(review): presumably signals to the caller that link_text may be run
# for several texts concurrently - confirm against the call site.
#
# @return [Boolean] true
def concurrent_link?
  true
end
#create_filter ⇒ Object
49 50 51 |
# File 'lib/lumix/slow_search.rb', line 49
# Returns the filter object of this search, building it from an empty
# pattern on first use and handing back the same instance afterwards.
#
# @return the memoized Filter instance
def create_filter
  return @filter if @filter

  @filter = Filter.new('')
end
#find(filter, &block) ⇒ Object
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
# File 'lib/lumix/slow_search.rb', line 53
# Scans the tagged form of every TaggedText for +filter+ and counts the hits.
#
# What is yielded depends on the arity of the given block:
# * no block, or a block taking no arguments: hits are only counted;
# * one argument: a TextSnippet over the tagged text is yielded per hit;
# * two or more arguments: (text_snippet, tagged_snippet) is yielded per hit,
#   and only in this case find_range is asked to look up the source range.
# Blocks reporting a negative arity (optional or splat parameters) are
# treated like the no-block case.
#
# Progress is reported through @progress: once with the freshly created
# Progress object, then once per processed text with the running text index.
#
# @param filter handed to Filter.to_re to obtain the regexp, and to Progress.new
# @return the total number of hits over all texts
def find(filter, &block)
  # The two thresholds used to be swapped, so a one-argument block caused the
  # source-range lookup but was never called and the tagged-only branch below
  # was unreachable.
  yield_tagged = block && block.arity >= 1
  yield_text = block && block.arity >= 2

  prog = Progress.new(:search, TaggedText.count, filter)
  @progress[prog]

  re = Filter.to_re(filter)

  index = 0
  # Keep inject here: TaggedText is not necessarily a plain Enumerable, so
  # other folding helpers may not mean the same thing on it.
  TaggedText.inject(0) do |total, t|
    fname = File.basename(t.filename)

    # Turn every regexp hit in the tagged text into a range hash.
    # TODO decouple database operations for performance
    ranges = []
    t.tagged.scan(re) do
      hit = Regexp.last_match
      ranges << find_range(t.id, hit.begin(0), hit.end(0), yield_text)
    end

    if yield_tagged
      ranges.each do |range|
        tagged_snippet = TextSnippet.new(fname, t.tagged,
                                         range[:tagged_begin].to_i, range[:tagged_end].to_i)
        if yield_text
          text_snippet = TextSnippet.new(fname, t.text,
                                         range[:src_begin].to_i, range[:src_end].to_i)
          yield text_snippet, tagged_snippet
        else
          yield tagged_snippet
        end
      end
    end

    @progress[prog, (index += 1)]
    total + ranges.size
  end
end
#find_range(t_id, t_begin, t_end, process_original) ⇒ Object
92 93 94 95 96 97 98 99 |
# File 'lib/lumix/slow_search.rb', line 92
# Builds the range hash for one hit in a tagged text.
#
# Without +process_original+ the hit offsets are returned as they are.
# With it, the :assoc rows of the text whose tagged_end is >= t_begin and
# whose tagged_begin is < t_end are aggregated into a single row holding the
# smallest src_begin / tagged_begin and the largest src_end / tagged_end.
# NOTE(review): the filter/select calls look like the Sequel dataset DSL -
# confirm the Sequel version before touching them.
#
# @param t_id             id of the text the hit belongs to
# @param t_begin          start offset of the hit in the tagged text
# @param t_end            end offset of the hit in the tagged text
# @param process_original truthy when the source-text range is needed as well
# @return a hash with :tagged_begin and :tagged_end, plus :src_begin and
#   :src_end when +process_original+ is truthy
def find_range(t_id, t_begin, t_end, process_original)
  unless process_original
    return { :tagged_begin => t_begin, :tagged_end => t_end }
  end

  rows = @db[:assoc].filter(:text_id => t_id)
  rows = rows.filter { tagged_end >= t_begin }
  rows = rows.filter { tagged_begin < t_end }

  bounds = rows.select do
    [
      { min(:src_begin) => :src_begin },
      { max(:src_end) => :src_end },
      { min(:tagged_begin) => :tagged_begin },
      { max(:tagged_end) => :tagged_end }
    ]
  end
  bounds.first
end
#link_text(id) ⇒ Object
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/lumix/slow_search.rb', line 15
# Aligns every word|tag token of a tagged text with the matching stretch of
# its source text and stores the alignment rows in the :assoc table.
#
# Tokens are taken in order with TAGGED; each word is searched in the source
# text starting where the previous word ended, with every "_" in the word
# allowed to stand for any amount of whitespace. As soon as one word cannot
# be found, the failure is reported on STDERR and in unlinked.lst, nothing is
# inserted and nil is returned.
#
# @param id id of the TaggedText to link
# @return nil when a word could not be matched, otherwise whatever
#   @db[:assoc].multi_insert returns
# @raise any StandardError raised while linking, after printing it and its
#   backtrace to STDERR
def link_text(id)
  t = TaggedText[id]
  text = t.text

  puts "Linking text #{t.filename}"

  src_last = 0
  position = 0
  assoc = []
  t.tagged.scan(TAGGED) do |word, _tag|
    token = Regexp.last_match
    tagged_begin = token.begin(0)
    tagged_end = token.end(0)

    word_re = Regexp.new(Regexp.escape(word).gsub(/_/, '\s*'))
    # Search from src_last in place instead of copying the rest of the text
    # for every token; the offsets of the match are then already absolute.
    src_match = text.match(word_re, src_last)
    unless src_match
      STDERR.puts "Could not find match for '#{word}' in text #{t.filename}"
      log_unlinked(t.filename, tagged_begin, src_last, word)
      return nil
    end

    src_begin = src_match.begin(0)
    src_end = src_match.end(0)
    src_last = src_end
    assoc << {:text_id => id, :position => position,
              :src_begin => src_begin, :src_end => src_end,
              :tagged_begin => tagged_begin, :tagged_end => tagged_end}
    position += 1
  end
  @db[:assoc].multi_insert(assoc)
rescue => e
  STDERR.puts e
  STDERR.puts e.backtrace
  raise
end

# Appends one "file:tagged_offset:source_offset unmatched "word"" line to
# unlinked.lst in the current working directory. Written directly instead of
# through a shell so quotes or shell metacharacters in the file name or word
# can neither break nor hijack the command. Best effort: a failure to write
# is reported on STDERR and does not abort linking.
def log_unlinked(filename, tagged_begin, src_last, word)
  File.open('unlinked.lst', 'a') do |log|
    log.puts "#{filename}:#{tagged_begin}:#{src_last} unmatched \"#{word}\""
  end
rescue SystemCallError => e
  STDERR.puts "Could not write unlinked.lst: #{e.message}"
end
private :log_unlinked