Class: Lumix::FastSearch

Inherits:

Object

Object
Lumix::FastSearch

show all

Defined in: lib/lumix/fast_search.rb

Constant Summary collapse

TAGGED = pattern for one word|tag pair (example input: Xxx|YYY)

/([^\s\|]+)\|(\S+)/m

ORIG = pattern for four pipe-separated fields (example input: X|Y|Z|W)

/([^\|\s]*)\|([^\|\s]*)\|([^\|\s]*)\|(\S*)/

Instance Method Summary collapse

#concurrent_link? ⇒ Boolean
#create_filter(f, &block) ⇒ Object
#find(filters) ⇒ Object
#initialize(db, progress) ⇒ FastSearch constructor

A new instance of FastSearch.
#link_text(id) ⇒ Object

Constructor Details

#initialize(db, progress) ⇒ `FastSearch`

Returns a new instance of FastSearch.

# File 'lib/lumix/fast_search.rb', line 11

# Stores the collaborators used by the search.
#
# @param db [Object] kept in @db; not read by any method visible here
# @param progress [#[], nil] callback invoked as progress[prog] by #find
#   whenever progress changes; may be nil to disable reporting
def initialize(db, progress)
  @progress = progress
  @db = db
end

Instance Method Details

#concurrent_link? ⇒ `Boolean`

Returns:

(Boolean)



16
17
18

# File 'lib/lumix/fast_search.rb', line 16

# Capability flag; unconditionally true for this search strategy.
# NOTE(review): presumably tells the caller that #link_text may be run
# concurrently for several texts -- confirm against the call site.
#
# @return [Boolean] always true
def concurrent_link?
  true
end

#create_filter(f, &block) ⇒ `Object`



61
62
63

# File 'lib/lumix/fast_search.rb', line 61

# Builds a Lumix::Filter for the filter expression +f+.
#
# The first constructor argument is a regexp fragment with two numeric
# capture groups, matching the "|begin|end" suffix that #link_text appends
# to every linked token.
#
# @param f the filter expression, handed to Lumix::Filter unchanged
# @param block forwarded to Lumix::Filter.new
# @return [Lumix::Filter]
def create_filter(f, &block)
  position_suffix = '\|(\d+)\|(\d+)'
  Lumix::Filter.new(position_suffix, f, &block)
end

#find(filters) ⇒ `Object`

# File 'lib/lumix/fast_search.rb', line 65

# Runs every filter over the linked text of every TaggedText record and
# hands each hit back to the filter that produced it.
#
# For each hit two Lumix::TextSnippet objects are built: one over the linked
# text (+fulltagged+) using the hit's own range, and one over the plain text
# using the first and last capture of the match as begin/end offsets
# (presumably the "|begin|end" suffix set up by #create_filter). The pair
# [text_snippet, tagged_snippet] is pushed onto the filter with <<.
#
# Progress is reported through @progress (when set): once before the scan
# with done = 0, then after each record with the number of records finished,
# so the last report equals the total.
#
# @param filters [Enumerable] objects responding to scan(string) -- yielding
#   hit, begin, end and MatchData -- and to <<
# @return whatever TaggedText.each_with_index returns
def find(filters)
  prog = Progress.new(:search, TaggedText.count, "", 0)
  @progress[prog] if @progress

  TaggedText.each_with_index do |t, i|
    # matches to ranges
    filters.each do |f|
      f.scan(t.fulltagged) do |_hit, t_begin, t_end, m|
        s_begin = m.captures.first.to_i
        s_end = m.captures.last.to_i

        fname = File.basename(t.filename)
        tagged_snippet = Lumix::TextSnippet.new(fname, t.fulltagged, t_begin, t_end)
        text_snippet = Lumix::TextSnippet.new(fname, t.text, s_begin, s_end)
        f << [text_snippet, tagged_snippet]
      end
    end
    # i is zero-based; report the count of finished records so the final
    # update reaches the total instead of stopping one short.
    prog.done = i + 1
    @progress[prog] if @progress
  end
end

#link_text(id) ⇒ `Object`

# File 'lib/lumix/fast_search.rb', line 20

# Builds (and caches) the "linked" form of a tagged text, in which every
# tagged token also carries the character range it occupies in the plain text.
#
# The record is fetched with TaggedText[id]. If it already has a +fulltagged+
# value, that value is returned untouched. Otherwise every word|tag pair found
# in +tagged+ (via TAGGED) is located in +text+, searching forward from the
# end of the previous match, and emitted as "word|tag|begin|end". The pieces
# are joined with single spaces; a non-empty result is stored in +fulltagged+
# and the record is saved.
#
# @param id key passed to TaggedText[]
# @return [String, nil] the linked text (empty when +tagged+ contains no
#   pairs), or nil as soon as one word cannot be found in +text+ -- in that
#   case a line is appended to unlinked.lst in the current working directory
# @raise [StandardError] any error is printed to STDERR and then re-raised
def link_text(id)
  ds = TaggedText[id]
  return ds.fulltagged if ds.fulltagged

  file = ds.filename
  text = ds.text
  tagged = ds.tagged

  puts "Linking text #{file}"

  txt_pos = 0
  pieces = []
  tagged.scan(TAGGED) do |word, tag|
    # Read the scan match first: the regexp calls below overwrite it.
    tagged_begin = Regexp.last_match.begin(0)

    # expand "x_y_z" notation to "x y z"
    word_re = Regexp.new(Regexp.escape(word).gsub(/_/, '\s*'))
    # Search from txt_pos in place; offsets in the match are already absolute,
    # and no copy of the remaining text is made per token.
    src_match = text.match(word_re, txt_pos)

    unless src_match
      STDERR.puts "Could not find match for '#{word}' in text #{file}"
      # Clamp at 0: a negative start would index from the end of the string.
      context_start = [txt_pos - 10, 0].max
      STDERR.puts text[context_start..(txt_pos + word.size + 10)]
      # Append directly instead of shelling out to echo, so quotes or shell
      # metacharacters in the file name or word cannot break or hijack the command.
      File.open('unlinked.lst', 'a') do |log|
        log.puts %(#{file}:#{txt_pos}:#{tagged_begin} unmatched "#{word}")
      end
      return nil
    end

    src_begin = src_match.begin(0)
    src_end = src_match.end(0)
    txt_pos = src_end

    pieces << "#{word}|#{tag}|#{src_begin}|#{src_end}"
  end

  linked = pieces.join(' ')
  unless linked.empty?
    ds.fulltagged = linked
    ds.save
  end
  linked
rescue => e # TODO: drop this debug logging once it is no longer needed
  STDERR.puts e
  STDERR.puts e.backtrace
  raise
end

Class: Lumix::FastSearch

Constant Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(db, progress) ⇒ FastSearch

Instance Method Details

#concurrent_link? ⇒ Boolean

#create_filter(f, &block) ⇒ Object

#find(filters) ⇒ Object

#link_text(id) ⇒ Object

#initialize(db, progress) ⇒ `FastSearch`

#concurrent_link? ⇒ `Boolean`

#create_filter(f, &block) ⇒ `Object`

#find(filters) ⇒ `Object`

#link_text(id) ⇒ `Object`