Class: Lumix::Lookup

Inherits:
Object
  • Object
show all
Defined in:
lib/lumix/lookup.rb,
lib/lumix/proto/lookup.rb

Defined Under Namespace

Classes: Document, LookupCollection

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(db) ⇒ Lookup

Returns a new instance of Lookup.



43
44
45
46
47
# File 'lib/lumix/lookup.rb', line 43

# Builds a lookup on top of the given database handle.
#
# Prints "Lookup" to standard output, keeps the handle in @db and caches
# whatever the handle returns for +:tokens+ in @tokens.
#
# @param db [#[]] database handle, indexed with +:tokens+
def initialize(db)
  $stdout.puts "Lookup"
  @db = db
  @tokens = @db[:tokens]
end

Instance Attribute Details

#db ⇒ Object (readonly)

Returns the value of attribute db.



41
42
43
# File 'lib/lumix/lookup.rb', line 41

# Reader for the database handle that the constructor stores in @db.
#
# @return [Object] the current value of @db
def db
  @db
end

#tokens ⇒ Object (readonly)

Returns the value of attribute tokens.



41
42
43
# File 'lib/lumix/lookup.rb', line 41

# Reader for the token store that the constructor caches in @tokens
# (the result of +db[:tokens]+).
#
# @return [Object] the current value of @tokens
def tokens
  @tokens
end

Instance Method Details

#add_token(text_id, word, tag, s_begin, s_end, t_begin, t_end) ⇒ Object



71
72
73
74
75
76
77
# File 'lib/lumix/proto/lookup.rb', line 71

# Appends one token to the in-memory token list and indexes it by word and tag.
#
# Does nothing (and returns nil) while @saving is set. Otherwise marks the
# lookup as dirty, stores +[text_id, s_begin, s_end, t_begin, t_end]+ at the
# end of @tokens, and records the new token's index under +word+ in @words
# and under +tag+ in @tags.
#
# @param text_id [Object] identifier of the text the token belongs to
# @param word [Object] key into @words
# @param tag [Object] key into @tags
# @param s_begin [Object] stored as the second field of the token record
# @param s_end [Object] stored as the third field of the token record
# @param t_begin [Object] stored as the fourth field of the token record
# @param t_end [Object] stored as the fifth field of the token record
# @return [Array, nil] the list of token indices recorded for +tag+, or nil
#   when the call was ignored
def add_token(text_id, word, tag, s_begin, s_end, t_begin, t_end)
  return if @saving

  @dirty = true
  @tokens << [text_id, s_begin, s_end, t_begin, t_end]
  token_index = @tokens.size - 1

  ids_for_word = (@words[word] ||= [])
  ids_for_word << token_index

  ids_for_tag = (@tags[tag] ||= [])
  ids_for_tag << token_index
end

#find(filters) ⇒ Object

kindly crafted by jeremyevans



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/lumix/lookup.rb', line 75

# Finds runs of tokens that match a list of filters, one filter per token.
#
# The first filter constrains the +tokens+ table under the alias +t0+. Every
# further filter joins +tokens+ again under the next alias (+t1+, +t2+, ...),
# matching on +text_id+ and on +position+ plus one, and adds its own word/tag
# constraints. The generated SQL and its query plan are printed before the
# rows are read.
#
# NOTE(review): the join block arguments are presumably Sequel's
# (joined alias, previous alias, earlier joins) — confirm against the
# Sequel version in use.
#
# @param filters [Array] objects responding to +word+ and +tag+; a nil value
#   leaves that column unconstrained. Must contain at least one filter.
# @yield [Array] +[text_id, src_begin, src_end, tagged_begin, tagged_end]+,
#   with the begin columns read from +t0+ and the end columns from the last
#   alias
# @return [Object] whatever +each+ on the final dataset returns
def find(filters)
  query = db[:tokens.as(:t0)]
  head = filters[0]
  query = query.where(:t0__word_id => head.word) if head.word
  query = query.where(:t0__tag_id => head.tag) if head.tag

  depth = 0
  filters[1..-1].each do |filter|
    depth += 1
    table_alias = "t#{depth}"

    constraints = {}
    constraints[:"#{table_alias}__word_id"] = filter.word if filter.word
    constraints[:"#{table_alias}__tag_id"] = filter.tag if filter.tag

    joined = query.join(:tokens.as(table_alias)) { |j, lj, js|
      {
        :text_id.qualify(j) => :text_id.qualify(lj),
        :position.qualify(j) => :position.qualify(lj) + 1
      }
    }
    query = joined.where(constraints)
  end

  last_alias = "t#{depth}"
  select = query.select(
    :t0__text_id.as(:text_id),
    :t0__src_begin.as(:src_begin),
    :"#{last_alias}__src_end".as(:src_end),
    :t0__tagged_begin.as(:tagged_begin),
    :"#{last_alias}__tagged_end".as(:tagged_end)
  )

  puts select.sql
  puts select.explain

  select.each do |row|
    yield [row[:text_id], row[:src_begin], row[:src_end], row[:tagged_begin], row[:tagged_end]]
  end
end

#find_tag(re) ⇒ Object



70
71
72
# File 'lib/lumix/lookup.rb', line 70

# Looks up tag ids by delegating to +find_ids+ on the +:tags+ table.
#
# @param re [Object] value matched against the +:tag+ column (presumably a
#   regular expression, going by the name — confirm with +find_ids+)
# @return [Object] whatever +find_ids+ returns
def find_tag(re)
  tag_table = db[:tags]
  find_ids(tag_table, :tag => re)
end

#find_word(re) ⇒ Object



66
67
68
# File 'lib/lumix/lookup.rb', line 66

# Looks up word ids by delegating to +find_ids+ on the +:words+ table.
#
# @param re [Object] value matched against the +:word+ column (presumably a
#   regular expression, going by the name — confirm with +find_ids+)
# @return [Object] whatever +find_ids+ returns
def find_word(re)
  word_table = db[:words]
  find_ids(word_table, :word => re)
end

#load ⇒ Object



25
26
27
28
29
30
31
32
33
# File 'lib/lumix/proto/lookup.rb', line 25

# Reads the persisted lookup tables back into memory.
#
# Clears the dirty flag first. Nothing is read unless +lookup.dat+ exists in
# the current directory. Otherwise tags, words, texts and tokens are each read
# through +load_file+ and stored in the instance variable of the same name,
# which is where +save_file+ reads them from when writing.
#
# NOTE(review): +save+ only writes the four per-table files, never
# +lookup.dat+ — confirm which file is meant to act as the marker.
#
# @return [Object, nil] the loaded tokens data, or nil when nothing was loaded
def load
  @dirty = false
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  return unless File.exist?('lookup.dat')

  puts "Loading"
  # Keep what was read: the result of load_file used to be discarded.
  [:tags, :words, :texts, :tokens].each do |name|
    instance_variable_set("@#{name}", load_file(name))
  end
  @tokens
end

#load_file(name) ⇒ Object



58
59
60
# File 'lib/lumix/proto/lookup.rb', line 58

# Reads +<name>.dat+ from the current directory and decodes it with MessagePack.
#
# The file is read in binary mode: the payload is serialized bytes, so it must
# not go through newline translation or be tagged with a text encoding.
#
# @param name [#to_s] base name of the data file, without the +.dat+ suffix
# @return [Object] the value decoded by +MessagePack.unpack+
def load_file(name)
  MessagePack.unpack(File.binread("#{name}.dat"))
end

#process(text_id) ⇒ Object



58
59
60
61
62
63
64
# File 'lib/lumix/lookup.rb', line 58

# Runs the given block against a fresh Document unless the text already has
# tokens.
#
# Returns true straight away when +tokens+ holds at least one row for
# +text_id+. Otherwise a Document is built for this lookup and yielded; the
# document is flushed only when the block returns a truthy value.
#
# @param text_id [Object] identifier matched against the +:text_id+ column
# @yield [Document] a new document bound to this lookup
# @return [Object, nil] true when the text was already processed, otherwise
#   the block's result (nil when no block is given)
def process(text_id)
  existing = tokens.where(:text_id => text_id)
  return true unless existing.empty?

  document = Document.new(self)
  outcome = block_given? ? yield(document) : nil
  document.flush if outcome
  outcome
end

#save ⇒ Object



35
36
37
38
39
40
41
42
43
44
# File 'lib/lumix/proto/lookup.rb', line 35

# Writes the tags, words, texts and tokens tables to disk if anything changed.
#
# Returns immediately when the dirty flag is not set. While the files are
# being written @saving is true, which makes +add_token+ ignore calls. The
# flag is reset in an +ensure+ so that a failing write cannot leave the lookup
# permanently refusing new tokens. The dirty flag is cleared only after all
# four files were written, so a failed save is retried on the next call.
#
# @return [false, nil] false after a save, nil when there was nothing to save
# @raise [StandardError] whatever +save_file+ raises
def save
  return unless @dirty

  @saving = true
  begin
    puts "Saving"
    [:tags, :words, :texts, :tokens].each { |name| save_file(name) }
    @dirty = false
  ensure
    @saving = false
  end
end

#save_file(name) ⇒ Object



51
52
53
54
55
56
# File 'lib/lumix/proto/lookup.rb', line 51

# Serializes the instance variable named after +name+ with MessagePack and
# writes it to +<name>.dat+ in the current directory.
#
# The data is packed before the file is opened, so a serialization error does
# not truncate an existing file. The bytes are written in binary mode and
# without +print+, which would append the output record separator (+$\+) and,
# in text mode, translate newlines on some platforms.
#
# @param name [#to_s] table name; +@<name>+ is read and +<name>.dat+ written
# @return [nil]
def save_file(name)
  packed = MessagePack.pack(instance_variable_get("@#{name}"))
  File.binwrite("#{name}.dat", packed)
  nil
end

#tags ⇒ Object



49
50
51
52
# File 'lib/lumix/lookup.rb', line 49

# Lazily builds and caches the LookupCollection over the +:tags+ table,
# keyed by the +:tag+ column.
#
# @return [LookupCollection] the same instance on every call
def tags
  # TODO create only in the context of linking
  return @tags if @tags

  @tags = LookupCollection.new(db[:tags], :tag)
end

#text_range(t_begin, t_end) ⇒ Object



93
94
95
96
97
# File 'lib/lumix/proto/lookup.rb', line 93

# Describes the span covered by the tokens at indices +t_begin+ and +t_end+.
#
# Both token records are read from @tokens. When they belong to different
# texts the range is meaningless and nil is returned.
#
# @param t_begin [Integer] index of the first token in @tokens
# @param t_end [Integer] index of the last token in @tokens
# @return [Array, nil] +[text_id, s_begin, s_end, t_begin, t_end]+ with the
#   begin fields taken from the first token and the end fields from the last,
#   or nil when the two tokens have different text ids
def text_range(t_begin, t_end)
  first_token = @tokens[t_begin]
  last_token = @tokens[t_end]
  return nil unless first_token[TEXT_ID] == last_token[TEXT_ID]

  [
    first_token[TEXT_ID],
    first_token[S_BEGIN],
    last_token[S_END],
    first_token[T_BEGIN],
    last_token[T_END]
  ]
end

#union(*id_sets) ⇒ Object

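Returns the start indices of the sequences matched by all id sets: each set is shifted back by its position in the argument list, then the shifted sets are intersected.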



88
89
90
91
# File 'lib/lumix/proto/lookup.rb', line 88

# Returns the start indices at which every id set matches in sequence.
#
# The ids of the set at argument position +i+ are shifted back by +i+, so ids
# that sit on consecutive positions collapse onto the index of the first one.
# The shifted sets are then intersected.
#
# @param id_sets [Array<Array<Integer>>] one list of token ids per position
# @return [Array<Integer>, nil] the common start indices, or nil when no set
#   is given
def union(*id_sets)
  shifted_sets = []
  id_sets.each_with_index do |ids, offset|
    shifted_sets << ids.map { |id| id - offset }
  end
  shifted_sets.inject { |common, ids| common & ids }
end

#with(*types) {|args| ... } ⇒ Object

Yields:

  • (args)


46
47
48
49
# File 'lib/lumix/proto/lookup.rb', line 46

# Yields the named tables, loading from disk any that are not in memory yet.
#
# For each name the instance variable +@<name>+ is used when it is set;
# otherwise the table is read with +load_file+ and cached in that instance
# variable. (The original called +instance_variable_get+ with two arguments
# here, which raises ArgumentError instead of storing the loaded data.)
#
# @param types [Array<Symbol, Array<Symbol>>] table names; nested arrays are
#   flattened
# @yield [*args] the tables, in the order the names were given
# @return [Object] the block's result
def with(*types)
  args = types.flatten.map do |name|
    ivar = "@#{name}"
    instance_variable_get(ivar) || instance_variable_set(ivar, load_file(name))
  end
  yield(*args)
end

#words ⇒ Object



54
55
56
# File 'lib/lumix/lookup.rb', line 54

# Lazily builds and caches the LookupCollection over the +:words+ table,
# keyed by the +:word+ column.
#
# @return [LookupCollection] the same instance on every call
def words
  return @words if @words

  @words = LookupCollection.new(db[:words], :word)
end