Class: Lumix::Lookup
- Inherits:
-
Object
show all
- Defined in:
- lib/lumix/lookup.rb,
lib/lumix/proto/lookup.rb
Defined Under Namespace
Classes: Document, LookupCollection
Instance Attribute Summary collapse
Instance Method Summary
collapse
Constructor Details
#initialize ⇒ Lookup
Returns a new instance of Lookup.
43
44
45
46
47
|
# File 'lib/lumix/lookup.rb', line 43
# Builds a lookup bound to the given database handle.
#
# Prints "Lookup" to standard output, keeps +db+ in @db and caches the
# result of indexing it with +:tokens+ in @tokens.
#
# @param db [#[]] database handle; only +db[:tokens]+ is used here
def initialize(db)
  puts "Lookup"
  @db = db
  @tokens = @db[:tokens]
end
|
Instance Attribute Details
#db ⇒ Object
Returns the value of attribute db.
41
42
43
|
# File 'lib/lumix/lookup.rb', line 41
# Reader for the database handle held in @db.
#
# @return [Object] the current value of @db
def db; @db; end
|
#tokens ⇒ Object
Returns the value of attribute tokens.
41
42
43
|
# File 'lib/lumix/lookup.rb', line 41
# Reader for the token store held in @tokens.
#
# @return [Object] the current value of @tokens
def tokens; @tokens; end
|
Instance Method Details
#add_token(text_id, word, tag, s_begin, s_end, t_begin, t_end) ⇒ Object
71
72
73
74
75
76
77
|
# File 'lib/lumix/proto/lookup.rb', line 71
# Appends one token row and indexes its position under its word and tag.
#
# Does nothing (and returns nil) while @saving is truthy. Otherwise marks the
# lookup dirty, appends [text_id, s_begin, s_end, t_begin, t_end] to @tokens
# and records the new row's index in @words[word] and @tags[tag], creating
# either list on first use.
#
# @param text_id [Object] first element of the stored row
# @param word [Object] key under which the row index is filed in @words
# @param tag [Object] key under which the row index is filed in @tags
# @param s_begin [Object] second element of the stored row
# @param s_end [Object] third element of the stored row
# @param t_begin [Object] fourth element of the stored row
# @param t_end [Object] fifth element of the stored row
# @return [Array, nil] the index list for +tag+, or nil when skipped
def add_token(text_id, word, tag, s_begin, s_end, t_begin, t_end)
  unless @saving
    @dirty = true
    row = [text_id, s_begin, s_end, t_begin, t_end]
    # The row's index is its position after the append.
    token_index = (@tokens << row).size - 1
    word_hits = (@words[word] ||= [])
    word_hits << token_index
    tag_hits = (@tags[tag] ||= [])
    tag_hits << token_index
  end
end
|
#find(filters) ⇒ Object
kindly crafted by jeremyevans
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
# File 'lib/lumix/lookup.rb', line 75
# Yields one match per run of consecutive tokens that satisfies +filters+.
#
# The first filter constrains the tokens table aliased as t0. Each further
# filter joins the tokens table again (aliases t1, t2, ...) on the same
# text_id and on a position one greater than the previously joined table,
# and applies that filter's word/tag constraints to the new alias.
#
# The generated SQL and the result of +explain+ are written to standard
# output before the rows are iterated.
#
# NOTE(review): the symbol helpers used here (+as+, +qualify+, the
# +table__column+ naming) look like Sequel core extensions; confirm against
# the Sequel version in use.
#
# @param filters [Array] objects responding to +word+ and +tag+; a nil
#   value means "no constraint" for that attribute
# @yield [Array] [text_id, src_begin, src_end, tagged_begin, tagged_end],
#   where the begin values come from t0 and the end values from the last alias
def find(filters)
  dataset = db[:tokens.as(:t0)]
  head = filters[0]
  dataset = dataset.where(:t0__word_id => head.word) if head.word
  dataset = dataset.where(:t0__tag_id => head.tag) if head.tag

  last = 0
  filters[1..-1].each do |filter|
    last += 1
    table = "t#{last}"
    conditions = {}
    conditions[:"#{table}__word_id"] = filter.word if filter.word
    conditions[:"#{table}__tag_id"] = filter.tag if filter.tag
    joined = dataset.join(:tokens.as(table)) { |j, lj, _js|
      {
        :text_id.qualify(j) => :text_id.qualify(lj),
        :position.qualify(j) => :position.qualify(lj) + 1
      }
    }
    dataset = joined.where(conditions)
  end

  query = dataset.select(
    :t0__text_id.as(:text_id),
    :t0__src_begin.as(:src_begin),
    :"t#{last}__src_end".as(:src_end),
    :t0__tagged_begin.as(:tagged_begin),
    :"t#{last}__tagged_end".as(:tagged_end)
  )
  puts query.sql
  puts query.explain
  query.each do |row|
    yield [row[:text_id], row[:src_begin], row[:src_end], row[:tagged_begin], row[:tagged_end]]
  end
end
|
#find_tag(re) ⇒ Object
70
71
72
|
# File 'lib/lumix/lookup.rb', line 70
# Delegates to find_ids, passing the +:tags+ table of the database and a
# +:tag+ condition.
#
# @param re [Object] value matched against the +:tag+ column (the parameter
#   name suggests a regular expression; confirm against find_ids)
# @return [Object] whatever find_ids returns
def find_tag(re)
  tag_table = db[:tags]
  find_ids(tag_table, :tag => re)
end
|
#find_word(re) ⇒ Object
66
67
68
|
# File 'lib/lumix/lookup.rb', line 66
# Delegates to find_ids, passing the +:words+ table of the database and a
# +:word+ condition.
#
# @param re [Object] value matched against the +:word+ column (the parameter
#   name suggests a regular expression; confirm against find_ids)
# @return [Object] whatever find_ids returns
def find_word(re)
  word_table = db[:words]
  find_ids(word_table, :word => re)
end
|
#load ⇒ Object
25
26
27
28
29
30
31
32
33
|
# File 'lib/lumix/proto/lookup.rb', line 25
# Clears the dirty flag and, when a 'lookup.dat' file exists in the current
# working directory, prints "Loading" and reads the tags, words, texts and
# tokens files through load_file, in that order.
#
# NOTE(review): the values returned by load_file are not stored here, and the
# existence check is on 'lookup.dat' rather than on the files that are read.
# Confirm whether either was intended.
#
# @return [Object, nil] nil when 'lookup.dat' is absent, otherwise the result
#   of the last load_file call
def load
  @dirty = false
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  return unless File.exist?('lookup.dat')
  puts "Loading"
  load_file :tags
  load_file :words
  load_file :texts
  load_file :tokens
end
|
#load_file(name) ⇒ Object
58
59
60
|
# File 'lib/lumix/proto/lookup.rb', line 58
# Reads "<name>.dat" from the current working directory and decodes it with
# MessagePack.
#
# The file is read in binary mode: the content is a binary payload, so it
# must not go through text-mode newline translation or be tagged with a text
# encoding.
#
# @param name [#to_s] base name of the data file (without the '.dat' suffix)
# @return [Object] the value returned by MessagePack.unpack
def load_file(name)
  MessagePack.unpack(File.binread("#{name}.dat"))
end
|
#process(text_id) ⇒ Object
58
59
60
61
62
63
64
|
# File 'lib/lumix/lookup.rb', line 58
# Runs the given block against a fresh Document unless tokens for +text_id+
# are already present.
#
# When the token store already has rows for +text_id+ the block is not
# called and true is returned. Otherwise a Document is created for this
# lookup and yielded; it is flushed only if the block returns a truthy value.
#
# @param text_id [Object] value compared against the +:text_id+ column
# @yieldparam doc [Document] the newly created document
# @return [Object, nil] true when the text was already present, otherwise the
#   block's result (nil when no block is given)
def process(text_id)
  already_present = !tokens.where(:text_id => text_id).empty?
  return true if already_present

  document = Document.new(self)
  outcome = block_given? ? yield(document) : nil
  document.flush if outcome
  outcome
end
|
#save ⇒ Object
35
36
37
38
39
40
41
42
43
44
|
# File 'lib/lumix/proto/lookup.rb', line 35
# Writes the tags, words, texts and tokens through save_file, in that order,
# when the lookup is dirty.
#
# @saving is set for the duration of the write and is always reset, even when
# a save_file call raises. add_token ignores calls while @saving is set, so a
# flag left behind by a failed save would silently drop every later token.
#
# NOTE(review): @dirty is not cleared here, so every later call writes again;
# confirm whether that is intended.
#
# @return [false, nil] nil when nothing needed saving, false after a save
def save
  return unless @dirty
  @saving = true
  begin
    puts "Saving"
    save_file :tags
    save_file :words
    save_file :texts
    save_file :tokens
  ensure
    @saving = false
  end
  # Same return value as before the ensure block was introduced.
  false
end
|
#save_file(name) ⇒ Object
51
52
53
54
55
56
|
# File 'lib/lumix/proto/lookup.rb', line 51
# Serializes the instance variable named after +name+ with MessagePack and
# writes it to "<name>.dat" in the current working directory.
#
# The payload is written in binary mode with a plain write, so the bytes on
# disk are exactly the packed bytes: no newline translation and no output
# record separator ($\) appended. Packing happens before the file is opened,
# so a failure while packing leaves an existing file untouched.
#
# @param name [#to_s] instance variable name (without '@') and base name of
#   the data file
# @return [nil]
def save_file(name)
  data = instance_variable_get("@#{name}")
  File.binwrite("#{name}.dat", MessagePack.pack(data))
  nil
end
|
49
50
51
52
|
# File 'lib/lumix/lookup.rb', line 49
# Memoized collection over the +:tags+ table, keyed by +:tag+.
#
# The LookupCollection is built on first use and kept in @tags.
#
# @return [Object] the value held in @tags
def tags
  return @tags if @tags

  @tags = LookupCollection.new(db[:tags], :tag)
end
|
#text_range(t_begin, t_end) ⇒ Object
93
94
95
96
97
|
# File 'lib/lumix/proto/lookup.rb', line 93
# Describes the span between two token rows of the same text.
#
# Looks up the rows at +t_begin+ and +t_end+ in @tokens. When both carry the
# same TEXT_ID the result combines the begin fields of the first row with the
# end fields of the last row; otherwise nil is returned. TEXT_ID, S_BEGIN,
# S_END, T_BEGIN and T_END are row-index constants defined outside this
# method.
#
# @param t_begin [Integer] index of the first token row
# @param t_end [Integer] index of the last token row
# @return [Array, nil] [text_id, s_begin, s_end, t_begin, t_end] or nil when
#   the two rows belong to different texts
def text_range(t_begin, t_end)
  first_row = @tokens[t_begin]
  last_row = @tokens[t_end]
  if first_row[TEXT_ID] == last_row[TEXT_ID]
    [first_row[TEXT_ID], first_row[S_BEGIN], last_row[S_END], first_row[T_BEGIN], last_row[T_END]]
  end
end
|
#union(*id_sets) ⇒ Object
returns the start indices of matching sequences
88
89
90
91
|
# File 'lib/lumix/proto/lookup.rb', line 88
# Returns the start indices of matching sequences.
#
# Each id set is shifted down by its position in the argument list (the
# first by 0, the second by 1, ...), and the shifted sets are intersected.
# An id therefore survives only when every set contains it at the matching
# offset.
#
# @param id_sets [Array<Array<Integer>>] one list of ids per sequence position
# @return [Array<Integer>, nil] the common start indices, or nil when called
#   without arguments
def union(*id_sets)
  shifted = []
  id_sets.each_with_index do |ids, offset|
    shifted << ids.map { |id| id - offset }
  end
  shifted.inject { |common, ids| common & ids }
end
|
#with(*types) {|args| ... } ⇒ Object
46
47
48
49
|
# File 'lib/lumix/proto/lookup.rb', line 46
# Yields the data sets named by +types+, loading any that are not in memory.
#
# For each name, the instance variable of that name is used when it is
# truthy. Otherwise the data is read through load_file and stored in that
# instance variable, so later calls reuse it.
#
# @param types [Array<Symbol, String, Array>] data set names; nested arrays
#   are flattened
# @yield [*args] one value per name, in the order given
# @return [Object] the block's result
def with(*types)
  args = types.flatten.map do |name|
    ivar = "@#{name}"
    # The fallback previously called instance_variable_get with two
    # arguments, which raises ArgumentError; the value has to be set.
    instance_variable_get(ivar) || instance_variable_set(ivar, load_file(name))
  end
  yield(*args)
end
|
#words ⇒ Object
54
55
56
|
# File 'lib/lumix/lookup.rb', line 54
# Memoized collection over the +:words+ table, keyed by +:word+.
#
# The LookupCollection is built on first use and kept in @words.
#
# @return [Object] the value held in @words
def words
  return @words if @words

  @words = LookupCollection.new(db[:words], :word)
end
|