Class: Rlid::OrderedNGrams
Overview
A subclass should define the filename.
Constant Summary
collapse
- N =
3
Instance Method Summary
collapse
Methods inherited from NGramModel
language_models
Constructor Details
#initialize(string, cutoff = 300) ⇒ OrderedNGrams
Returns a new instance of OrderedNGrams.
9
10
11
|
# File 'lib/rlid/models/ordered_ngrams.rb', line 9
# Creates an ordered n-gram model.
#
# Forwards both arguments to the superclass constructor, inserting the class
# constant N between them.
#
# @param string forwarded unchanged to the superclass constructor
# @param cutoff forwarded unchanged to the superclass constructor (default 300)
def initialize(string, cutoff = 300)
  super(string, N, cutoff)
end
|
Instance Method Details
#-(other) ⇒ Object
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
# File 'lib/rlid/models/ordered_ngrams.rb', line 38
# Computes a rank-based distance from this model to +other+.
#
# Every n-gram in +other+ contributes the absolute difference between its
# position there and its position in this model. N-grams that this model
# does not know contribute @cutoff instead. N-grams that exist only in this
# model are not visited and add nothing.
#
# @param other [OrderedNGrams] the model to compare against
# @return the accumulated distance (0 when +other+ has no n-grams)
def -(other)
  # NOTE(review): InvalidArgument is not a core Ruby exception class; confirm
  # it is defined somewhere in the project, otherwise this raises NameError.
  raise InvalidArgument unless other.is_a?(OrderedNGrams)

  other.ngram_pos.inject(0) do |total, (ngram, other_rank)|
    own_rank = ngram_pos[ngram]
    penalty = own_rank.nil? ? @cutoff : (own_rank - other_rank).abs
    total + penalty
  end
end
|
#generate_model(ngram_count) ⇒ Object
30
31
32
33
34
35
36
|
# File 'lib/rlid/models/ordered_ngrams.rb', line 30
# Builds the n-gram => rank table (@ngram_pos) from raw n-gram counts.
#
# The entries of +ngram_count+ are sorted by descending count, truncated to
# the first @cutoff entries, and each surviving n-gram is mapped to its
# zero-based position in that ordering.
#
# @param ngram_count [Hash] maps each n-gram to its occurrence count
# @return [Array] the sorted, truncated [ngram, count] pairs
def generate_model(ngram_count)
  ranked = ngram_count.to_a.sort { |a, b| b[1] <=> a[1] }[0...@cutoff]

  # The table must NOT have a default value: #- relies on a nil lookup to
  # detect an unknown n-gram and charge the @cutoff penalty. A default of 0
  # would make every unknown n-gram look like the top-ranked one.
  @ngram_pos = {}
  ranked.each_with_index { |(ngram, _count), rank| @ngram_pos[ngram] = rank }
end
|
#load(file) ⇒ Object
19
20
21
22
23
24
25
26
27
28
|
# File 'lib/rlid/models/ordered_ngrams.rb', line 19
# Rebuilds the n-gram => rank table (@ngram_pos) from +file+.
#
# Each line yields one entry: its first N characters are the n-gram and its
# zero-based line index is the rank. Anything after the first N characters
# (such as the position column written by #save) is discarded. A line with
# fewer than N characters before the newline is stored as-is, newline
# included, because the pattern does not match it.
#
# @param file an object responding to +each_line+ (for example an IO)
def load(file)
  @ngram_pos = {}
  # Captures the first N characters of a line and swallows the remainder.
  leading_ngram = /^(.{#{N}}).*\n?/
  file.each_line.with_index do |line, index|
    @ngram_pos[line.gsub(leading_ngram, '\1')] = index
  end
end
|
#save(file) ⇒ Object
13
14
15
16
17
|
# File 'lib/rlid/models/ordered_ngrams.rb', line 13
# Serialises the rank table (@ngram_pos) to +file+.
#
# Writes one line per entry in the form "<ngram> <position>", in the hash's
# iteration order.
#
# @param file an object responding to +write+ (for example an IO)
def save(file)
  @ngram_pos.each_pair { |ngram, pos| file.write("#{ngram} #{pos}\n") }
end
|