Class: Rlid::OrderedNGrams

Inherits:
NGramModel show all
Defined in:
lib/rlid/models/ordered_ngrams.rb

Overview

A subclass should define the filename.

Direct Known Subclasses

NGrams300, NGrams3000, NGrams4000, NGrams800, NGramsKDE

Constant Summary collapse

N =
3

Instance Method Summary collapse

Methods inherited from NGramModel

language_models

Constructor Details

#initialize(string, cutoff = 300) ⇒ OrderedNGrams

Returns a new instance of OrderedNGrams.



9
10
11
# File 'lib/rlid/models/ordered_ngrams.rb', line 9

# Creates an ordered n-gram model by delegating to the parent class's
# initializer with the n-gram length fixed to the class constant N.
#
# @param string [Object] passed through unchanged to the parent initializer
#   (presumably the text or source the model is built from — confirm against
#   NGramModel)
# @param cutoff [Integer] passed through to the parent initializer; other
#   methods of this class read @cutoff as the number of top n-grams kept and
#   as the maximum per-n-gram distance (assumes the parent stores it in
#   @cutoff — TODO confirm)
def initialize(string, cutoff = 300)
  super(string, N, cutoff)
end

Instance Method Details

#-(other) ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/rlid/models/ordered_ngrams.rb', line 38

# Rank-based distance between this model and +other+.
#
# Walks every n-gram of +other+: when this model also ranks the n-gram, the
# absolute difference of the two positions is added; when it does not, this
# model's @cutoff is added as the maximum penalty. Only the n-grams of
# +other+ are visited, so n-grams present solely in this model contribute
# nothing.
#
# @param other [OrderedNGrams] the model to compare against
# @return [Integer] the accumulated distance (0 when +other+ has no n-grams)
# @raise [InvalidArgument] when +other+ is not an OrderedNGrams
#   NOTE(review): InvalidArgument is not a Ruby core class; unless it is
#   defined elsewhere in the project this line raises NameError instead —
#   confirm, and consider ArgumentError.
def -(other)
  raise InvalidArgument unless other.is_a?(OrderedNGrams)

  other.ngram_pos.inject(0) do |total, (ngram, other_rank)|
    own_rank = ngram_pos[ngram]
    # an n-gram this model does not rank costs the maximum distance
    penalty = own_rank.nil? ? @cutoff : (own_rank - other_rank).abs
    total + penalty
  end
end

#generate_model(ngram_count) ⇒ Object



30
31
32
33
34
35
36
# File 'lib/rlid/models/ordered_ngrams.rb', line 30

# Builds the n-gram => position table from raw counts.
#
# The [ngram, count] pairs are sorted by count in descending order, only the
# first @cutoff pairs are kept, and each surviving n-gram is stored in
# @ngram_pos with its zero-based position in that ordering.
#
# @param ngram_count [#to_a] collection whose +to_a+ yields [ngram, count]
#   pairs (presumably a Hash of n-gram => occurrence count — confirm against
#   the caller)
# @return [Array] the retained [ngram, count] pairs, in rank order
def generate_model(ngram_count)
  by_count_desc = ngram_count.to_a.sort { |a, b| b[1] <=> a[1] }
  kept = by_count_desc[0...@cutoff]
  @ngram_pos = {} # n-gram => zero-based position
  kept.each_with_index { |(ngram, _count), rank| @ngram_pos[ngram] = rank }
end

#load(file) ⇒ Object



19
20
21
22
23
24
25
26
27
28
# File 'lib/rlid/models/ordered_ngrams.rb', line 19

# Rebuilds @ngram_pos from a previously written model file.
#
# Each line contributes one n-gram: the first N characters of the line. Its
# position is the zero-based index of the line in the file; anything after
# the first N characters is discarded. A line with fewer than N characters
# before its newline does not match the pattern and is stored as-is.
#
# @param file [#each_line] source to read lines from (e.g. an open File)
# @return [Object] whatever +file.each_line+ returns
def load(file)
  leading_ngram = /^(.{#{N}}).*\n?/
  @ngram_pos = {}
  rank = 0
  file.each_line do |line|
    # the counter advances for every line, even when an n-gram repeats
    @ngram_pos[line.gsub(leading_ngram, '\1')] = rank
    rank += 1
  end
end

#save(file) ⇒ Object



13
14
15
16
17
# File 'lib/rlid/models/ordered_ngrams.rb', line 13

# Writes the model to +file+, one line per n-gram.
#
# Each line is the n-gram, four spaces, the n-gram's position, and a
# newline, emitted in the iteration order of @ngram_pos.
#
# @param file [#write] destination to write to (e.g. an open File)
# @return [Hash] the @ngram_pos table
def save(file)
  @ngram_pos.each_pair { |ngram, rank| file.write("#{ngram}    #{rank}\n") }
end