Class: Rlid::FrequencyModel

Inherits:
NGramModel show all
Defined in:
lib/rlid/models/cosine_distance_model.rb

Direct Known Subclasses

CosineDistanceModel

Constant Summary collapse

N =

trigrams

3

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from NGramModel

language_models

Constructor Details

#initialize(string, cutoff = 3000) ⇒ FrequencyModel

Returns a new instance of FrequencyModel.



10
11
12
# File 'lib/rlid/models/cosine_distance_model.rb', line 10

# Builds a FrequencyModel over trigrams.
#
# string - passed through unchanged to the NGramModel constructor.
# cutoff - passed through unchanged to the NGramModel constructor
#          (default 3000). Presumably stored as @cutoff, which
#          generate_model uses as the number of n-grams to keep;
#          confirm against NGramModel.
def initialize(string, cutoff = 3000)
  ngram_size = N # class constant: 3, i.e. trigrams
  super(string, ngram_size, cutoff)
end

Class Method Details

.filename ⇒ Object



41
42
43
44
# File 'lib/rlid/models/cosine_distance_model.rb', line 41

# Name associated with this model class.
# NOTE(review): presumably the base name of the stored model file;
# confirm against the caller in NGramModel.
#
# Returns the String "cosine_distance3000".
def self.filename
  # FIXME should be frequency3000
  "cosine_distance3000"
end

Instance Method Details

#generate_model(ngram_count) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/rlid/models/cosine_distance_model.rb', line 22

# Builds the normalized n-gram frequency table.
#
# The (ngram, count) pairs are ordered by descending count, only the first
# @cutoff of them are kept, and each kept count is divided by the sum of
# the kept counts. The result is stored in @ngram_frequency, keyed by
# n-gram, with the relative frequency (a Float) as value.
#
# ngram_count - object whose #to_a yields [ngram, count] pairs (e.g. a Hash).
#
# Returns the @ngram_frequency Hash.
def generate_model(ngram_count)
  # most frequent n-grams first; everything past @cutoff is dropped
  ranked = ngram_count.to_a.sort { |a, b| b[1] <=> a[1] }
  kept = ranked[0...@cutoff]

  # sum of the kept counts, accumulated as a Float for the division below
  total = kept.inject(0.0) { |sum, pair| sum + pair[1] }

  # key is the n-gram, value is its share of the kept total
  @ngram_frequency = {}
  kept.each do |ngram, count|
    @ngram_frequency[ngram] = count / total
  end
  @ngram_frequency
end

#load(file) ⇒ Object



18
19
20
# File 'lib/rlid/models/cosine_distance_model.rb', line 18

# Restores @ngram_frequency from a previously saved model.
#
# file - object responding to #read that returns Marshal-serialized data
#        (the format written by #save).
#
# NOTE(review): Marshal.load can instantiate arbitrary objects, so this
# should only be given trusted model files.
#
# Returns the deserialized object now stored in @ngram_frequency.
def load(file)
  serialized = file.read
  @ngram_frequency = Marshal.load(serialized)
end

#save(file) ⇒ Object



14
15
16
# File 'lib/rlid/models/cosine_distance_model.rb', line 14

# Serializes @ngram_frequency with Marshal and writes it to +file+.
#
# file - object responding to #write (e.g. an open File or a StringIO).
#
# Returns whatever file.write returns (for IO objects, the byte count).
def save(file)
  serialized = Marshal.dump(@ngram_frequency)
  file.write(serialized)
end