Class: CompoundSplitter::Dictionary

Inherits:
Object
  • Object
show all
Defined in:
lib/compound_splitter/dictionary.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(file_location = nil) ⇒ Dictionary

Returns a new instance of Dictionary.



5
6
7
# File 'lib/compound_splitter/dictionary.rb', line 5

# Creates a dictionary bound to a word-list file.
#
# @param file_location [String, nil] path of the word-list file; when nil
#   (or any other falsy value) '/usr/share/dict/words' is used instead
def initialize(file_location = nil)
  @file_location = file_location
  @file_location ||= '/usr/share/dict/words'
end

Instance Attribute Details

#file_location ⇒ Object

Returns the value of attribute file_location.



3
4
5
# File 'lib/compound_splitter/dictionary.rb', line 3

# Reader for the @file_location instance variable, which the constructor
# sets to the path it was given or to '/usr/share/dict/words'.
#
# @return [Object] the current value of @file_location
def file_location
  @file_location
end

Instance Method Details

#[](lookup_word) ⇒ Object



34
35
36
# File 'lib/compound_splitter/dictionary.rb', line 34

# Looks a word up in the occurrence table.
#
# @param lookup_word [Object] key to look up in the table returned by
#   #ocurrances_hash
# @return [Object] whatever that table holds for the key
def [](lookup_word)
  table = ocurrances_hash
  table[lookup_word]
end

#max_word_length ⇒ Object

Returns the length of the longest word in the dictionary. The value is computed on first use and cached.



20
21
22
# File 'lib/compound_splitter/dictionary.rb', line 20

# Length of the longest word in the dictionary, memoized after the first
# call.
#
# Words are compared by length. A plain `words.max` compares strings
# lexicographically and would pick "zoo" over "elephant".
#
# @return [Integer] length of the longest word, or 0 when the dictionary
#   contains no words
def max_word_length
  @max_word_length ||= words.map(&:length).max || 0
end

#ocurrances_hash ⇒ Object



30
31
32
# File 'lib/compound_splitter/dictionary.rb', line 30

# Occurrence table for the dictionary, built once and then cached.
#
# The word list is sorted and handed to the class-level count_dupes helper;
# whatever that helper returns is stored and returned on later calls.
#
# @return [Object] the result of count_dupes for the sorted word list
def ocurrances_hash
  return @ocurrances_hash if @ocurrances_hash

  sorted_words = words.sort
  @ocurrances_hash = self.class.count_dupes(sorted_words)
end

#total_word_count ⇒ Object

Returns the total number of words in the dictionary. It is a Float because it is used as the divisor when computing word probabilities.



26
27
28
# File 'lib/compound_splitter/dictionary.rb', line 26

# Number of entries in the word list, memoized after the first call.
#
# Converted to a Float so that dividing a count by it yields a fraction
# rather than an integer quotient.
#
# @return [Float] the word count
def total_word_count
  return @total_word_count if @total_word_count

  @total_word_count = words.length.to_f
end

#word_prob(word) ⇒ Object

Get the probability of a specific word occurring in the dictionary.



39
40
41
42
43
44
# File 'lib/compound_splitter/dictionary.rb', line 39

# Relative frequency of a word within the dictionary.
#
# @param word [Object] key to look up via #[]
# @return [Numeric] the word's occurrence count (0 when the lookup yields
#   nil or false) divided by #total_word_count
def word_prob(word)
  occurrences = self[word]
  # Words missing from the table count as zero occurrences.
  occurrences = 0 unless occurrences
  occurrences / total_word_count
end

#words ⇒ Object

Read a file of newline separated words into a downcased array.



10
11
12
13
14
15
16
17
# File 'lib/compound_splitter/dictionary.rb', line 10

# Reads the newline-separated word file at file_location into an array of
# downcased words, memoized after the first successful read.
#
# @return [Array<String>] one downcased entry per line, with the line
#   terminator removed
# @raise [SystemCallError] if the file cannot be opened (e.g. Errno::ENOENT)
def words
  # The instance variable is assigned only after the whole list has been
  # built, so an error raised part-way through cannot leave a truncated
  # list cached for later calls.
  @words ||= File.foreach(file_location).map { |line| line.chomp.downcase }
end