Class: CompoundSplitter::Splitter

Inherits:
Object
  • Object
show all
Defined in:
lib/compound_splitter/splitter.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(dictionary = nil) ⇒ Splitter

Returns a new instance of Splitter.



5
6
7
# File 'lib/compound_splitter/splitter.rb', line 5

# Sets up the splitter with the dictionary it will consult.
#
# @param dictionary [Object, nil] dictionary to use; when nil (or false) a
#   fresh Dictionary is created instead
def initialize(dictionary = nil)
  dictionary ||= Dictionary.new
  @dictionary = dictionary
end

Instance Attribute Details

#dictionary ⇒ Object

Returns the value of attribute dictionary.



3
4
5
# File 'lib/compound_splitter/splitter.rb', line 3

# Reader for the dictionary held in @dictionary.
#
# @return [Object] the current value of @dictionary
def dictionary
  @dictionary
end

Instance Method Details

#split(compound) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/compound_splitter/splitter.rb', line 9

# Splits +compound+ into the highest-scoring sequence of parts.
#
# Works left to right over every prefix of +compound+, recording for each
# prefix length the best score found and the index where the final part of
# that best segmentation starts. A segmentation's score is the product of
# +dictionary.word_prob+ over its parts. When two candidates score equally,
# the one with the larger start index (the shorter final part) wins.
#
# @param compound [String] the text to split
# @return [Array<String>] the parts in their original order; empty when
#   +compound+ is empty
def split(compound)
  return [] if compound.empty?

  max_length = dictionary.max_word_length # identical for every prefix, so read it once
  best_probs = [1.0] # best_probs[i]: score of the best segmentation of the first i characters
  last_starts = [0]  # last_starts[i]: index where the final part of that segmentation begins

  1.upto(compound.length) do |stop|
    # Only parts of at most max_length characters are tried. The window is
    # clamped so it always contains the one-character part; otherwise a
    # non-positive max_word_length would leave no candidates and the lookup
    # of the best candidate below would fail on nil.
    first = [[0, stop - max_length].max, stop - 1].min

    candidates = (first...stop).map do |start|
      [best_probs[start] * dictionary.word_prob(compound[start...stop]), start]
    end

    # Pairs compare by score first, then by start index.
    best_prob, best_start = candidates.max
    best_probs << best_prob
    last_starts << best_start
  end

  # Follow the recorded start indexes back from the end of the string.
  words = []
  stop = compound.length
  while stop > 0
    words << compound[last_starts[stop]...stop]
    stop = last_starts[stop]
  end

  words.reverse
end