Class: RMMSeg::ComplexAlgorithm
- Inherits:
-
Object
- Object
- RMMSeg::ComplexAlgorithm
- Includes:
- Algorithm
- Defined in:
- lib/rmmseg/complex_algorithm.rb
Instance Method Summary collapse
-
#create_chunks ⇒ Object
Create all possible three-word (or fewer) chunks starting from @index.
-
#initialize(text) ⇒ ComplexAlgorithm
constructor
Create a new ComplexAlgorithm.
Methods included from Algorithm
#basic_latin?, #find_match_words, #get_basic_latin_word, #get_cjk_word, #next_token, #nonword_char?, #segment
Constructor Details
#initialize(text) ⇒ ComplexAlgorithm
Create a new ComplexAlgorithm. The rules used by this algorithm include MMRule, LAWLRule, SVWLRule and LSDMFOCWRule.
13 14 15 16 17 18 19 20 21 |
# File 'lib/rmmseg/complex_algorithm.rb', line 13
#
# Create a new ComplexAlgorithm.
#
# Calls the inherited constructor with the same arguments (bare +super+),
# then stores one fresh instance of each rule class in @rules, in this
# order: MMRule, LAWLRule, SVWLRule, LSDMFOCWRule.
#
# @param text forwarded unchanged to the inherited constructor
def initialize(text)
  super
  @rules = [MMRule, LAWLRule, SVWLRule, LSDMFOCWRule].map(&:new)
end
Instance Method Details
#create_chunks ⇒ Object
Create all possible three-word (or fewer) chunks starting from @index.
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/rmmseg/complex_algorithm.rb', line 25
#
# Create all possible chunks of up to three words starting from @index.
#
# Candidate words at each position come from find_match_words. A chunk is
# emitted as soon as the words consumed so far reach the end of @chars;
# otherwise the search descends one more word, up to a depth of three.
# When the third candidate is of the unrecognized word type it is left out,
# and the chunk holds only the first two words.
#
# @return [Array] the collected Chunk objects, in discovery order
def create_chunks
  result = []

  find_match_words(@chars, @index).each do |first|
    after_first = @index + first.length

    # The first word alone reaches the end of the text: one-word chunk.
    if after_first == @chars.length
      result << Chunk.new([first])
      next
    end
    next unless after_first < @chars.length

    find_match_words(@chars, after_first).each do |second|
      after_second = after_first + second.length

      # Two words reach the end of the text: two-word chunk.
      if after_second == @chars.length
        result << Chunk.new([first, second])
        next
      end
      next unless after_second < @chars.length

      find_match_words(@chars, after_second).each do |third|
        # An unrecognized third word is not included in the chunk.
        unrecognized = third.type == Word::TYPES[:unrecognized]
        words = unrecognized ? [first, second] : [first, second, third]
        result << Chunk.new(words)
      end
    end
  end

  result
end