Class: SRX::Polish::SentenceSplitter
- Inherits:
-
Object
- Object
- SRX::Polish::SentenceSplitter
- Includes:
- Enumerable
- Defined in:
- lib/srx/polish/sentence_splitter.rb
Instance Attribute Summary collapse
-
#debug ⇒ Object
writeonly
Sets the attribute debug.
-
#input ⇒ Object
Returns the value of attribute input.
Instance Method Summary collapse
- #each {|sentence + after_buffer| ... } ⇒ Object
-
#initialize(text = nil) ⇒ SentenceSplitter
constructor
A new instance of SentenceSplitter.
Constructor Details
#initialize(text = nil) ⇒ SentenceSplitter
Returns a new instance of SentenceSplitter.
64 65 66 67 68 69 70 |
# File 'lib/srx/polish/sentence_splitter.rb', line 64

# Creates a splitter over the given text.
#
# @param text [String, Object, nil] a String is wrapped in a StringIO opened
#   with mode "r:utf-8"; any other value (including nil) is stored as the
#   input unchanged.
def initialize(text = nil)
  @input = text.is_a?(String) ? StringIO.new(text, "r:utf-8") : text
end
Instance Attribute Details
#debug=(value) ⇒ Object (writeonly)
Sets the attribute debug.
62 63 64 |
# File 'lib/srx/polish/sentence_splitter.rb', line 62

# Stores the debug flag in @debug.
#
# @param value [Object] the new flag value
# @return [Object] the assigned value
def debug=(value)
  @debug = value
end
#input ⇒ Object
Returns the value of attribute input.
61 62 63 |
# File 'lib/srx/polish/sentence_splitter.rb', line 61

# Reader for the input held in @input.
#
# @return [Object] the current value of @input
def input
  @input
end
Instance Method Details
#each {|sentence + after_buffer| ... } ⇒ Object
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
# File 'lib/srx/polish/sentence_splitter.rb', line 72

# Walks the input one character at a time and yields each sentence found.
#
# Two sliding windows of at most +buffer_length+ characters are kept around
# the cursor: +before_buffer+ (text just consumed) and +after_buffer+
# (look-ahead). At every position where BEFORE_RE matches +before_buffer+,
# the REGEXPS triples (before_re, after_re, value) are tried in order; the
# first triple whose two patterns match both windows decides the position:
# a truthy +value+ ends the current sentence, a falsy one leaves it open.
#
# When @debug is set, the windows are printed for every candidate position
# and a coloured "<before_re:after_re>" marker (red for a break, green for a
# non-break) is appended to the sentence text.
#
# Inputs no longer than the look-ahead window are yielded as one sentence
# instead of raising EOFError or being dropped.
#
# NOTE: positions inside the final +buffer_length+ characters are never
# tested against the rules; that tail is appended to the last sentence as is.
#
# @yield [sentence] each sentence in input order
# @yieldparam sentence [String]
# @return [Enumerator] when called without a block
# @raise [RuntimeError] if the input is nil
def each
  raise "Invalid argument - text is nil" if @input.nil?
  return enum_for(:each) unless block_given?

  buffer_length = 10
  sentence = ""
  before_buffer = ""
  @input.pos = 0

  # Prime the look-ahead window, stopping early on short inputs so that
  # readchar is never called at end of input.
  lookahead = []
  lookahead << @input.readchar while lookahead.size < buffer_length && !@input.eof?
  after_buffer = lookahead.join

  until @input.eof?
    if BEFORE_RE.match(before_buffer)
      puts "#{before_buffer}|#{after_buffer.gsub(/\n/,"\\n")}" if @debug

      # Only the first rule matching both windows is applied.
      rule = REGEXPS.find do |before_re, after_re, _value|
        before_re.match(before_buffer) && after_re.match(after_buffer)
      end

      if rule
        before_re, after_re, value = rule
        if @debug
          color = value ? :red : :green
          sentence << Term::ANSIColor.send(color,"<#{before_re}:#{after_re}>")
        end
        if value
          yield sentence
          # Start a fresh string so the yielded one is not mutated later.
          sentence = ""
        end
      end
    end

    next_after = @input.readchar
    # Keep the look-behind window bounded to buffer_length characters.
    before_buffer.sub!(FIRST_CHAR,"") if before_buffer.size >= buffer_length
    # Move one character from the look-ahead into the look-behind and sentence.
    shifted = after_buffer.slice!(FIRST_CHAR)
    before_buffer << shifted
    sentence << shifted
    after_buffer << next_after
  end

  remainder = sentence + after_buffer
  yield remainder unless remainder.empty?
end