Class: DiffLCS::WordSplitArray
- Inherits:
-
Array
- Object
- Array
- DiffLCS::WordSplitArray
- Defined in:
- lib/diff_l_c_s/word_split_array.rb
Constant Summary collapse
- SEPARATOR =
Used as a separator
"\031"
Instance Method Summary collapse
-
#initialize(text) ⇒ WordSplitArray
constructor
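Splits the text into words, simple HTML tags, runs of non-word characters (such as whitespace and punctuation) and separators, without dropping any character.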
-
#translate_to_pos(positions) ⇒ Object
Translates word-positions to character-positions.
Constructor Details
#initialize(text) ⇒ WordSplitArray
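Splits the text into words, simple HTML tags, runs of non-word characters (such as whitespace and punctuation) and separators, without dropping any character.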
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
# File 'lib/diff_l_c_s/word_split_array.rb', line 11
#
# Fills this array with the consecutive pieces of +text+. Every character of
# +text+ ends up in exactly one element, so joining the elements gives back
# the original string.
#
# An element is one of:
# * a simple html-tag such as <b> or </b> (no attributes),
# * a run of characters that are neither word characters, '<', '/', '>' nor
#   SEPARATOR (typically whitespace and punctuation),
# * a single SEPARATOR,
# * the text between two such tokens (e.g. a word).
#
# SEPARATOR is the constant defined on the enclosing class.
#
# @param text [String] the text to split
def initialize(text)
  consumed = 0
  # splits for html-tags, for any non-word-characters & for SEPARATORs
  text.scan(/<\/?\w+>|[^\w<\/>#{SEPARATOR}]+|#{SEPARATOR}/) do |literal|
    match = Regexp.last_match
    # Text skipped between the previous token and this one becomes its own element.
    push(text[consumed...match.begin(0)]) if match.begin(0) > consumed
    push(literal)
    consumed = match.end(0)
  end
  # Whatever follows the last token.
  push(text[consumed...text.size]) if consumed < text.size
end
Instance Method Details
#translate_to_pos(positions) ⇒ Object
Translates word-positions to character-positions.
31 32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/diff_l_c_s/word_split_array.rb', line 31
#
# Translates word-positions to character-positions.
#
# Builds a table of cumulative element sizes (entry k is the summed size of
# the first k elements of this array) and maps the +begin+ and +end+ of every
# given position through that table.
#
# NOTE(review): using the same boundary table for +end+ suggests positions
# are end-exclusive word ranges -- confirm against PositionRange.
#
# @param positions [Enumerable] objects responding to +begin+, +end+ and
#   +new_dup(begin, end)+
# @return [PositionRange::List] one translated position per input position
def translate_to_pos(positions)
  # offsets[k] == number of characters in front of element k;
  # offsets[size] == total number of characters.
  offsets = [0]
  each { |word| offsets.push(offsets.last + word.size) }

  char_positions = positions.map do |position|
    position.new_dup(offsets[position.begin], offsets[position.end])
  end
  PositionRange::List.new(char_positions)
end