Class: HTMLDiff::DiffBuilder
- Inherits:
-
Object
- Object
- HTMLDiff::DiffBuilder
- Defined in:
- lib/htmldiff.rb
Constant Summary collapse
- VALID_METHODS =
[:replace, :insert, :delete, :equal]
Instance Method Summary collapse
- #add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new) ⇒ Object
- #add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new) ⇒ Object
- #build ⇒ Object
- #closing_tag?(item) ⇒ Boolean
- #convert_html_to_list_of_words(x, use_brackets = false) ⇒ Object
- #delete(operation, tagclass = 'diffdel') ⇒ Object
- #end_of_tag?(char) ⇒ Boolean
- #equal(operation) ⇒ Object
- #explode(sequence) ⇒ Object
- #extract_consecutive_words(words, &condition) ⇒ Object
- #find_match(start_in_old, end_in_old, start_in_new, end_in_new) ⇒ Object
- #index_new_words ⇒ Object
-
#initialize(old_version, new_version, mode = :html) ⇒ DiffBuilder
constructor
A new instance of DiffBuilder.
- #insert(operation, tagclass = 'diffins') ⇒ Object
-
#insert_tag(tagname, cssclass, words) ⇒ Object
This method encloses words within a specified tag (ins or del) and adds the result to @content, with a twist: if some of the words are tags, it creates multiple ins or del elements, so that none of them encloses a tag.
- #matching_blocks ⇒ Object
- #opening_tag?(item) ⇒ Boolean
- #operations ⇒ Object
- #perform_operation(operation) ⇒ Object
- #recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks) ⇒ Object
- #replace(operation) ⇒ Object
- #split_inputs_to_words ⇒ Object
- #start_of_tag?(char) ⇒ Boolean
- #tag?(item) ⇒ Boolean
- #whitespace?(char) ⇒ Boolean
- #wrap_text(text, tagname, cssclass) ⇒ Object
Constructor Details
#initialize(old_version, new_version, mode = :html) ⇒ DiffBuilder
Returns a new instance of DiffBuilder.
18 19 20 21 22 |
# File 'lib/htmldiff.rb', line 18
# Remembers the two versions to compare and prepares an empty output buffer.
#
# @param old_version the original input, stored in @old_version
# @param new_version the revised input, stored in @new_version
# @param mode [Symbol] output mode, stored in @mode; #insert and #delete emit
#   ins/del markup when it is :html and bracket markers otherwise
def initialize(old_version, new_version, mode = :html)
  @old_version = old_version
  @new_version = new_version
  # Output fragments are collected here as the diff is built.
  @content = []
  @mode = mode
end
Instance Method Details
#add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new) ⇒ Object
145 146 147 148 149 150 151 152 153 154 |
# File 'lib/htmldiff.rb', line 145
# Grows a match to the left for as long as the words immediately preceding it
# in @old_words and @new_words are equal, without moving past the lower bounds.
#
# @param match_in_old [Integer] index in @old_words where the match starts
# @param match_in_new [Integer] index in @new_words where the match starts
# @param match_size [Integer] number of words currently covered by the match
# @param start_in_old [Integer] lowest start index allowed in @old_words
# @param start_in_new [Integer] lowest start index allowed in @new_words
# @return [Array] the adjusted [match_in_old, match_in_new, match_size]
def add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new)
  while match_in_old > start_in_old &&
        match_in_new > start_in_new &&
        @old_words[match_in_old - 1] == @new_words[match_in_new - 1]
    match_in_old -= 1
    match_in_new -= 1
    match_size += 1
  end
  [match_in_old, match_in_new, match_size]
end
#add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new) ⇒ Object
156 157 158 159 160 161 162 163 |
# File 'lib/htmldiff.rb', line 156
# Grows a match to the right for as long as the words immediately following it
# in @old_words and @new_words are equal, without moving past the upper bounds.
#
# @param match_in_old [Integer] index in @old_words where the match starts
# @param match_in_new [Integer] index in @new_words where the match starts
# @param match_size [Integer] number of words currently covered by the match
# @param end_in_old [Integer] exclusive upper bound in @old_words
# @param end_in_new [Integer] exclusive upper bound in @new_words
# @return [Array] [match_in_old, match_in_new, match_size]; only the size changes
def add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new)
  while match_in_old + match_size < end_in_old &&
        match_in_new + match_size < end_in_new &&
        @old_words[match_in_old + match_size] == @new_words[match_in_new + match_size]
    match_size += 1
  end
  [match_in_old, match_in_new, match_size]
end
#build ⇒ Object
24 25 26 27 28 29 |
# File 'lib/htmldiff.rb', line 24
# Runs the whole diff: splits both versions into words, indexes the new words,
# applies every operation returned by #operations and joins the collected output.
#
# @return [String] the joined contents of @content
def build
  # Start from an empty buffer so that calling #build a second time does not
  # append another copy of the diff to the output of the previous call.
  @content = []
  split_inputs_to_words
  index_new_words
  operations.each { |op| perform_operation(op) }
  @content.join
end
#closing_tag?(item) ⇒ Boolean
206 207 208 |
# File 'lib/htmldiff.rb', line 206
# Tells whether the whole item is a single closing tag (for example "</p>"),
# optionally surrounded by whitespace.
#
# @param item [String] the word to test
# @return [Boolean] true only when the entire string is one closing tag
def closing_tag?(item)
  # \A and \z anchor the whole string; ^ and $ would accept any single line of
  # a multi-line string that happens to hold a tag.
  !(item =~ %r{\A\s*</[^>]+>\s*\z}).nil?
end
#convert_html_to_list_of_words(x, use_brackets = false) ⇒ Object
271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 |
# File 'lib/htmldiff.rb', line 271
# Splits a character sequence into "words": each tag (from "<" up to ">"),
# each run of whitespace, and each run of other characters becomes one entry.
#
# The input is never modified: every word is built in a string owned by this
# method, so an Array passed as +x+ keeps its elements untouched.
#
# @param x [String, Array<String>] the text, or its characters (see #explode)
# @param use_brackets [Boolean] when true, tag delimiters are written as
#   "[" and "]" instead of "<" and ">" in the returned words
# @return [Array<String>] the words, in input order
def convert_html_to_list_of_words(x, use_brackets = false)
  tag_open = use_brackets ? '[' : '<'
  tag_close = use_brackets ? ']' : '>'
  mode = :char
  current_word = ''.dup
  words = []

  explode(x).each do |char|
    case mode
    when :tag
      if end_of_tag?(char)
        current_word << tag_close
        words << current_word
        current_word = ''.dup
        # The character that closes a tag is never whitespace, so scanning
        # always resumes in :char mode.
        mode = :char
      else
        current_word << char
      end
    when :char, :whitespace
      if start_of_tag?(char)
        words << current_word unless current_word.empty?
        current_word = tag_open.dup
        mode = :tag
      else
        char_mode = whitespace?(char) ? :whitespace : :char
        if char_mode == mode
          current_word << char
        else
          # Switching between text and whitespace ends the current word.
          words << current_word unless current_word.empty?
          current_word = char.dup
          mode = char_mode
        end
      end
    else
      raise "Unknown mode #{mode.inspect}"
    end
  end

  words << current_word unless current_word.empty?
  words
end
#delete(operation, tagclass = 'diffdel') ⇒ Object
187 188 189 190 191 192 193 194 195 |
# File 'lib/htmldiff.rb', line 187
# Emits the old words covered by the operation as a deletion.
#
# In :html mode the words go through #insert_tag with a "del" tag; in any other
# mode they are appended to @content between "[--" and "--]" markers.
#
# @param operation an object responding to start_in_old and end_in_old
# @param tagclass [String] CSS class passed to #insert_tag in :html mode
def delete(operation, tagclass = 'diffdel')
  removed_words = @old_words[operation.start_in_old...operation.end_in_old]
  return insert_tag('del', tagclass, removed_words) if @mode == :html

  # The slice is pushed as a single nested element, not spliced in.
  @content << '[--' << removed_words << '--]'
end
#end_of_tag?(char) ⇒ Boolean
259 260 261 |
# File 'lib/htmldiff.rb', line 259
# Tells whether the character closes a tag.
#
# @param char [String] a single character
# @return [Boolean] true when char equals ">"
def end_of_tag?(char)
  '>' == char
end
#equal(operation) ⇒ Object
197 198 199 200 |
# File 'lib/htmldiff.rb', line 197
# Copies the words that are identical in both versions into the output.
# No markup is added; the words are taken from @new_words.
#
# @param operation an object responding to start_in_new and end_in_new
# @return [Array] @content after the words have been appended
def equal(operation)
  # concat appends in place, so the buffer is not reallocated and copied for
  # every equal operation the way `@content += ...` would.
  @content.concat(@new_words[operation.start_in_new...operation.end_in_new])
end
#explode(sequence) ⇒ Object
255 256 257 |
# File 'lib/htmldiff.rb', line 255
# Turns a String into an array of its characters; anything that is not a
# String is handed back unchanged.
#
# @param sequence [String, Object] the text, or an already split sequence
# @return [Array<String>, Object] the characters, or sequence itself
def explode(sequence)
  return sequence unless sequence.is_a?(String)

  sequence.split(//)
end
#extract_consecutive_words(words, &condition) ⇒ Object
214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
# File 'lib/htmldiff.rb', line 214
# Removes and returns the leading run of words for which the block is truthy.
#
# +words+ is modified in place: the returned words are taken off its front,
# and the first word that fails the condition (if any) stays at index 0.
#
# @param words [Array<String>] the remaining words; mutated
# @yieldparam word [String] the word to test
# @yieldreturn [Boolean] whether the word belongs to the run
# @return [Array<String>] the extracted words, empty if the first word fails
def extract_consecutive_words(words, &condition)
  first_rejected = words.index { |word| !condition.call(word) }
  # No rejected word means the whole remainder belongs to the run.
  words.shift(first_rejected || words.length)
end
#find_match(start_in_old, end_in_old, start_in_new, end_in_new) ⇒ Object
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
# File 'lib/htmldiff.rb', line 109
# Finds the longest run of consecutive equal words between
# @old_words[start_in_old...end_in_old] and @new_words[start_in_new...end_in_new].
#
# For every old word, the positions of the same word in the new version are
# read from @word_indices. The length of the run ending at each new position
# is the length of the run ending one position earlier for the previous old
# word, plus one.
#
# @param start_in_old [Integer] first index considered in @old_words
# @param end_in_old [Integer] exclusive end index in @old_words
# @param start_in_new [Integer] first index considered in @new_words
# @param end_in_new [Integer] exclusive end index in @new_words
# @return [Match, nil] the first longest match found, or nil if no word matches
def find_match(start_in_old, end_in_old, start_in_new, end_in_new)
  best_in_old = start_in_old
  best_in_new = start_in_new
  best_size = 0

  # Run lengths ending at each new-word index, for the previous old word.
  run_length_at = Hash.new(0)

  (start_in_old...end_in_old).each do |index_in_old|
    next_run_length_at = Hash.new(0)

    @word_indices[@old_words[index_in_old]].each do |index_in_new|
      next if index_in_new < start_in_new
      break if index_in_new >= end_in_new

      run_length = run_length_at[index_in_new - 1] + 1
      next_run_length_at[index_in_new] = run_length
      next unless run_length > best_size

      best_in_old = index_in_old - run_length + 1
      best_in_new = index_in_new - run_length + 1
      best_size = run_length
    end

    run_length_at = next_run_length_at
  end

  # NOTE: extending the result with add_matching_words_left and
  # add_matching_words_right is disabled; those calls are commented out.

  best_size != 0 ? Match.new(best_in_old, best_in_new, best_size) : nil
end
#index_new_words ⇒ Object
36 37 38 39 |
# File 'lib/htmldiff.rb', line 36
# Builds @word_indices, which maps each word of @new_words to the ascending
# list of positions where it occurs. Looking up an unknown word stores and
# returns an empty list.
#
# @return [Array<String>] @new_words
def index_new_words
  @word_indices = Hash.new { |index, word| index[word] = [] }
  @new_words.each_with_index do |word, position|
    @word_indices[word] << position
  end
end
#insert(operation, tagclass = 'diffins') ⇒ Object
177 178 179 180 181 182 183 184 185 |
# File 'lib/htmldiff.rb', line 177
# Emits the new words covered by the operation as an insertion.
#
# In :html mode the words go through #insert_tag with an "ins" tag; in any
# other mode they are appended to @content between "[++" and "++]" markers.
#
# @param operation an object responding to start_in_new and end_in_new
# @param tagclass [String] CSS class passed to #insert_tag in :html mode
def insert(operation, tagclass = 'diffins')
  added_words = @new_words[operation.start_in_new...operation.end_in_new]
  return insert_tag('ins', tagclass, added_words) if @mode == :html

  # The slice is pushed as a single nested element, not spliced in.
  @content << '[++' << added_words << '++]'
end
#insert_tag(tagname, cssclass, words) ⇒ Object
This method encloses words within a specified tag (ins or del) and adds the result to @content, with a twist: if some of the words are tags, it creates multiple ins or del elements, so that none of them encloses a tag. This handles cases like old: ‘<p>a</p>’ new: ‘<p>ab</p><p>c</p>’ diff result: ‘<p>a<ins>b</ins></p><p><ins>c</ins></p>’. This still doesn’t guarantee valid HTML (hint: think about diffing a text containing ins or del tags), but it handles more cases correctly than the earlier version.
P.S.: Spare a thought for people who write HTML browsers. They live in this … every day.
240 241 242 243 244 245 246 247 248 249 |
# File 'lib/htmldiff.rb', line 240
# Appends +words+ to @content, wrapping each run of non-tag words in its own
# +tagname+ element and copying tag words through unwrapped, so that no
# generated element encloses a tag.
#
# +words+ is consumed: it is empty when the method returns.
#
# @param tagname [String] element name used for wrapping, e.g. "ins" or "del"
# @param cssclass [String] value of the class attribute on the wrapper
# @param words [Array<String>] words to emit; mutated
# @return [nil]
def insert_tag(tagname, cssclass, words)
  until words.empty?
    non_tags = extract_consecutive_words(words) { |word| !tag?(word) }
    @content << wrap_text(non_tags.join, tagname, cssclass) unless non_tags.empty?

    break if words.empty?

    # Whatever comes next is a run of tags; emit it as is.
    @content.concat(extract_consecutive_words(words) { |word| tag?(word) })
  end
end
#matching_blocks ⇒ Object
88 89 90 91 92 |
# File 'lib/htmldiff.rb', line 88
# Collects the matching blocks between the complete old and new word lists by
# running #recursively_find_matching_blocks over both full ranges.
#
# @return [Array] the blocks gathered by #recursively_find_matching_blocks
def matching_blocks
  [].tap do |blocks|
    recursively_find_matching_blocks(0, @old_words.size, 0, @new_words.size, blocks)
  end
end
#opening_tag?(item) ⇒ Boolean
202 203 204 |
# File 'lib/htmldiff.rb', line 202
# Tells whether the whole item is a single tag of the form "<...>", optionally
# surrounded by whitespace.
#
# NOTE: the pattern only requires "<", one or more characters other than ">",
# and ">", so a closing tag such as "</p>" is accepted as well.
#
# @param item [String] the word to test
# @return [Boolean] true only when the entire string is one tag
def opening_tag?(item)
  # \A and \z anchor the whole string; ^ and $ would accept any single line of
  # a multi-line string that happens to hold a tag.
  !(item =~ /\A\s*<[^>]+>\s*\z/).nil?
end
#operations ⇒ Object
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# File 'lib/htmldiff.rb', line 41
# Converts the matching blocks into the ordered list of operations that turn
# the old word list into the new one.
#
# Before each match, the unmatched stretch (if any) becomes a :replace, :insert
# or :delete operation; each non-empty match itself becomes an :equal operation.
#
# @return [Array<Operation>] the operations, in document order
def operations
  position_in_old = position_in_new = 0
  result = []

  matches = matching_blocks
  # An empty match at the very end makes the loop below also handle the
  # unmatched tails of both versions.
  matches << Match.new(@old_words.length, @new_words.length, 0)

  matches.each do |match|
    old_words_pending = position_in_old != match.start_in_old
    new_words_pending = position_in_new != match.start_in_new

    # No action is needed when the match starts right at the current position
    # in both versions.
    action =
      if old_words_pending && new_words_pending
        :replace
      elsif new_words_pending
        :insert
      elsif old_words_pending
        :delete
      end

    if action
      result << Operation.new(action,
                              position_in_old, match.start_in_old,
                              position_in_new, match.start_in_new)
    end

    if match.size != 0
      result << Operation.new(:equal,
                              match.start_in_old, match.end_in_old,
                              match.start_in_new, match.end_in_new)
    end

    position_in_old = match.end_in_old
    position_in_new = match.end_in_new
  end

  result
end
#perform_operation(operation) ⇒ Object
167 168 169 170 |
# File 'lib/htmldiff.rb', line 167
# Records the operation in @operation and dispatches it to the method named by
# its action, passing the operation itself as the only argument.
#
# NOTE(review): the class defines VALID_METHODS, but it is not consulted here;
# any method name returned by operation.action is invoked.
#
# @param operation an object responding to action
# @return whatever the invoked method returns
def perform_operation(operation)
  @operation = operation
  handler = operation.action
  send(handler, operation)
end
#recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks) ⇒ Object
94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
# File 'lib/htmldiff.rb', line 94
# Appends to +matching_blocks+, in document order, the match found by
# #find_match for the given ranges, preceded by the matches found recursively
# in the ranges before it and followed by those found in the ranges after it.
#
# @param start_in_old [Integer] first index considered in the old words
# @param end_in_old [Integer] exclusive end index in the old words
# @param start_in_new [Integer] first index considered in the new words
# @param end_in_new [Integer] exclusive end index in the new words
# @param matching_blocks [Array] accumulator the matches are appended to
# @return [nil]
def recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks)
  match = find_match(start_in_old, end_in_old, start_in_new, end_in_new)
  return unless match

  # Only recurse to the left when both versions still have words before the match.
  if start_in_old < match.start_in_old && start_in_new < match.start_in_new
    recursively_find_matching_blocks(
      start_in_old, match.start_in_old, start_in_new, match.start_in_new, matching_blocks
    )
  end

  matching_blocks << match

  # Likewise on the right: both versions must have words after the match.
  if match.end_in_old < end_in_old && match.end_in_new < end_in_new
    recursively_find_matching_blocks(
      match.end_in_old, end_in_old, match.end_in_new, end_in_new, matching_blocks
    )
  end
end
#replace(operation) ⇒ Object
172 173 174 175 |
# File 'lib/htmldiff.rb', line 172
# Emits a replacement as a deletion of the old words followed by an insertion
# of the new words, both using the "diffmod" class instead of their defaults.
#
# @param operation the operation handed on to #delete and #insert
# @return whatever #insert returns
def replace(operation)
  modified_class = 'diffmod'
  delete(operation, modified_class)
  insert(operation, modified_class)
end
#split_inputs_to_words ⇒ Object
31 32 33 34 |
# File 'lib/htmldiff.rb', line 31
# Fills @old_words and @new_words with the word lists of @old_version and
# @new_version, as produced by #convert_html_to_list_of_words.
#
# @return the value assigned to @new_words
def split_inputs_to_words
  old_chars = explode(@old_version)
  @old_words = convert_html_to_list_of_words(old_chars)

  new_chars = explode(@new_version)
  @new_words = convert_html_to_list_of_words(new_chars)
end
#start_of_tag?(char) ⇒ Boolean
263 264 265 |
# File 'lib/htmldiff.rb', line 263
# Tells whether the character opens a tag.
#
# @param char [String] a single character
# @return [Boolean] true when char equals "<"
def start_of_tag?(char)
  '<' == char
end
#tag?(item) ⇒ Boolean
210 211 212 |
# File 'lib/htmldiff.rb', line 210
# Tells whether the item is a tag, i.e. whether #opening_tag? or #closing_tag?
# accepts it.
#
# @param item [String] the word to test
# @return [Boolean] truthy when either predicate accepts the item
def tag?(item)
  opening_tag?(item) || closing_tag?(item)
end
#whitespace?(char) ⇒ Boolean
267 268 269 |
# File 'lib/htmldiff.rb', line 267
# Tells whether the given string contains a whitespace character.
#
# Returns a real boolean, like #end_of_tag? and #start_of_tag?, instead of the
# match position or nil.
#
# @param char [String] normally a single character
# @return [Boolean] true when /\s/ matches anywhere in char
def whitespace?(char)
  !(char =~ /\s/).nil?
end
#wrap_text(text, tagname, cssclass) ⇒ Object
251 252 253 |
# File 'lib/htmldiff.rb', line 251
# Wraps text in an element with the given name and class attribute.
# The text is inserted as is, without any escaping.
#
# @param text [String] content placed inside the element
# @param tagname [String] element name, used for the opening and closing tag
# @param cssclass [String] value of the class attribute
# @return [String] the markup
def wrap_text(text, tagname, cssclass)
  opening = %(<#{tagname} class="#{cssclass}">)
  closing = "</#{tagname}>"
  "#{opening}#{text}#{closing}"
end