Class: HTMLDiff::DiffBuilder
- Inherits:
-
Object
- Object
- HTMLDiff::DiffBuilder
- Defined in:
- lib/junebug/ext/diff.rb
Constant Summary collapse
- VALID_METHODS =
[:replace, :insert, :delete, :equal]
Instance Method Summary collapse
- #add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new) ⇒ Object
- #add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new) ⇒ Object
- #build ⇒ Object
- #closing_tag?(item) ⇒ Boolean
- #convert_html_to_list_of_words(x, use_brackets = false) ⇒ Object
- #delete(operation, tagclass = 'diffdel') ⇒ Object
- #end_of_tag?(char) ⇒ Boolean
- #equal(operation) ⇒ Object
- #explode(sequence) ⇒ Object
- #extract_consecutive_words(words, &condition) ⇒ Object
- #find_match(start_in_old, end_in_old, start_in_new, end_in_new) ⇒ Object
- #index_new_words ⇒ Object
-
#initialize(old_version, new_version) ⇒ DiffBuilder
constructor
A new instance of DiffBuilder.
- #insert(operation, tagclass = 'diffins') ⇒ Object
-
#insert_tag(tagname, cssclass, words) ⇒ Object
This method encloses words within a specified tag (ins or del) and appends the result to @content, with a twist: if the words contain tags, it creates multiple ins or del elements, so that none of them encloses a tag.
- #matching_blocks ⇒ Object
- #opening_tag?(item) ⇒ Boolean
- #operations ⇒ Object
- #perform_operation(operation) ⇒ Object
- #recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks) ⇒ Object
- #replace(operation) ⇒ Object
- #split_inputs_to_words ⇒ Object
- #start_of_tag?(char) ⇒ Boolean
- #tag?(item) ⇒ Boolean
- #whitespace?(char) ⇒ Boolean
- #wrap_text(text, tagname, cssclass) ⇒ Object
Constructor Details
#initialize(old_version, new_version) ⇒ DiffBuilder
Returns a new instance of DiffBuilder.
18 19 20 21 |
# File 'lib/junebug/ext/diff.rb', line 18
# Remembers the two versions to be compared and starts with an empty
# output buffer.
#
# old_version - the earlier version of the content
# new_version - the later version of the content
def initialize(old_version, new_version)
  @old_version = old_version
  @new_version = new_version
  # Output fragments are collected here (see #build, which joins them).
  @content = []
end
Instance Method Details
#add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new) ⇒ Object
144 145 146 147 148 149 150 151 152 153 |
# File 'lib/junebug/ext/diff.rb', line 144
# Grows a match towards the left: while the words immediately before the
# match are equal in @old_words and @new_words, and neither start bound
# has been reached, the match start moves back by one and its size grows
# by one.
#
# Returns [match_in_old, match_in_new, match_size] after the extension.
def add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new)
  loop do
    # Stop at the lower bound of either window.
    break if match_in_old <= start_in_old || match_in_new <= start_in_new
    # Stop as soon as the preceding words differ.
    break unless @old_words[match_in_old - 1] == @new_words[match_in_new - 1]

    match_in_old -= 1
    match_in_new -= 1
    match_size += 1
  end

  [match_in_old, match_in_new, match_size]
end
#add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new) ⇒ Object
155 156 157 158 159 160 161 162 |
# File 'lib/junebug/ext/diff.rb', line 155
# Grows a match towards the right: while the words immediately after the
# match are equal in @old_words and @new_words, and neither end bound has
# been reached, the match size grows by one. The start positions are
# returned unchanged.
#
# Returns [match_in_old, match_in_new, match_size] after the extension.
def add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new)
  loop do
    next_in_old = match_in_old + match_size
    next_in_new = match_in_new + match_size

    # Stop at the upper bound of either window.
    break if next_in_old >= end_in_old || next_in_new >= end_in_new
    # Stop as soon as the following words differ.
    break unless @old_words[next_in_old] == @new_words[next_in_new]

    match_size += 1
  end

  [match_in_old, match_in_new, match_size]
end
#build ⇒ Object
23 24 25 26 27 28 |
# File 'lib/junebug/ext/diff.rb', line 23
# Runs the whole diff: splits both inputs into words, indexes the new
# words, performs every operation in order, and returns the collected
# output fragments joined into one string.
def build
  split_inputs_to_words
  index_new_words

  operations.each do |operation|
    perform_operation(operation)
  end

  @content.join
end
#closing_tag?(item) ⇒ Boolean
193 194 195 |
# File 'lib/junebug/ext/diff.rb', line 193
# Reports whether +item+ consists solely of a closing tag such as "</p>",
# optionally surrounded by whitespace.
#
# The pattern is anchored with \A and \z so that the WHOLE string must be
# the tag. The previous ^ / $ anchors match per line, so a multi-line
# string in which only one line was a closing tag was reported as a tag.
#
# Returns 0 (the match position) when +item+ is a closing tag, nil
# otherwise.
def closing_tag?(item)
  item =~ %r{\A\s*</[^>]+>\s*\z}
end
#convert_html_to_list_of_words(x, use_brackets = false) ⇒ Object
258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 |
# File 'lib/junebug/ext/diff.rb', line 258
# Splits HTML into a list of "words" using a small state machine with
# three modes:
#
#   :tag        - inside a tag; characters accumulate until the tag ends
#   :char       - inside a run of ordinary (non-whitespace) characters
#   :whitespace - inside a run of whitespace characters
#
# Every complete tag, every run of whitespace and every run of other
# characters becomes one word.
#
# x            - a String, or a sequence of single-character strings
#                (anything #explode passes through)
# use_brackets - when true, tag delimiters are emitted as '[' and ']'
#                instead of '<' and '>'
#
# Characters taken from the input are duplicated before being used as the
# start of a new word. Without that, appending to the word would modify
# the elements of a caller-supplied array in place.
#
# Returns the Array of words. Raises RuntimeError on an unknown mode.
def convert_html_to_list_of_words(x, use_brackets = false)
  tag_open  = use_brackets ? '[' : '<'
  tag_close = use_brackets ? ']' : '>'

  mode = :char
  current_word = ''
  words = []

  explode(x).each do |char|
    case mode
    when :tag
      if end_of_tag?(char)
        current_word << tag_close
        words << current_word
        current_word = ''
        # The character that ends a tag can never be whitespace, so the
        # machine always continues in :char mode from here.
        mode = :char
      else
        current_word << char
      end
    when :char
      if start_of_tag?(char)
        words << current_word unless current_word.empty?
        current_word = tag_open.dup
        mode = :tag
      elsif whitespace?(char)
        words << current_word unless current_word.empty?
        current_word = char.dup
        mode = :whitespace
      else
        current_word << char
      end
    when :whitespace
      if start_of_tag?(char)
        words << current_word unless current_word.empty?
        current_word = tag_open.dup
        mode = :tag
      elsif whitespace?(char)
        current_word << char
      else
        words << current_word unless current_word.empty?
        current_word = char.dup
        mode = :char
      end
    else
      raise "Unknown mode #{mode.inspect}"
    end
  end

  # Flush whatever was still being accumulated when the input ended.
  words << current_word unless current_word.empty?
  words
end
#delete(operation, tagclass = 'diffdel') ⇒ Object
180 181 182 |
# File 'lib/junebug/ext/diff.rb', line 180
# Emits the old words covered by +operation+ wrapped in <del> elements.
#
# operation - responds to start_in_old and end_in_old (end is exclusive)
# tagclass  - CSS class put on the <del> elements
def delete(operation, tagclass = 'diffdel')
  removed_words = @old_words[operation.start_in_old...operation.end_in_old]
  insert_tag('del', tagclass, removed_words)
end
#end_of_tag?(char) ⇒ Boolean
246 247 248 |
# File 'lib/junebug/ext/diff.rb', line 246
# True when +char+ is the character that terminates a tag ('>').
def end_of_tag?(char)
  '>' == char
end
#equal(operation) ⇒ Object
184 185 186 187 |
# File 'lib/junebug/ext/diff.rb', line 184
# Handles an unchanged stretch: no markup is added, the matching words
# are copied from the new version straight into the output.
#
# operation - responds to start_in_new and end_in_new (end is exclusive)
def equal(operation)
  unchanged_words = @new_words[operation.start_in_new...operation.end_in_new]
  @content = @content + unchanged_words
end
#explode(sequence) ⇒ Object
242 243 244 |
# File 'lib/junebug/ext/diff.rb', line 242
# Turns a String into an Array of its characters; any other kind of
# sequence is returned untouched.
def explode(sequence)
  return sequence unless sequence.is_a?(String)

  sequence.split(//)
end
#extract_consecutive_words(words, &condition) ⇒ Object
201 202 203 204 205 206 207 208 209 210 211 212 213 214 |
# File 'lib/junebug/ext/diff.rb', line 201
# Removes and returns the leading run of +words+ for which the given
# block is truthy. Extraction stops at the first word the block rejects;
# if no word is rejected, every word is removed and returned.
#
# +words+ is modified in place: the returned words are no longer in it.
def extract_consecutive_words(words, &condition)
  first_rejected = words.index { |word| !condition.call(word) }
  run_length = first_rejected || words.length
  words.slice!(0, run_length)
end
#find_match(start_in_old, end_in_old, start_in_new, end_in_new) ⇒ Object
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
# File 'lib/junebug/ext/diff.rb', line 108
# Looks for the longest run of consecutive equal words between
# @old_words[start_in_old...end_in_old] and
# @new_words[start_in_new...end_in_new].
#
# For each old position it walks the positions of the same word in the
# new version (via @word_indices) and extends the run that ended one
# position earlier in the previous row. Only the previous row of run
# lengths is kept.
#
# The source listing carries commented-out calls that would further
# extend the result through add_matching_words_left and
# add_matching_words_right; they remain disabled.
#
# Returns a Match (start in old, start in new, size), or nil when the two
# windows share no word.
def find_match(start_in_old, end_in_old, start_in_new, end_in_new)
  best_old = start_in_old
  best_new = start_in_new
  best_size = 0

  # new position => length of the common run ending there (previous row)
  previous_runs = {}

  (start_in_old...end_in_old).each do |old_pos|
    current_runs = {}

    @word_indices[@old_words[old_pos]].each do |new_pos|
      next  if new_pos < start_in_new
      break if new_pos >= end_in_new

      run = previous_runs.fetch(new_pos - 1, 0) + 1
      current_runs[new_pos] = run
      next unless run > best_size

      best_old = old_pos - run + 1
      best_new = new_pos - run + 1
      best_size = run
    end

    previous_runs = current_runs
  end

  return nil if best_size == 0

  Match.new(best_old, best_new, best_size)
end
#index_new_words ⇒ Object
35 36 37 38 |
# File 'lib/junebug/ext/diff.rb', line 35
# Builds @word_indices: a Hash mapping each word of the new version to
# the ascending list of positions at which it occurs in @new_words.
# Looking up an unknown word yields (and stores) an empty list.
def index_new_words
  positions_by_word = Hash.new { |table, word| table[word] = [] }
  @word_indices = positions_by_word

  @new_words.each_with_index do |word, position|
    positions_by_word[word] << position
  end
end
#insert(operation, tagclass = 'diffins') ⇒ Object
176 177 178 |
# File 'lib/junebug/ext/diff.rb', line 176
# Emits the new words covered by +operation+ wrapped in <ins> elements.
#
# operation - responds to start_in_new and end_in_new (end is exclusive)
# tagclass  - CSS class put on the <ins> elements
def insert(operation, tagclass = 'diffins')
  added_words = @new_words[operation.start_in_new...operation.end_in_new]
  insert_tag('ins', tagclass, added_words)
end
#insert_tag(tagname, cssclass, words) ⇒ Object
This method encloses words within a specified tag (ins or del) and appends the result to @content, with a twist: if the words contain tags, it creates multiple ins or del elements, so that none of them encloses a tag. This handles cases like old: ‘<p>a</p>’ new: ‘<p>ab</p><p>c</p>’ diff result: ‘<p>a<ins>b</ins></p><p><ins>c</ins></p>’. This still doesn’t guarantee valid HTML (hint: think about diffing a text containing ins or del tags), but it correctly handles more cases than the earlier version.
P.S.: Spare a thought for people who write HTML browsers. They live in this … every day.
227 228 229 230 231 232 233 234 235 236 |
# File 'lib/junebug/ext/diff.rb', line 227
# Appends +words+ to @content, wrapping every run of non-tag words in a
# <tagname class="cssclass"> element while copying tag words through
# unwrapped, so that no generated element encloses a tag.
#
# tagname  - element name used for wrapping ('ins' or 'del' in this class)
# cssclass - value of the class attribute on the wrapper
# words    - Array of words; it is consumed (emptied) by this method,
#            because each extraction removes the words it returns
#
# The local holding the non-tag run was missing its name in the listing
# (leaving `= extract_consecutive_words…` / `.join` / `.empty?`), which
# is not valid Ruby; it is restored here as +non_tags+.
def insert_tag(tagname, cssclass, words)
  loop do
    break if words.empty?

    # Leading run of plain words: wrap it as a single element.
    non_tags = extract_consecutive_words(words) { |word| !tag?(word) }
    @content << wrap_text(non_tags.join, tagname, cssclass) unless non_tags.empty?

    break if words.empty?

    # Following run of tags: copy through without wrapping.
    @content += extract_consecutive_words(words) { |word| tag?(word) }
  end
end
#matching_blocks ⇒ Object
87 88 89 90 91 |
# File 'lib/junebug/ext/diff.rb', line 87
# Collects the matching blocks found over the full extent of both word
# lists, in the order recursively_find_matching_blocks appends them.
def matching_blocks
  blocks = []
  recursively_find_matching_blocks(0, @old_words.size, 0, @new_words.size, blocks)
  blocks
end
#opening_tag?(item) ⇒ Boolean
189 190 191 |
# File 'lib/junebug/ext/diff.rb', line 189
# Reports whether +item+ consists solely of one tag, optionally
# surrounded by whitespace. Note that the pattern also accepts closing
# tags, because `[^>]+` admits a leading slash.
#
# The pattern is anchored with \A and \z so that the WHOLE string must be
# the tag. The previous ^ / $ anchors match per line, so a multi-line
# string in which only one line was a tag was reported as a tag.
#
# Returns 0 (the match position) when +item+ is a tag, nil otherwise.
def opening_tag?(item)
  item =~ %r{\A\s*<[^>]+>\s*\z}
end
#operations ⇒ Object
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
# File 'lib/junebug/ext/diff.rb', line 40
# Translates the matching blocks into an ordered list of Operation
# objects (:replace, :insert, :delete, :equal) that together describe how
# to get from the old word list to the new one.
#
# For every match, the gap between the current positions and the start of
# the match becomes one operation:
#   gap in both versions      -> :replace
#   gap only in the new words -> :insert
#   gap only in the old words -> :delete
# followed by an :equal operation for the match itself when it is
# non-empty.
def operations
  position_in_old = 0
  position_in_new = 0
  result = []

  blocks = matching_blocks
  # A zero-length match placed past the end of both lists makes the loop
  # below emit an operation for whatever trails the last real match.
  blocks << Match.new(@old_words.length, @new_words.length, 0)

  blocks.each do |match|
    gap_in_old = position_in_old != match.start_in_old
    gap_in_new = position_in_new != match.start_in_new

    action =
      if gap_in_old && gap_in_new
        :replace
      elsif gap_in_new
        :insert
      elsif gap_in_old
        :delete
      end
    # action stays nil when the match begins exactly at the current
    # position in both versions (nothing to emit before it).

    if action
      result << Operation.new(action,
                              position_in_old, match.start_in_old,
                              position_in_new, match.start_in_new)
    end

    unless match.size == 0
      result << Operation.new(:equal,
                              match.start_in_old, match.end_in_old,
                              match.start_in_new, match.end_in_new)
    end

    position_in_old = match.end_in_old
    position_in_new = match.end_in_new
  end

  result
end
#perform_operation(operation) ⇒ Object
166 167 168 169 |
# File 'lib/junebug/ext/diff.rb', line 166
# Records +operation+ in @operation and dispatches it to the method named
# by operation.action, passing the operation itself as the argument.
# Returns whatever that method returns.
def perform_operation(operation)
  @operation = operation
  handler = operation.action
  send(handler, operation)
end
#recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks) ⇒ Object
93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/junebug/ext/diff.rb', line 93
# Finds a match inside the given old/new windows, then recurses into the
# region before it and the region after it, appending every match to
# +matching_blocks+ in left-to-right order.
#
# A side is only explored when BOTH the old and the new window still have
# words left on that side. Nothing is appended when find_match returns
# nil.
def recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks)
  match = find_match(start_in_old, end_in_old, start_in_new, end_in_new)
  return unless match

  words_before = start_in_old < match.start_in_old && start_in_new < match.start_in_new
  if words_before
    recursively_find_matching_blocks(start_in_old, match.start_in_old,
                                     start_in_new, match.start_in_new,
                                     matching_blocks)
  end

  matching_blocks << match

  words_after = match.end_in_old < end_in_old && match.end_in_new < end_in_new
  if words_after
    recursively_find_matching_blocks(match.end_in_old, end_in_old,
                                     match.end_in_new, end_in_new,
                                     matching_blocks)
  end
end
#replace(operation) ⇒ Object
171 172 173 174 |
# File 'lib/junebug/ext/diff.rb', line 171
# Handles a replacement as a deletion of the old words followed by an
# insertion of the new words, both marked with the 'diffmod' CSS class.
def replace(operation)
  modified_class = 'diffmod'
  delete(operation, modified_class)
  insert(operation, modified_class)
end
#split_inputs_to_words ⇒ Object
30 31 32 33 |
# File 'lib/junebug/ext/diff.rb', line 30
# Breaks both versions into word lists, stored in @old_words and
# @new_words (old version first).
def split_inputs_to_words
  old_chars = explode(@old_version)
  @old_words = convert_html_to_list_of_words(old_chars)

  new_chars = explode(@new_version)
  @new_words = convert_html_to_list_of_words(new_chars)
end
#start_of_tag?(char) ⇒ Boolean
250 251 252 |
# File 'lib/junebug/ext/diff.rb', line 250
# True when +char+ is the character that begins a tag ('<').
def start_of_tag?(char)
  '<' == char
end
#tag?(item) ⇒ Boolean
197 198 199 |
# File 'lib/junebug/ext/diff.rb', line 197
# Truthy when +item+ is recognised by opening_tag? or, failing that, by
# closing_tag?; the result of whichever check succeeded is returned.
def tag?(item)
  opening_tag?(item) || closing_tag?(item)
end
#whitespace?(char) ⇒ Boolean
254 255 256 |
# File 'lib/junebug/ext/diff.rb', line 254
# Truthy when +char+ contains a whitespace character: returns the
# position of the first one, or nil when there is none.
def whitespace?(char)
  /\s/ =~ char
end
#wrap_text(text, tagname, cssclass) ⇒ Object
238 239 240 |
# File 'lib/junebug/ext/diff.rb', line 238
# Returns +text+ enclosed in a <tagname class="cssclass"> element.
# None of the three values is escaped; they are interpolated as given.
def wrap_text(text, tagname, cssclass)
  opening = %(<#{tagname} class="#{cssclass}">)
  closing = "</#{tagname}>"
  "#{opening}#{text}#{closing}"
end