Class: CloudRCS::Hunk

Inherits:
PrimitivePatch show all
Defined in:
lib/cloud_rcs/patch_types/hunk.rb

Overview

Hunk is one type of primitive patch. It represents a deletion or an insertion, or a combination of both, in a text file.

A Hunk is constructed using the path of a file, the position of the first modified line in the file, and a set of diffs, each of which represents a line added to or deleted from the file.

Constant Summary

Constants inherited from PrimitivePatch

PrimitivePatch::PATH_PREFIX

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from PrimitivePatch

#apply!, escape_path, merge, #named_patch?, #new_path, #primitive_patch?, priority, #to_a, unescape_path

Class Method Details

.generate(orig_file, changed_file) ⇒ Object

Given a list of files, determine whether this patch type describes the changes between the files and generate patches accordingly.

In this case we use the Diff::LCS algorithm to generate Change objects representing each changed line between two files. The changesets are automatically nested into a two dimensional Array, where each row represents a changed portion of the file that is separated from the other rows by an unchanged portion of the file. So we split that dimension of the Array into separate Hunk patches and return the resulting list.



167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# File 'lib/cloud_rcs/patch_types/hunk.rb', line 167

# Builds the list of Hunk patches describing the line-level differences
# between two versions of a file.
#
# orig_file    - file object before the change, or nil when there is no
#                original version.
# changed_file - file object after the change, or nil when there is no
#                changed version.
#                Both are read only through #path and #contents.
#
# Returns nil when both arguments are nil or when either file's contents
# report binary data; otherwise an Array of Hunk patches, one per changed
# region returned by Diff::LCS.diff (empty when nothing differs).
def generate(orig_file, changed_file)
  return if orig_file.nil? && changed_file.nil?
  return if (orig_file && orig_file.contents.is_binary_data?) ||
            (changed_file && changed_file.contents.is_binary_data?)

  # If the original or the changed file is nil, the hunk should
  # contain the entirety of the other file. This is so that a
  # record is kept of a file that is deleted; and so that the
  # contents of a file is added to it after it is created.
  # The -1 limit keeps trailing empty fields, i.e. a final newline.
  orig_lines = orig_file ? orig_file.contents.split("\n", -1) : []
  changed_lines = changed_file ? changed_file.contents.split("\n", -1) : []

  # Insert end-of-line tokens to preserve white space at the end
  # of lines. This is part of the darcs patch format.
  # map! is required here: `l += "$"` inside #each would only rebind the
  # block variable and leave the arrays untouched.
  [orig_lines, changed_lines].each do |lines|
    lines.map! { |l| l =~ /\s+$/ ? "#{l}$" : l }
  end

  file_path = orig_file ? orig_file.path : changed_file.path

  diffs = Diff::LCS.diff(orig_lines, changed_lines)
  hunks = []
  offset = 0
  diffs.each do |d|
    # Diff::LCS assumes that removed lines from all hunks will be
    # removed from file before new lines are added. Unfortunately,
    # in this implementation we remove and add lines from each
    # hunk in order. So the position values for removed lines will
    # be off in all but the first hunk. So we need to adjust those
    # position values before we create the hunk patch.
    unless hunks.empty?
      offset += hunks.last.lengthnew - hunks.last.lengthold
    end
    d.collect! do |l|
      if l.action == '-'
        Diff::LCS::Change.new(l.action, l.position + offset, l.element)
      else
        l
      end
    end

    # The darcs patch format counts lines starting from 1; whereas
    # Diff::LCS counts lines starting from 0. So we add 1 to the
    # position of the first changed line to get the
    # darcs-compatible starting line number for the Hunk patch.
    position = d.first.position + 1

    hunks << Hunk.new(:path => file_path, :position => position, :contents => d)
  end
  hunks
end

.parse(contents) ⇒ Object

Parse hunk info from a file and convert into a Hunk object.



220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/cloud_rcs/patch_types/hunk.rb', line 220

# Parses the textual form of a hunk patch into a Hunk object.
#
# contents - String of the form "hunk <escaped path> <line number>" followed
#            by whitespace and then one "+..." or "-..." line per change.
#
# Returns a Hunk built from the unescaped path, the starting line number and
# one Diff::LCS::Change per "+"/"-" line. Added and removed lines are numbered
# independently, each counting up from the starting line number.
#
# Raises ParseException when the header does not match, and a RuntimeError
# when a body line starts with neither "+" nor "-".
def parse(contents)
  unless contents =~ /^hunk\s+(\S+)\s+(\d+)\s+(.*)$/m
    raise ParseException.new(true), "Failed to parse hunk patch: \"#{contents}\""
  end
  file_path = unescape_path($1)
  starting_position = $2.to_i
  body = $3

  add_line_offset = 0
  del_line_offset = 0
  diffs = body.split("\n").map do |line|
    # These regular expressions ensure that each line ends with a
    # non-whitespace character, or is empty. A dollar sign is
    # added during patch generation to the end of lines that end
    # in whitespace; so parsing this way will not cut off
    # whitespace that is supposed to be added to any patched file.
    #
    # If the line is empty, $1 will be nil. So it is important to
    # pass $1.to_s instead of just $1 to change nil to "".
    if line =~ /^\+(.*[\S\$])?\s*$/
      change = Diff::LCS::Change.new('+', starting_position + add_line_offset, $1.to_s)
      add_line_offset += 1
    elsif line =~ /^-(.*[\S\$])?\s*$/
      change = Diff::LCS::Change.new('-', starting_position + del_line_offset, $1.to_s)
      del_line_offset += 1
    else
      raise "Failed to parse a line in hunk: \"#{line}\""
    end
    change
  end

  Hunk.new(:path => file_path, :position => starting_position, :contents => diffs)
end

Instance Method Details

#added_lines ⇒ Object



150
151
152
# File 'lib/cloud_rcs/patch_types/hunk.rb', line 150

# Returns the entries of +contents+ whose action is '+', in their
# existing order.
def added_lines
  contents.select { |change| change.action == '+' }
end

#apply_to(file) ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/cloud_rcs/patch_types/hunk.rb', line 110

# Applies this hunk to +file+ and returns the same file object.
#
# file - object responding to #path, #contents and #contents=.
#
# A file whose path differs from this hunk's path is returned untouched.
# Otherwise every removed line is checked against, and deleted from, the
# hunk's position; the added lines are then inserted at that same position
# and the re-joined text is written back to file.contents.
#
# Raises ApplyException when a line marked for removal does not equal the
# line currently found at the hunk's position.
def apply_to(file)
  return file unless file.path == path

  # Drops the "$" end-of-line token while keeping the whitespace before it.
  strip_token = lambda { |text| text.sub(/(\s+)\$\s*$/, '\1') }
  index = position - 1

  # A negative limit keeps trailing empty fields, so a final newline
  # survives the split/join round trip.
  buffer = file.contents.split("\n", -1)

  # Removals come first; each deletion shifts the next candidate into the
  # same slot, so the index never moves.
  removed_lines.each do |change|
    unless buffer[index] == strip_token.call(change.element)
      raise ApplyException.new(true), "Line in hunk marked for removal does not match contents of existing line in file\nfile contents: #{position} -'#{buffer[index]}'\nline to be removed: #{change.position} -'#{change.element}'"
    end
    buffer.delete_at(index)
  end

  # Additions are inserted consecutively starting at the same slot.
  added_lines.each_with_index do |change, step|
    buffer.insert(index + step, strip_token.call(change.element))
  end

  file.contents = buffer.join("\n")
  file
end

#commute(patch) ⇒ Object

Given another patch, generates two new patches that have the same effect as the original two, but with the order of the analogous patches reversed. The message receiver is the first patch, and the argument is the second; so after commuting the analog of this patch will be second.



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/cloud_rcs/patch_types/hunk.rb', line 54

# Commutes this hunk (currently applied first) with +patch+ (currently
# applied second).
#
# patch - the patch that follows this one.
#
# Returns two values, patch1 and patch2: patch1 is the analog of +patch+ and
# patch2 is the analog of this hunk, meant to be applied in that order.
#
# Raises CommuteException when +patch+ is a Hunk on the same path and none of
# the four positional cases below applies, or when +patch+ is an Rmfile for
# the same path.
def commute(patch)
  if patch.is_a? Hunk and patch.path == self.path

    # self is applied first and precedes patch in the file
    # patch's position is shifted back by the net number of lines self added.
    if self.position + self.lengthnew < patch.position
      patch1 = Hunk.new(:path => patch.path,
                        :position => (patch.position - self.lengthnew + self.lengthold),
                        :contents => patch.contents)
      patch2 = Hunk.new(:path => self.path, :position => self.position, :contents => self.contents)
      
    # self is applied first, but is preceded by patch in the file
    # self's position is shifted forward by the net number of lines patch adds.
    elsif patch.position + patch.lengthold < self.position
      patch1 = Hunk.new(:path => patch.path, :position => patch.position, :contents => patch.contents)
      patch2 = Hunk.new(:path => self.path, 
                        :position => (self.position + patch.lengthnew - patch.lengthold),
                        :contents => self.contents)
      
    # patch precedes self in file, but bumps up against it
    # Only taken when both hunks add and remove at least one line.
    # NOTE(review): this test uses patch.lengthnew where the strict case above
    # uses patch.lengthold, and the shift applied to self has the opposite
    # sign to that case — confirm this asymmetry is intended.
    elsif patch.position + patch.lengthnew == self.position and
        self.lengthold != 0 and patch.lengthold != 0 and 
        self.lengthnew != 0 and patch.lengthnew != 0
      patch1 = Hunk.new(:path => patch.path, :position => patch.position, :contents => patch.contents)
      patch2 = Hunk.new(:path => self.path, 
                        :position => (self.position - patch.lengthnew + patch.lengthold), 
                        :contents => self.contents)
      
    # self precedes patch in file, but bumps up against it
    # Only taken when both hunks add and remove at least one line.
    # NOTE(review): this test uses self.lengthold where the first case uses
    # self.lengthnew, and it moves self rather than patch (the first case
    # moves patch) — confirm this is intended.
    elsif self.position + self.lengthold == patch.position and
        self.lengthold != 0 and patch.lengthold != 0 and 
        self.lengthnew != 0 and patch.lengthnew != 0
      patch1 = Hunk.new(:path => patch.path, :position => patch.position, :contents => patch.contents)
      patch2 = Hunk.new(:path => self.path, 
                        :position => (self.position + patch.lengthnew - patch.lengthold), 
                        :contents => self.contents)
      
    # Patches overlap. This is a conflict scenario
    else
      raise CommuteException.new(true, "Conflict: hunk patches overlap.")
    end
    
  elsif patch.is_a? Rmfile and patch.path == self.path
    raise CommuteException.new(true, "Conflict: cannot modify a file after it is removed.")

  # patch moves the file this hunk targets: both patches are cloned and the
  # hunk's clone is pointed at the move's new path.
  elsif patch.is_a? Move and self.path == patch.original_path
    patch1 = patch.clone
    patch2 = self.clone
    patch2.path = patch.new_path
    
  # Commutation is trivial
  # The original objects themselves are returned, in swapped order.
  else
    patch1, patch2 = patch, self
  end
  
  return patch1, patch2
end

#inverse ⇒ Object

The inverse of a Hunk simply swaps adds and deletes.



39
40
41
42
43
44
45
46
47
# File 'lib/cloud_rcs/patch_types/hunk.rb', line 39

# Builds the hunk that undoes this one: every added line becomes a removal
# and every removed line becomes an addition, each keeping its position and
# text. The new contents list the former additions first, then the former
# removals. Path and position are carried over unchanged.
def inverse
  flip = lambda do |changes, action|
    changes.map { |change| Diff::LCS::Change.new(action, change.position, change.element) }
  end

  swapped = flip.call(added_lines, '-') + flip.call(removed_lines, '+')
  Hunk.new(:path => path, :position => position, :contents => swapped)
end

#lengthnew ⇒ Object

Returns the number of lines added by the hunk patch



137
138
139
# File 'lib/cloud_rcs/patch_types/hunk.rb', line 137

# Number of lines this hunk adds, i.e. the count of entries returned by
# #added_lines.
def lengthnew
  added_lines.size
end

#lengthold ⇒ Object

Returns the number of lines removed by the hunk patch



142
143
144
# File 'lib/cloud_rcs/patch_types/hunk.rb', line 142

# Number of lines this hunk removes, i.e. the count of entries returned by
# #removed_lines.
def lengthold
  removed_lines.size
end

#removed_lines ⇒ Object



146
147
148
# File 'lib/cloud_rcs/patch_types/hunk.rb', line 146

# Returns the entries of +contents+ whose action is '-', in their
# existing order.
def removed_lines
  contents.select { |change| change.action == '-' }
end

#to_s ⇒ Object

# Calls verify_path_prefix and then evaluates a default for starting_line.
# NOTE(review): the name suggests a post-initialization callback — how and
# whether it is invoked is not visible here; confirm.
def after_initialize

  # verify_path_prefix is defined outside this view.
  verify_path_prefix
  # NOTE(review): with no explicit receiver, `starting_line ||= ...` assigns a
  # method-local variable, so this line has no effect once the method
  # returns. If an attribute was meant, it would need
  # `self.starting_line ||= ...` — confirm intent.
  starting_line ||= contents.first.position
end


32
33
34
35
36
# File 'lib/cloud_rcs/patch_types/hunk.rb', line 32

# Serializes the hunk to text: a header line "hunk <escaped path> <position>"
# followed by one line per change, each consisting of the change's action
# character immediately followed by its element. Change lines are separated
# by newlines, with no newline after the last one.
def to_s
  header = "hunk #{self.class.escape_path(path)} #{position}\n"
  change_lines = contents.map { |change| "#{change.action}#{change.element}" }
  header + change_lines.join("\n")
end

#validate ⇒ Object



16
17
18
19
20
21
22
23
24
25
# File 'lib/cloud_rcs/patch_types/hunk.rb', line 16

# Checks that every entry in +contents+ carries a '+' or '-' action, adding
# one error on :contents for each entry that does not. Nothing is checked
# when +contents+ does not respond to #each.
def validate
  return unless contents.respond_to? :each

  contents.each do |change|
    next if ['+', '-'].include? change.action

    errors.add(:contents, "contains an unknown action.")
  end
end