Class: Gitlab::Git::Diff

Inherits:
Object
  • Object
show all
Includes:
EncodingHelper
Defined in:
lib/gitlab/git/diff.rb

Constant Summary collapse

TimeoutError =
Class.new(StandardError)
DEFAULT_MAX_PATCH_BYTES =

The default maximum content size to display a diff patch.

If this value ever changes, make sure to create a migration that updates existing records and the default of `ApplicationSettings#diff_max_patch_bytes`.

200.kilobytes
MAX_PATCH_BYTES_UPPER_BOUND =

This is a limitation applied on the source (Gitaly), therefore we don’t allow persisting limits over that.

500.kilobytes
SERIALIZE_KEYS =
%i[
  diff
  new_path
  old_path
  a_mode
  b_mode
  new_file
  renamed_file
  deleted_file
  too_large
  generated
  encoded_file_path
].freeze
BINARY_NOTICE_PATTERN =
%r{Binary files (.*) and (.*) differ}

Constants included from EncodingHelper

EncodingHelper::BOM_UTF8, EncodingHelper::ENCODING_CONFIDENCE_THRESHOLD, EncodingHelper::ESCAPED_CHARS, EncodingHelper::UNICODE_REPLACEMENT_CHARACTER

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from EncodingHelper

#binary_io, #detect_binary?, #detect_encoding, #detect_libgit2_binary?, #encode!, #encode_binary, #encode_utf8, #encode_utf8_no_detect, #encode_utf8_with_escaping!, #encode_utf8_with_replacement_character, #force_encode_utf8, #strip_bom, #unquote_path

Constructor Details

#initialize(raw_diff, expanded: true, replace_invalid_utf8_chars: true, generated: nil) ⇒ Diff

Returns a new instance of Diff.



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/gitlab/git/diff.rb', line 163

# Builds a Diff from one of the supported raw representations.
#
# A Hash is loaded through +init_from_hash+; Gitlab::GitalyClient::Diff and
# Gitaly::CommitDelta are loaded through +init_from_gitaly+. Pruning
# (+prune_diff_if_eligible+) is attempted for the Hash and
# Gitlab::GitalyClient::Diff inputs only, never for a Gitaly::CommitDelta.
#
# @param raw_diff [Hash, Gitlab::GitalyClient::Diff, Gitaly::CommitDelta]
# @param expanded [Boolean] stored in @expanded
# @param replace_invalid_utf8_chars [Boolean] forwarded to +encode_diff_to_utf8+
# @param generated [Object, nil] stored in @generated
# @raise [RuntimeError] when +raw_diff+ is nil or of an unsupported type
def initialize(raw_diff, expanded: true, replace_invalid_utf8_chars: true, generated: nil)
  @expanded = expanded
  @generated = generated

  # Each branch loads the data and reports whether pruning applies to it.
  prunable =
    case raw_diff
    when Hash
      init_from_hash(raw_diff)
      true
    when Gitlab::GitalyClient::Diff
      init_from_gitaly(raw_diff)
      true
    when Gitaly::CommitDelta
      init_from_gitaly(raw_diff)
      false
    when nil
      raise "Nil as raw diff passed"
    else
      raise "Invalid raw diff type: #{raw_diff.class}"
    end

  prune_diff_if_eligible if prunable

  encode_diff_to_utf8(replace_invalid_utf8_chars)
end

Instance Attribute Details

#a_modeObject

Diff properties



10
11
12
# File 'lib/gitlab/git/diff.rb', line 10

# Reader for the +a_mode+ attribute (listed under "Diff properties").
#
# @return [Object] the current value of @a_mode
def a_mode = @a_mode

#b_modeObject

Diff properties



10
11
12
# File 'lib/gitlab/git/diff.rb', line 10

# Reader for the +b_mode+ attribute (listed under "Diff properties").
#
# @return [Object] the current value of @b_mode
def b_mode = @b_mode

#deleted_fileObject Also known as: deleted_file?

Stats properties



13
14
15
# File 'lib/gitlab/git/diff.rb', line 13

# Reader for the +deleted_file+ flag (listed under "Stats properties").
# Also available as +deleted_file?+.
#
# @return [Object] the current value of @deleted_file
def deleted_file = @deleted_file

#diffObject

Diff properties



10
11
12
# File 'lib/gitlab/git/diff.rb', line 10

# Reader for the +diff+ attribute (listed under "Diff properties").
#
# @return [Object] the current value of @diff
def diff = @diff

#encoded_file_pathObject

Stats properties



13
14
15
# File 'lib/gitlab/git/diff.rb', line 13

# Reader for the +encoded_file_path+ attribute (listed under "Stats properties").
#
# @return [Object] the current value of @encoded_file_path
def encoded_file_path = @encoded_file_path

#expandedObject Also known as: expanded?

Returns the value of attribute expanded.



19
20
21
# File 'lib/gitlab/git/diff.rb', line 19

# Reader for the +expanded+ attribute. Also available as +expanded?+.
#
# @return [Object] the current value of @expanded
def expanded = @expanded

#generatedObject Also known as: generated?

Stats properties



13
14
15
# File 'lib/gitlab/git/diff.rb', line 13

# Reader for the +generated+ flag (listed under "Stats properties").
# Also available as +generated?+.
#
# @return [Object] the current value of @generated
def generated = @generated

#new_fileObject Also known as: new_file?

Stats properties



13
14
15
# File 'lib/gitlab/git/diff.rb', line 13

# Reader for the +new_file+ flag (listed under "Stats properties").
# Also available as +new_file?+.
#
# @return [Object] the current value of @new_file
def new_file = @new_file

#new_pathObject

Diff properties



10
11
12
# File 'lib/gitlab/git/diff.rb', line 10

# Reader for the +new_path+ attribute (listed under "Diff properties").
#
# @return [Object] the current value of @new_path
def new_path = @new_path

#old_pathObject

Diff properties



10
11
12
# File 'lib/gitlab/git/diff.rb', line 10

# Reader for the +old_path+ attribute (listed under "Diff properties").
#
# @return [Object] the current value of @old_path
def old_path = @old_path

#renamed_fileObject Also known as: renamed_file?

Stats properties



13
14
15
# File 'lib/gitlab/git/diff.rb', line 13

# Reader for the +renamed_file+ flag (listed under "Stats properties").
# Also available as +renamed_file?+.
#
# @return [Object] the current value of @renamed_file
def renamed_file = @renamed_file

#too_large=(value) ⇒ Object (writeonly)

Sets the attribute too_large

Parameters:

  • value

    the value to set the attribute too_large to.



20
21
22
# File 'lib/gitlab/git/diff.rb', line 20

# Write-only setter for the +too_large+ flag.
#
# The value is stored as-is in @too_large. Note that +too_large?+ treats a
# nil @too_large as "not computed yet" and derives the flag from the patch
# size in that case.
#
# @param value [Object] the value to store in @too_large
def too_large=(value)
  @too_large = value
end

Class Method Details

.between(repo, head, base, options = {}, *paths) ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/gitlab/git/diff.rb', line 52

# Returns the diff between two revisions of +repo+.
#
# Unless the +:straight+ option is truthy, the comparison starts from the
# merge base of +head+ and +base+ instead of from +base+ itself.
#
# @param repo [#merge_base, #diff] repository used to resolve the merge base
#   and to produce the diff
# @param head [Object] revision to diff to
# @param base [Object] revision to diff from (or to take the merge base with)
# @param options [Hash, nil] diff options. +:straight+ is consumed here; the
#   rest is passed through +filter_diff_options+. The caller's hash is never
#   mutated, and nil is treated as an empty hash.
# @param paths [Array] optional paths forwarded to +repo.diff+
# @return [Object] whatever +repo.diff+ returns
def between(repo, head, base, options = {}, *paths)
  # Work on a private copy: deleting :straight from the caller's hash would
  # leak a side effect to the caller and fail outright on a frozen hash.
  options = options ? options.dup : {}
  straight = options.delete(:straight) || false

  common_commit =
    if straight
      base
    else
      # Only show what is new in the source branch compared to the target
      # branch, not the other way around. Using the merge base is equivalent
      # to a three-dot diff (git diff branch1...branch2). From the git
      # documentation: "git diff A...B" is equivalent to
      # "git diff $(git-merge-base A B) B"
      repo.merge_base(head, base)
    end

  actual_options = filter_diff_options(options)
  repo.diff(common_commit, head, actual_options, *paths)
end

.binary_message(old_path, new_path) ⇒ Object

Return a binary diff message like:

“Binary files a/file/path and b/file/path differ\n” This is used when we detect that a diff is binary using CharlockHolmes.



129
130
131
# File 'lib/gitlab/git/diff.rb', line 129

# Builds the notice used in place of a patch for a binary diff, e.g.
# "Binary files a/file/path and b/file/path differ\n".
#
# @param old_path [#to_s] path shown first in the notice
# @param new_path [#to_s] path shown second in the notice
# @return [String] the notice, terminated by a newline
def binary_message(old_path, new_path)
  format("Binary files %s and %s differ\n", old_path, new_path)
end

.collect_patch_overage?Boolean

Returns:

  • (Boolean)


145
146
147
# File 'lib/gitlab/git/diff.rb', line 145

# Reports whether the +:collect_all_diff_paths+ feature flag is enabled.
#
# @return [Boolean] always a strict true/false, whatever +Feature.enabled?+ returns
def collect_patch_overage?
  Feature.enabled?(:collect_all_diff_paths) ? true : false
end

.filter_diff_options(options, default_options = {}) ⇒ Object

Return a copy of the options hash containing only recognized keys. Allowed options are:

:ignore_whitespace_change ::
  If true, changes in amount of whitespace will be ignored.

:max_files ::
  Limit how many files will have their patches included before collapsing

:max_lines ::
  Limit how many patch lines (across all files) will be allowed for
  before collapsing

:limits ::
  A hash with additional limits to check before collapsing patches.
  Allowed keys are: `max_bytes`, `safe_max_files`, `safe_max_lines`
  and `safe_max_bytes`

:expanded ::
  If false, patch raw data will not be included in the diff after
  `max_files`, `max_lines` or any of the limits in `limits` are
  exceeded
:generated_files ::
  If the list of generated files is given, those files will be marked
  as generated.


98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/gitlab/git/diff.rb', line 98

# Returns a new hash holding only the recognized diff options.
#
# Recognized keys are +:ignore_whitespace_change+, +:max_files+,
# +:max_lines+, +:limits+, +:expanded+, +:collect_all_paths+,
# +:generated_files+ and +:offset_index+. Recognized entries of
# +default_options+ are used as a base and are overridden by recognized
# entries of +options+. Neither argument is mutated.
#
# @param options [Hash, nil] caller-supplied options
# @param default_options [Hash, nil] fallback values
# @return [Hash] the filtered, merged options
def filter_diff_options(options, default_options = {})
  recognized = [:ignore_whitespace_change, :max_files, :max_lines,
    :limits, :expanded, :collect_all_paths, :generated_files, :offset_index]

  # Filters a copy so the hash passed in by the caller stays untouched.
  only_recognized = lambda do |candidates|
    candidates.dup.keep_if { |key, _value| recognized.include?(key) }
  end

  defaults = default_options ? only_recognized.call(default_options) : {}
  return defaults unless options

  defaults.merge(only_recognized.call(options))
end

.has_binary_notice?(text) ⇒ Boolean

Returns:

  • (Boolean)


157
158
159
160
161
# File 'lib/gitlab/git/diff.rb', line 157

# Tells whether +text+ begins with a match of BINARY_NOTICE_PATTERN.
#
# @param text [String, nil] patch text to inspect
# @return [Boolean] false when +text+ is not present, otherwise whether it
#   starts with the binary notice
def has_binary_notice?(text)
  text.present? && text.start_with?(BINARY_NOTICE_PATTERN)
end

.patch_hard_limit_bytesObject

Returns the limit for a single diff file (patch).

Patches surpassing this limit shouldn’t be persisted in the database and will be presented as ‘too large’ for end-users.



153
154
155
# File 'lib/gitlab/git/diff.rb', line 153

# Returns the size limit for a single diff file (patch), read from
# +Gitlab::CurrentSettings.diff_max_patch_bytes+.
#
# Patches over this limit should not be persisted in the database and are
# presented to end-users as "too large".
#
# @return [Object] the configured limit
def patch_hard_limit_bytes
  settings = Gitlab::CurrentSettings
  settings.diff_max_patch_bytes
end

.patch_safe_limit_bytes(limit = patch_hard_limit_bytes) ⇒ Object

Returns the limit of bytes a single diff file can reach before it appears as ‘collapsed’ for end-users. By convention, it’s 10% of the persisted diff_max_patch_bytes.

Example: If we have 100k for the diff_max_patch_bytes, it will be 10k by default.

Patches surpassing this limit should still be persisted in the database.



141
142
143
# File 'lib/gitlab/git/diff.rb', line 141

# Returns the size a single diff file may reach before it is shown as
# "collapsed": one tenth of +limit+.
#
# Example: a limit of 100k gives a safe limit of 10k. Patches over the safe
# limit should still be persisted in the database.
#
# @param limit [Numeric] hard limit to derive from; defaults to
#   +patch_hard_limit_bytes+
# @return [Numeric] +limit+ divided by ten
def patch_safe_limit_bytes(limit = patch_hard_limit_bytes)
  safe_limit_divisor = 10
  limit / safe_limit_divisor
end

Instance Method Details

#collapse!Object



248
249
250
251
# File 'lib/gitlab/git/diff.rb', line 248

# Marks the diff as collapsed.
#
# The patch content is discarded first (see +prune!+), then @collapsed is
# set so that +collapsed?+ reports true without recomputing.
#
# @return [true]
def collapse!
  prune!
  @collapsed = true
end

#collapsed?Boolean

Returns:

  • (Boolean)


242
243
244
245
246
# File 'lib/gitlab/git/diff.rb', line 242

# Tells whether the diff is collapsed.
#
# The answer is computed once and cached in @collapsed (even when it is
# false or nil): a diff counts as collapsed when it is not expanded and its
# byte size has reached the class-level safe limit.
#
# @return [Boolean]
def collapsed?
  unless defined?(@collapsed)
    # The size is only looked at for non-expanded diffs.
    @collapsed = expanded ? false : diff_bytesize >= self.class.patch_safe_limit_bytes
  end

  @collapsed
end

#diff_bytesizeObject



217
218
219
# File 'lib/gitlab/git/diff.rb', line 217

# Returns the size of @diff in bytes, cached in @diff_bytesize after the
# first call.
#
# @return [Integer]
def diff_bytesize
  return @diff_bytesize if @diff_bytesize

  @diff_bytesize = @diff.bytesize
end

#has_binary_notice?Boolean

Returns:

  • (Boolean)


269
270
271
# File 'lib/gitlab/git/diff.rb', line 269

# Tells whether this diff's patch text starts with the binary notice, by
# delegating to the class-level +has_binary_notice?+ with @diff.
#
# @return [Boolean]
def has_binary_notice?
  patch_text = @diff
  self.class.has_binary_notice?(patch_text)
end

#json_safe_diffObject



262
263
264
265
266
267
# File 'lib/gitlab/git/diff.rb', line 262

# Returns the patch text, or a binary notice in its place when
# +detect_binary?+ flags @diff as binary.
#
# @return [String, nil] @diff itself, or the message built by
#   +Diff.binary_message+ from @old_path and @new_path
def json_safe_diff
  if detect_binary?(@diff)
    # The diff is binary: return a notice in its place.
    Diff.binary_message(@old_path, @new_path)
  else
    @diff
  end
end

#line_countObject



213
214
215
# File 'lib/gitlab/git/diff.rb', line 213

# Returns the number of lines in @diff as counted by +Util.count_lines+,
# cached in @line_count after the first call.
#
# @return [Integer]
def line_count
  return @line_count if @line_count

  @line_count = Util.count_lines(@diff)
end

#mode_changed?Boolean

Returns:

  • (Boolean)


195
196
197
# File 'lib/gitlab/git/diff.rb', line 195

# Tells whether both modes are set and differ from each other.
#
# @return [Boolean, nil] the falsy mode itself when either mode is missing,
#   otherwise whether +a_mode+ differs from +b_mode+
def mode_changed?
  return a_mode unless a_mode
  return b_mode unless b_mode

  a_mode != b_mode
end

#overflow?Boolean

Returns:

  • (Boolean)


253
254
255
256
257
258
259
260
# File 'lib/gitlab/git/diff.rb', line 253

# Returns the overflow flag when one has been recorded.
#
# If @overflow was never defined, the diff was not received from Gitaly and
# overflow has no meaning, so false is returned.
#
# @return [Object] @overflow when defined (whatever its value), else false
def overflow?
  defined?(@overflow) ? @overflow : false
end

#prune!Object



232
233
234
235
# File 'lib/gitlab/git/diff.rb', line 232

# Discards the patch content: @diff becomes an empty string and the cached
# line count is reset to zero.
#
# NOTE(review): a previously memoized @diff_bytesize is left untouched here,
# so +diff_bytesize+ may still report the pre-prune size. Confirm that this
# is intended.
#
# @return [Integer] 0 (the value assigned to @line_count)
def prune!
  @diff = ''
  @line_count = 0
end

#submodule?Boolean

Returns:

  • (Boolean)


199
200
201
# File 'lib/gitlab/git/diff.rb', line 199

# Tells whether either side of the diff has mode '160000'.
#
# @return [Boolean]
def submodule?
  [a_mode, b_mode].include?('160000')
end

#to_hashObject



185
186
187
188
189
190
191
192
193
# File 'lib/gitlab/git/diff.rb', line 185

# Serializes the diff into a Hash with one entry per SERIALIZE_KEYS item,
# each value obtained by calling the method of the same name.
#
# @return [Hash{Symbol => Object}]
def to_hash
  SERIALIZE_KEYS.each_with_object({}) do |attribute, serialized|
    serialized[attribute] = send(attribute) # rubocop:disable GitlabSecurity/PublicSend
  end
end

#too_large!Object



237
238
239
240
# File 'lib/gitlab/git/diff.rb', line 237

# Marks the diff as too large.
#
# The patch content is discarded first (see +prune!+), then @too_large is
# set so that +too_large?+ reports true without looking at the size again.
#
# @return [true]
def too_large!
  prune!
  @too_large = true
end

#too_large?Boolean Also known as: too_large

Returns:

  • (Boolean)


221
222
223
224
225
226
227
# File 'lib/gitlab/git/diff.rb', line 221

# Tells whether the patch has reached the class-level hard size limit.
#
# An already-set @too_large (true or false) wins; only a nil flag is
# computed from +diff_bytesize+, and the result is then cached.
# Also available as +too_large+.
#
# @return [Boolean]
def too_large?
  return @too_large unless @too_large.nil?

  @too_large = diff_bytesize >= self.class.patch_hard_limit_bytes
end

#unidiffObject



203
204
205
206
207
208
209
210
211
# File 'lib/gitlab/git/diff.rb', line 203

# Returns the patch with "---"/"+++" file header lines prepended.
#
# A blank diff is returned unchanged. When the content is detected as binary
# or carries a binary notice, +json_safe_diff+ is returned instead. The old
# side is '/dev/null' for a new file and the new side is '/dev/null' for a
# deleted file; otherwise the paths get the "a/" and "b/" prefixes.
#
# @return [String, nil]
def unidiff
  return diff if diff.blank?

  if detect_binary?(@diff) || has_binary_notice?
    json_safe_diff
  else
    from_header = new_file? ? '/dev/null' : "a/#{old_path}"
    to_header = deleted_file? ? '/dev/null' : "b/#{new_path}"
    file_headers = "--- #{from_header}\n+++ #{to_header}\n"

    file_headers + diff
  end
end