Class: FinalDestination

Inherits:
Object
  • Object
show all
Defined in:
lib/final_destination.rb,
lib/final_destination/ssrf_detector.rb,
lib/final_destination/faraday_adapter.rb

Overview

Determine the final endpoint for a Web URI, following redirects

Defined Under Namespace

Modules: SSRFDetector Classes: FaradayAdapter, FastImage, HTTP, Resolver, SSRFError

Constant Summary collapse

MAX_REQUEST_TIME_SECONDS =
10
MAX_REQUEST_SIZE_BYTES =

1024 * 1024 * 5

5_242_880
DEFAULT_USER_AGENT =
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15"

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, opts = nil) ⇒ FinalDestination

Returns a new instance of FinalDestination.



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/final_destination.rb', line 41

# Builds a resolver for +url+.
#
# @param url [String, nil] address to resolve; when nil, @uri is left unset
# @param opts [Hash, nil] optional settings. Keys read here:
#   :force_get_hosts, :preserve_fragment_url_hosts and
#   :force_custom_user_agent_hosts (lists of host patterns, default []),
#   :default_user_agent (default DEFAULT_USER_AGENT), :max_redirects
#   (default 5), :initial_https_redirect_ignore_limit,
#   :include_port_in_host_header (default false), :ignore_redirects,
#   :follow_canonical, :http_verb, :verbose (default false), :timeout,
#   :validate_uri (default true) and :stop_at_blocked_pages.
def initialize(url, opts = nil)
  @url = url
  @uri = uri(normalized_url) if @url

  # Work on a private copy: the :max_redirects default below is written into
  # the hash, and that write must neither leak into the caller's hash nor
  # fail when the caller passes a frozen one.
  @opts = opts ? opts.dup : {}
  @force_get_hosts = @opts[:force_get_hosts] || []
  @preserve_fragment_url_hosts = @opts[:preserve_fragment_url_hosts] || []
  @force_custom_user_agent_hosts = @opts[:force_custom_user_agent_hosts] || []
  @default_user_agent = @opts[:default_user_agent] || DEFAULT_USER_AGENT
  @opts[:max_redirects] ||= 5
  @https_redirect_ignore_limit = @opts[:initial_https_redirect_ignore_limit]
  @include_port_in_host_header = @opts[:include_port_in_host_header] || false

  # @limit is the remaining redirect budget; @max_redirects stays fixed so
  # the two can be compared later.
  @max_redirects = @opts[:max_redirects]
  @limit = @max_redirects

  # Hostnames that #resolve treats as already resolved. Only collected when
  # redirects are allowed at all.
  @ignored = []
  if @limit > 0
    ignore_redirects = [Discourse.base_url_no_prefix]

    ignore_redirects.concat(@opts[:ignore_redirects]) if @opts[:ignore_redirects]

    ignore_redirects.each do |ignore_redirect|
      ignore_redirect = uri(ignore_redirect)
      @ignored << ignore_redirect.hostname if ignore_redirect.present? && ignore_redirect.hostname
    end
  end

  @status = :ready
  @follow_canonical = @opts[:follow_canonical]
  # An explicit :http_verb wins; otherwise the verb is derived from the host.
  @http_verb = @opts[:http_verb] || http_verb(@force_get_hosts, @follow_canonical)
  @cookie = nil
  @limited_ips = []
  @verbose = @opts[:verbose] || false
  @timeout = @opts[:timeout] || nil
  @preserve_fragment_url = @preserve_fragment_url_hosts.any? { |host| hostname_matches?(host) }
  # fetch with a block keeps an explicit `validate_uri: false` intact.
  @validate_uri = @opts.fetch(:validate_uri) { true }
  @user_agent =
    (
      if @force_custom_user_agent_hosts.any? { |host| hostname_matches?(host) }
        Onebox.options.user_agent
      else
        @default_user_agent
      end
    )
  @stop_at_blocked_pages = @opts[:stop_at_blocked_pages]
end

Instance Attribute Details

#content_type ⇒ Object (readonly)

Returns the value of attribute content_type.



39
40
41
# File 'lib/final_destination.rb', line 39

# Value of the "Content-Type" response header recorded by #resolve when a
# request ends with status 200 and the header is present; nil otherwise.
def content_type
  @content_type
end

#cookie ⇒ Object (readonly)

Returns the value of attribute cookie.



39
40
41
# File 'lib/final_destination.rb', line 39

# Cookie header value collected while resolving. Starts as nil (set in
# #initialize); #resolve rebuilds it from the first `;`-separated segment of
# each cookie in a response, joined with "; ".
def cookie
  @cookie
end

#ignored ⇒ Object (readonly)

Returns the value of attribute ignored.



39
40
41
# File 'lib/final_destination.rb', line 39

# Hostnames that #resolve treats as already resolved. Filled in #initialize
# from Discourse.base_url_no_prefix plus opts[:ignore_redirects]; left empty
# when the redirect limit is not positive.
def ignored
  @ignored
end

#status ⇒ Object (readonly)

Returns the value of attribute status.



39
40
41
# File 'lib/final_destination.rb', line 39

# Current resolution state. Starts as :ready (set in #initialize); #resolve
# moves it to :resolved, :too_many_redirects, :invalid_address,
# :blocked_page or :failure.
def status
  @status
end

#status_code ⇒ Object (readonly)

Returns the value of attribute status_code.



39
40
41
# File 'lib/final_destination.rb', line 39

# HTTP status of the response that made #resolve give up. Only assigned when
# #resolve ends with @status == :failure; nil otherwise.
def status_code
  @status_code
end

Class Method Details

.cache_https_domain(domain) ⇒ Object



22
23
24
25
# File 'lib/final_destination.rb', line 22

# Marks +domain+ as HTTPS by storing "1" under its HTTPS cache key with a
# one-day expiry, in the non-namespaced Redis connection.
def self.cache_https_domain(domain)
  Discourse.redis.without_namespace.setex(redis_https_key(domain), 1.day.to_i, "1")
end

.clear_https_cache!(domain) ⇒ Object



17
18
19
20
# File 'lib/final_destination.rb', line 17

# Removes the HTTPS marker for +domain+ from the non-namespaced Redis
# connection and returns whatever the underlying `del` call returns.
def self.clear_https_cache!(domain)
  Discourse.redis.without_namespace.del(redis_https_key(domain))
end

.connection_timeout ⇒ Object



89
90
91
# File 'lib/final_destination.rb', line 89

# Default timeout used by #timeout when no :timeout option was supplied.
# The value is handed to the HTTP clients' connect/read/open timeout
# settings — presumably seconds; confirm against the client libraries.
def self.connection_timeout
  20
end

.is_https_domain?(domain) ⇒ Boolean

Returns:

  • (Boolean)


27
28
29
30
# File 'lib/final_destination.rb', line 27

# Reports whether an HTTPS marker is currently stored for +domain+ in the
# non-namespaced Redis connection.
#
# @return [Boolean]
def self.is_https_domain?(domain)
  marker = Discourse.redis.without_namespace.get(redis_https_key(domain))
  marker.present?
end

.redis_https_key(domain) ⇒ Object



32
33
34
# File 'lib/final_destination.rb', line 32

# Builds the Redis key under which the HTTPS marker for +domain+ is kept.
#
# @return [String] "HTTPS_DOMAIN_" followed by the domain
def self.redis_https_key(domain)
  "HTTPS_DOMAIN_" + domain.to_s
end

.resolve(url, opts = nil) ⇒ Object



93
94
95
# File 'lib/final_destination.rb', line 93

# Convenience wrapper: builds an instance for +url+ / +opts+ and returns the
# result of calling #resolve on it.
def self.resolve(url, opts = nil)
  destination = new(url, opts)
  destination.resolve
end

Instance Method Details

#get(redirects = @limit, extra_headers: {}, &blk) ⇒ Object

this is a new interface for simply getting N bytes accounting for all internal logic



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# File 'lib/final_destination.rb', line 154

# Fetches the current URI through safe_get, following redirects, and hands
# the given block on to safe_get.
#
# NOTE(review): extra_headers is accepted and forwarded on recursion but is
# never read in this method body — confirm whether safe_get should get it.
#
# @param redirects [Integer] remaining redirect budget (defaults to @limit)
# @param extra_headers [Hash, nil] see note above
# @return [String, nil] the final URL as a string when safe_get reports :ok,
#   nil in every other case
# @raise [RuntimeError] when no block is given
def get(redirects = @limit, extra_headers: {}, &blk)
  raise "Must specify block" unless block_given?

  # Upgrade plain-http URIs for hosts already known to redirect to https.
  if @uri && @uri.port == 80 && FinalDestination.is_https_domain?(@uri.hostname)
    @uri.scheme = "https"
    @uri = URI(@uri.to_s)
  end

  return unless validate_uri
  return if @stop_at_blocked_pages && blocked_domain?(@uri)

  outcome, response_headers = safe_get(@uri, &blk)
  return unless outcome

  cookie_value = response_headers.set_cookie
  target = response_headers.location

  return @uri.to_s if outcome == :ok
  return nil unless outcome == :redirect
  return unless target

  previous_uri = @uri
  # Path-only Location headers are resolved against the current origin.
  target = "#{@uri.scheme}://#{@uri.host}#{target}" if target[0] == "/"
  @uri = uri(target)

  # The very first hop may be a free http -> https upgrade of the same URI.
  free_https_hop =
    @uri && redirects == @max_redirects && @https_redirect_ignore_limit &&
      same_uri_but_https?(previous_uri, @uri)
  if free_https_hop
    redirects += 1
    @https_redirect_ignore_limit = false
  end

  return if redirects == 0

  # https redirect, so just cache that whole new domain is https
  if previous_uri.port == 80 && @uri&.port == 443 && (URI::HTTPS === @uri)
    FinalDestination.cache_https_domain(@uri.hostname)
  end

  return unless @uri

  cookie_header = cookie_value ? { "Cookie" => cookie_value } : nil
  get(redirects - 1, extra_headers: cookie_header, &blk)
end

#hostname ⇒ Object



439
440
441
# File 'lib/final_destination.rb', line 439

# Hostname of the URI currently being resolved. @uri is only assigned in
# #initialize when a url was given, so this raises NoMethodError if it is nil.
def hostname
  @uri.hostname
end

#hostname_matches?(url) ⇒ Boolean

Returns:

  • (Boolean)


443
444
445
446
447
448
449
450
451
452
453
454
455
456
# File 'lib/final_destination.rb', line 443

# Checks whether the current URI's hostname matches the hostname of +url+.
#
# A pattern whose first label is "*" (e.g. "http://*.example.com") matches
# the remaining domain itself and any subdomain of it. The match is made on
# a label boundary, so "*.example.com" does not match "evilexample.com".
# A bare "*" pattern matches every hostname.
#
# @param url [String] URL whose hostname is the pattern to compare against
# @return [Boolean, nil] nil when either hostname is missing
def hostname_matches?(url)
  url = uri(url)
  return unless @uri&.hostname.present? && url&.hostname.present?

  labels = url.hostname.split(".")
  return @uri.hostname == url.hostname unless labels.first == "*"

  suffix = labels.drop(1).join(".")
  return true if suffix.empty?

  # Require an exact match or a "." before the suffix; a bare end_with?
  # would also accept unrelated domains that merely share the ending.
  @uri.hostname == suffix || @uri.hostname.end_with?(".#{suffix}")
end

#hostname_matches_s3_endpoint?(allowed_internal_host) ⇒ Boolean

Returns:

  • (Boolean)


433
434
435
436
437
# File 'lib/final_destination.rb', line 433

# True when the current URI matches +allowed_internal_host+, uses the same
# port as SiteSetting.s3_endpoint, and its hostname ends with the endpoint's
# hostname.
#
# @param allowed_internal_host [String] host (or host pattern) to test
# @return [Boolean, nil] falsy result of hostname_matches? is passed through
def hostname_matches_s3_endpoint?(allowed_internal_host)
  endpoint = URI(SiteSetting.s3_endpoint)

  host_matched = hostname_matches?("http://#{allowed_internal_host}")
  return host_matched unless host_matched
  return false unless @uri.port == endpoint.port

  @uri.hostname.end_with?(endpoint.hostname)
end

#http_port_ok? ⇒ Boolean

Returns:

  • (Boolean)


422
423
424
425
426
427
428
429
430
431
# File 'lib/final_destination.rb', line 422

# Decides whether the current URI's port is acceptable for a plain-http
# request. Port 80 is always fine; any other port is accepted only when one
# of the entries in SiteSetting.allowed_internal_hosts matches the configured
# S3 endpoint (see #hostname_matches_s3_endpoint?).
#
# @return [Boolean]
def http_port_ok?
  return true if @uri.port == 80

  # The setting is a "|" or newline separated list and may be nil; to_s keeps
  # the whole chain nil-safe (a lone `&.` only guards the first call).
  allowed_internal_hosts =
    SiteSetting.allowed_internal_hosts.to_s.split(/[|\n]/).map(&:strip).reject(&:empty?)
  return false if allowed_internal_hosts.empty? || SiteSetting.s3_endpoint.blank?

  allowed_internal_hosts.any? { |aih| hostname_matches_s3_endpoint?(aih) }
end

#http_verb(force_get_hosts, follow_canonical) ⇒ Object



97
98
99
100
101
102
103
# File 'lib/final_destination.rb', line 97

# Picks the HTTP verb for the next request: :get when canonical links must
# be followed or the current host matches one of +force_get_hosts+,
# otherwise :head.
#
# @param force_get_hosts [Array<String>] host patterns that require GET
# @param follow_canonical [Boolean, nil]
# @return [Symbol] :get or :head
def http_verb(force_get_hosts, follow_canonical)
  return :get if follow_canonical

  force_get_hosts.any? { |host| hostname_matches?(host) } ? :get : :head
end

#log(log_level, message) ⇒ Object



462
463
464
465
466
467
468
469
470
# File 'lib/final_destination.rb', line 462

# Writes +message+ to Rails.logger at +log_level+, prefixed with the current
# multisite database name. Does nothing unless verbose mode is on, and stays
# silent when the recorded status code is 404.
#
# @param log_level [Symbol] logger method to call (e.g. :warn)
# @param message [String]
def log(log_level, message)
  return if !@verbose || @status_code == 404

  prefixed = "#{RailsMultisite::ConnectionManagement.current_db}: #{message}"
  Rails.logger.public_send(log_level, prefixed)
end

#normalized_url ⇒ Object



458
459
460
# File 'lib/final_destination.rb', line 458

# Returns the original @url passed through UrlHelper.normalized_encode.
# #initialize feeds the result to uri() to build @uri.
def normalized_url
  UrlHelper.normalized_encode(@url)
end

#redirected? ⇒ Boolean

Returns:

  • (Boolean)


109
110
111
# File 'lib/final_destination.rb', line 109

# True once the remaining redirect budget (@limit, decremented by #resolve on
# each followed redirect) has dropped below the configured @max_redirects.
def redirected?
  @limit < @max_redirects
end

#request_headers ⇒ Object



113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/final_destination.rb', line 113

# Builds the headers sent with each request: user agent, permissive Accept
# headers and a Host header (with ":port" appended when
# @include_port_in_host_header is set). A Cookie header is added only when a
# cookie has been collected.
#
# @return [Hash{String => String}]
def request_headers
  port_suffix = @include_port_in_host_header ? ":#{@uri.port}" : ""

  headers = {
    "User-Agent" => @user_agent,
    "Accept" => "*/*",
    "Accept-Language" => "*",
    "Host" => @uri.hostname + port_suffix,
  }
  headers["Cookie"] = @cookie if @cookie
  headers
end

#resolve ⇒ Object



204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
# File 'lib/final_destination.rb', line 204

# Resolves the current URI to its final destination, following redirects
# (and, when enabled, one canonical link) by calling itself once per hop.
#
# @status is updated along the way: :resolved on success, otherwise
# :too_many_redirects, :invalid_address, :blocked_page or :failure. On
# :failure the HTTP status is stored in @status_code. An Excon timeout is
# logged and yields nil without touching @status.
#
# @return [Object, nil] the final URI object when resolved, nil otherwise
def resolve
  # Upgrade plain-http URIs for hosts already known to redirect to https.
  if @uri && @uri.port == 80 && FinalDestination.is_https_domain?(@uri.hostname)
    @uri.scheme = "https"
    @uri = URI(@uri.to_s)
  end

  if @limit < 0
    @status = :too_many_redirects
    log(:warn, "FinalDestination could not resolve URL (too many redirects): #{@uri}")
    return
  end

  unless validate_uri
    @status = :invalid_address
    log(:warn, "FinalDestination could not resolve URL (invalid URI): #{@uri}")
    return
  end

  # Hosts on the ignore list are accepted as-is, without any request.
  @ignored.each do |host|
    if @uri&.hostname&.match?(host)
      @status = :resolved
      return @uri
    end
  end

  if Oneboxer.cached_response_body_exists?(@uri.to_s)
    @status = :resolved
    return @uri
  end

  headers = request_headers
  middlewares = Excon.defaults[:middlewares].dup
  middlewares << Excon::Middleware::Decompress if @http_verb == :get

  # Streams the body into response_body while enforcing the size and time
  # caps; exceeding either aborts the request with an Excon error.
  request_start_time = Time.now
  response_body = +""
  request_validator =
    lambda do |chunk, _remaining_bytes, _total_bytes|
      response_body << chunk
      if response_body.bytesize > MAX_REQUEST_SIZE_BYTES
        raise Excon::Errors::ExpectationFailed.new("response size too big: #{@uri.to_s}")
      end
      if Time.now - request_start_time > MAX_REQUEST_TIME_SECONDS
        raise Excon::Errors::ExpectationFailed.new("connect timeout reached: #{@uri.to_s}")
      end
    end

  # This technique will only use the first resolved IP
  # TODO: Can we standardise this by using FinalDestination::HTTP?
  begin
    resolved_ip = SSRFDetector.lookup_and_filter_ips(@uri.hostname).first
  rescue SSRFDetector::DisallowedIpError, SocketError, Timeout::Error
    @status = :invalid_address
    return
  end
  # The request goes to the vetted IP; Host header and TLS verification
  # below still use the original hostname.
  request_uri = @uri.dup
  request_uri.hostname = resolved_ip unless Rails.env.test? # WebMock doesn't understand the IP-based requests

  response =
    Excon.public_send(
      @http_verb,
      request_uri.to_s,
      read_timeout: timeout,
      connect_timeout: timeout,
      headers: { "Host" => @uri.hostname }.merge(headers),
      middlewares: middlewares,
      response_block: request_validator,
      ssl_verify_peer_host: @uri.hostname,
    )

  if @stop_at_blocked_pages
    if blocked_domain?(@uri) || response.headers["Discourse-No-Onebox"] == "1"
      @status = :blocked_page
      return
    end
  end

  location = nil
  response_headers = nil
  response_status = response.status.to_i

  case response.status
  when 200
    # Cache body of successful `get` requests
    if @http_verb == :get
      if Oneboxer.cache_response_body?(@uri)
        Oneboxer.cache_response_body(@uri.to_s, response_body)
      end
    end

    if @follow_canonical
      next_url = fetch_canonical_url(response_body)

      if next_url.to_s.present? && next_url != @uri
        # Only one canonical hop is followed; the verb is re-derived for it.
        @follow_canonical = false
        @uri = next_url
        @http_verb = http_verb(@force_get_hosts, @follow_canonical)

        return resolve
      end
    end

    @content_type = response.headers["Content-Type"] if response.headers.has_key?("Content-Type")
    @status = :resolved
    return @uri
  when 103, 400, 405, 406, 409, 500, 501
    # For these statuses the request is retried as a small GET and its
    # status and headers replace those of the first response.
    response_status, small_headers = small_get(request_headers)

    if @stop_at_blocked_pages
      # this may seem weird, but the #to_hash method of the response object
      # of ruby's net/http lib returns a hash where each value is an array.
      # small_headers here is like that so our no onebox header value is an
      # array if it's set. Also the hash keys are always lower-cased.
      dont_onebox = small_headers["discourse-no-onebox"]&.join("") == "1"
      if dont_onebox || blocked_domain?(@uri)
        @status = :blocked_page
        return
      end
    end

    if response_status == 200
      @status = :resolved
      return @uri
    end

    response_headers = {}
    if cookie_val = small_headers["set-cookie"]
      response_headers[:cookies] = cookie_val
    end

    if location_val = small_headers["location"]
      response_headers[:location] = location_val.join
    end
  end

  unless response_headers
    response_headers = {
      cookies: response.data[:cookies] || response.headers[:"set-cookie"],
      location: response.headers[:location],
    }
  end

  location = response_headers[:location] if (300..399).include?(response_status)

  # Keep only the name=value part of each cookie for the next request.
  if cookies = response_headers[:cookies]
    @cookie = Array.wrap(cookies).map { |c| c.split(";").first.strip }.join("; ")
  end

  if location
    redirect_uri = uri(location)

    # uri() is nil-checked everywhere else in this class, so an unparseable
    # Location header must not be dereferenced here.
    unless redirect_uri
      @status = :invalid_address
      log(:warn, "FinalDestination could not resolve URL (invalid URI): #{location}")
      return
    end

    # A same-host redirect to a login/session page means the current URL is
    # as far as we can get.
    if @uri.host == redirect_uri.host &&
         (redirect_uri.path =~ %r{/login} || redirect_uri.path =~ %r{/session})
      @status = :resolved
      return @uri
    end

    old_uri = @uri
    location = "#{location}##{@uri.fragment}" if @preserve_fragment_url && @uri.fragment.present?
    location = "#{@uri.scheme}://#{@uri.host}#{location}" if location[0] == "/"
    @uri = uri(location)

    # The very first hop may be a free http -> https upgrade of the same URI:
    # the increment cancels out the decrement that follows.
    if @uri && @limit == @max_redirects && @https_redirect_ignore_limit &&
         same_uri_but_https?(old_uri, @uri)
      @limit += 1
      @https_redirect_ignore_limit = false
    end
    @limit -= 1

    # https redirect, so just cache that whole new domain is https
    if old_uri.port == 80 && @uri&.port == 443 && (URI::HTTPS === @uri)
      FinalDestination.cache_https_domain(@uri.hostname)
    end
    return resolve
  end

  # this is weird an exception seems better
  @status = :failure
  @status_code = response.status

  log(:warn, "FinalDestination could not resolve URL (status #{response.status}): #{@uri}")
  nil
rescue Excon::Errors::Timeout
  log(:warn, "FinalDestination could not resolve URL (timeout): #{@uri}")
  nil
end

#skip_validations? ⇒ Boolean

Returns:

  • (Boolean)


390
391
392
# File 'lib/final_destination.rb', line 390

# True when URI validation was switched off via the :validate_uri option.
#
# @return [Boolean]
def skip_validations?
  @validate_uri ? false : true
end

#small_get(request_headers) ⇒ Object



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/final_destination.rb', line 126

# Issues a GET for the current URI through FinalDestination::HTTP and
# captures only the status code and headers, aborting before the body is
# read.
#
# @param request_headers [Hash] headers to send with the request
# @return [Array] two elements: the integer status code and the header hash
#   from the response's #to_hash (both nil if the response block never ran)
def small_get(request_headers)
  code = nil
  headers = nil

  catch(:done) do
    FinalDestination::HTTP.start(
      @uri.host,
      @uri.port,
      use_ssl: @uri.is_a?(URI::HTTPS),
      open_timeout: timeout,
    ) do |connection|
      connection.read_timeout = timeout
      connection.request_get(@uri.request_uri, request_headers) do |response|
        code = response.code.to_i
        headers = response.to_hash

        # see: https://bugs.ruby-lang.org/issues/15624
        # Letting the block return would make the client read the body, so
        # unwind straight out to the enclosing catch instead.
        throw :done
      end
    end
  end

  [code, headers]
end

#timeout ⇒ Object



105
106
107
# File 'lib/final_destination.rb', line 105

# Timeout applied to outgoing requests: the :timeout option when one was
# given, otherwise FinalDestination.connection_timeout.
def timeout
  return @timeout if @timeout

  FinalDestination.connection_timeout
end

#validate_uri ⇒ Object



394
395
396
# File 'lib/final_destination.rb', line 394

# Passes when validation is disabled; otherwise returns the result of the
# format check in #validate_uri_format.
def validate_uri
  skipped = skip_validations?
  return skipped if skipped

  validate_uri_format
end

#validate_uri_format ⇒ Object



398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
# File 'lib/final_destination.rb', line 398

# Checks that the current URI is something we are willing to crawl: it must
# have a host, use http or https on an accepted port, and the host must not
# be an IP address literal.
#
# @return [Boolean]
def validate_uri_format
  return false if !@uri || !@uri.host

  case @uri.scheme
  when "http"
    # In some cases (like local/test environments) we may want to allow http
    # URLs on other ports for internal hosts, but only when the host is the
    # one configured as SiteSetting.s3_endpoint (e.g. a local minio). In all
    # other cases plain http is limited to port 80.
    return false unless http_port_ok?
  when "https"
    return false if @uri.port != 443
  else
    return false
  end

  # Disallow IP based crawling: a hostname that IPAddr can parse is rejected.
  begin
    IPAddr.new(@uri.hostname)
    false
  rescue StandardError
    true
  end
end