Class: UrlHelper

Inherits:
Object
  • Object
show all
Defined in:
lib/url_helper.rb

Constant Summary collapse

MAX_URL_LENGTH =
100_000

Class Method Summary collapse

Class Method Details

.absolute(url, cdn = Discourse.asset_host) ⇒ Object



52
53
54
55
# File 'lib/url_helper.rb', line 52

# Prepends a host to a root-relative path.
#
# @param url [String, nil] candidate path or URL
# @param cdn [String, nil] host to prepend; defaults to Discourse.asset_host
# @return [String, nil] host + url when url starts with a single "/",
#   otherwise url exactly as given
def self.absolute(url, cdn = Discourse.asset_host)
  # A protocol-relative host ("//cdn.example.com") is pinned to https.
  host = cdn
  host = "https:#{host}" if host && host =~ %r{\A//}

  # Only "/x..." qualifies; "//x", "/" alone, full URLs and nil pass through untouched.
  return url unless url =~ %r{\A/[^/]}

  # Without a CDN host, fall back to the site's own base URL.
  (host || Discourse.base_url_no_prefix) + url
end

.absolute_without_cdn(url) ⇒ Object



57
58
59
# File 'lib/url_helper.rb', line 57

# Variant of .absolute that always passes nil as the cdn argument.
#
# @param url [String, nil] candidate path or URL
# @return [String, nil] whatever .absolute returns for (url, nil)
def self.absolute_without_cdn(url)
  absolute(url, nil)
end

.cook_url(url, secure: false, local: nil) ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/url_helper.rb', line 110

# Rewrites a local URL for use in cooked output. Non-local URLs are returned unchanged.
#
# @param url [String] the URL to rewrite
# @param secure [Boolean] request the secure-uploads form; only honoured when
#   SiteSetting.secure_uploads is also enabled
# @param local [Boolean, nil] whether url is local; computed with .is_local when nil
# @return [String] the rewritten URL passed through .schemaless, the original url when
#   it is not local, or the current value of url if URI parsing raises URI::Error
def self.cook_url(url, secure: false, local: nil)
  is_secure = SiteSetting.secure_uploads && secure
  local = is_local(url) if local.nil?
  return url if !local

  url = is_secure ? secure_proxy_without_cdn(url) : absolute_without_cdn(url)

  # we always want secure uploads to come from
  # Discourse.base_url_no_prefix/secure-uploads
  # to avoid asset_host mixups
  return schemaless(url) if is_secure

  # PERF: avoid parsing url except for extreme conditions
  # this is a hot path used on home page
  filename = url
  if url.include?("?")
    uri = URI.parse(url)
    filename = File.basename(uri.path)
  end

  # this technically requires a filename, but will work with a URL as long as it ends with the
  # extension and has no query params
  is_attachment = !FileHelper.is_supported_media?(filename)

  # Attachments skip the CDN rewrite when either of these settings is enabled.
  # NOTE(review): presumably so access-restricted files are not exposed via the CDN - confirm.
  no_cdn = SiteSetting.login_required || SiteSetting.prevent_anons_from_downloading_files
  unless is_attachment && no_cdn
    url = Discourse.store.cdn_url(url)
    url = local_cdn_url(url) if Discourse.store.external?
  end

  schemaless(url)
rescue URI::Error
  # Best effort: hand back url as it stands rather than failing the caller.
  url
end

.encode(url) ⇒ Object



31
32
33
# File 'lib/url_helper.rb', line 31

# Encodes a URL by delegating to Addressable::URI.encode.
#
# @param url [String] the URL to encode
# @return [Object] whatever Addressable::URI.encode returns for url
def self.encode(url)
  encoded = Addressable::URI.encode(url)
  encoded
end

.encode_and_parse(url) ⇒ Object



27
28
29
# File 'lib/url_helper.rb', line 27

# Encodes a URL with Addressable::URI.encode and parses the result with URI.parse.
#
# @param url [String] the URL to encode and parse
# @return [URI::Generic] the parsed URI
def self.encode_and_parse(url)
  encoded = Addressable::URI.encode(url)
  URI.parse(encoded)
end

.encode_component(url_component) ⇒ Object



39
40
41
# File 'lib/url_helper.rb', line 39

# Encodes a single URL component by delegating to Addressable::URI.encode_component.
#
# @param url_component [String] the component to encode
# @return [Object] whatever Addressable::URI.encode_component returns for url_component
def self.encode_component(url_component)
  encoded_component = Addressable::URI.encode_component(url_component)
  encoded_component
end

.escape_uri(uri) ⇒ Object



69
70
71
72
73
74
75
76
# File 'lib/url_helper.rb', line 69

# Emits a deprecation notice through Discourse.deprecate, then returns
# .normalized_encode(uri).
#
# @deprecated Use .normalized_encode for normalizing user input, or .encode for true encoding.
# @param uri [Object] passed straight to .normalized_encode
# @return [Object] the result of .normalized_encode(uri)
def self.escape_uri(uri)
  deprecation_notice =
    "UrlHelper.escape_uri is deprecated. For normalization of user input use `.normalized_encode`. For true encoding, use `.encode`"
  Discourse.deprecate(deprecation_notice, output_in_test: true, drop_from: "3.0")

  normalized_encode(uri)
end

.is_local(url) ⇒ Object



43
44
45
46
47
48
49
50
# File 'lib/url_helper.rb', line 43

# Reports whether a URL points at this site's own content.
#
# A present URL counts as local when any of these holds:
# - Discourse.store.has_been_uploaded?(url) is truthy
# - it begins with "<base_path>/assets/", "<base_path>/plugins/" or "<base_path>/images/"
# - it begins with Discourse.asset_host, or with Discourse.base_url_no_prefix when no
#   asset host is set
#
# @param url [String, nil] the URL to test
# @return [Boolean, nil] falsy for a blank url, otherwise whether any rule above matched
def self.is_local(url)
  # \A anchors at the start of the whole string. The previous ^ anchor matched at the
  # start of any line, so "http://elsewhere/x\n/assets/y" was treated as local.
  # base_path is escaped so regexp metacharacters in it are matched literally.
  local_path = %r{\A#{Regexp.escape(Discourse.base_path.to_s)}/(assets|plugins|images)/}

  url.present? &&
    (
      Discourse.store.has_been_uploaded?(url) ||
        !!(url =~ local_path) ||
        url.start_with?(Discourse.asset_host || Discourse.base_url_no_prefix)
    )
end

.local_cdn_url(url) ⇒ Object



145
146
147
148
149
150
151
152
# File 'lib/url_helper.rb', line 145

# Points a URL at Discourse.asset_host.
#
# @param url [String] an upload path or a URL containing Discourse.base_url_no_prefix
# @return [String] url unchanged when no asset host is configured; the asset host
#   prepended when url starts with "/<upload_path>/"; otherwise url with the first
#   occurrence of Discourse.base_url_no_prefix replaced by the asset host
def self.local_cdn_url(url)
  asset_host = Discourse.asset_host
  return url if asset_host.blank?

  upload_prefix = "/#{Discourse.store.upload_path}/"
  return "#{asset_host}#{url}" if url.start_with?(upload_prefix)

  url.sub(Discourse.base_url_no_prefix, asset_host)
end

.normalized_encode(uri) ⇒ Object

Raises:

  • (ArgumentError)


78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/url_helper.rb', line 78

# Normalizes and encodes a URL, preferring Ruby's URI and falling back to Addressable.
#
# @param uri [#to_s] the URL to normalize; converted with to_s first
# @return [Object] the result of normalize_with_ruby_uri or normalize_with_addressable
# @raise [ArgumentError] when the URL is longer than MAX_URL_LENGTH characters
def self.normalized_encode(uri)
  url = uri.to_s

  # Exception constructors accept at most one argument. The previous two-argument
  # ArgumentError.new call therefore failed with an arity error, hiding this message.
  raise ArgumentError, "URL is too long" if url.length > MAX_URL_LENGTH

  # Ideally we will jump straight to `Addressable::URI.normalized_encode`. However,
  # that implementation has some edge-case issues like https://github.com/sporkmonger/addressable/issues/472.
  # To temporarily work around those issues for the majority of cases, we try parsing with `::URI`.
  # If that fails (e.g. due to non-ascii characters) then we will fall back to addressable.
  # Hopefully we can simplify this back to `Addressable::URI.normalized_encode` in the future.

  # edge case where we expect mailto:test%40test.com to normalize to mailto:test@test.com
  return normalize_with_addressable(url) if url.match?(/\Amailto:/)

  # If it doesn't pass the regexp, it's definitely not gonna parse with URI.parse. Skip
  # to addressable
  return normalize_with_addressable(url) if !url.match?(/\A#{URI.regexp}\z/)

  begin
    normalize_with_ruby_uri(url)
  rescue URI::Error
    normalize_with_addressable(url)
  end
end

.rails_route_from_url(url) ⇒ Object



103
104
105
106
107
108
# File 'lib/url_helper.rb', line 103

# Resolves a URL to a Rails route by encoding it, taking its path, and asking the
# Rails router to recognize that path.
#
# NOTE(review): URI.parse raises URI::InvalidURIError, which is not in the rescue
# list below - confirm whether that is intended.
#
# @param url [String] the URL to resolve
# @return [Object, nil] the result of recognize_path, or nil when one of the rescued
#   URI errors is raised
def self.rails_route_from_url(url)
  begin
    encoded_url = encode(url)
    parsed = URI.parse(encoded_url)
    Rails.application.routes.recognize_path(parsed.path)
  rescue Addressable::URI::InvalidURIError, URI::InvalidComponentError
    nil
  end
end

.relaxed_parse(url) ⇒ Object

At the moment this handles invalid URLs that the browser address bar accepts, where a second # is not encoded.

Longer term, we can add support for simpleidn and encode Unicode domains.



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/url_helper.rb', line 10

# Parses a URL leniently: everything after the first "#" is treated as the fragment,
# and a fragment that itself contains "#" is encoded before being assigned.
#
# @param url [String] the URL to parse
# @return [URI::Generic, nil] the parsed URI with its fragment set, or nil when
#   a URI::Error is raised
def self.relaxed_parse(url)
  base, frag = url.split("#", 2)
  parsed = URI.parse(base)
  return unless parsed

  if frag&.include?("#")
    # Addressable::URI::CharacterClasses::UNRESERVED is used here because without it
    # the # in the fragment is not encoded
    frag = Addressable::URI.encode_component(frag, Addressable::URI::CharacterClasses::UNRESERVED)
  end

  parsed.fragment = frag
  parsed
rescue URI::Error
  nil
end

.schemaless(url) ⇒ Object



61
62
63
# File 'lib/url_helper.rb', line 61

# Strips a leading "http:" (any letter case) so the URL becomes protocol-relative.
# Any other scheme, including "https:", is left in place.
#
# @param url [String] the URL to strip
# @return [String] a new string without the leading "http:"
def self.schemaless(url)
  http_scheme = /\Ahttp:/i
  url.sub(http_scheme, "")
end

.secure_proxy_without_cdn(url) ⇒ Object



65
66
67
# File 'lib/url_helper.rb', line 65

# Maps an upload URL to its secure-uploads URL via Upload.secure_uploads_url_from_upload_url,
# then passes the result to .absolute with a nil cdn argument.
#
# @param url [String] the upload URL
# @return [Object] whatever .absolute returns for the secure URL
def self.secure_proxy_without_cdn(url)
  secure_url = Upload.secure_uploads_url_from_upload_url(url)
  absolute(secure_url, nil)
end

.unencode(url) ⇒ Object



35
36
37
# File 'lib/url_helper.rb', line 35

# Decodes a URL by delegating to Addressable::URI.unencode.
#
# @param url [String] the URL to decode
# @return [Object] whatever Addressable::URI.unencode returns for url
def self.unencode(url)
  decoded = Addressable::URI.unencode(url)
  decoded
end