Class: UrlHelper

Inherits:
Object
  • Object
show all
Defined in:
lib/url_helper.rb

Constant Summary collapse

MAX_URL_LENGTH =
2_000

Class Method Summary collapse

Class Method Details

.absolute(url, cdn = Discourse.asset_host) ⇒ Object



75
76
77
78
# File 'lib/url_helper.rb', line 75

# Turns a root-relative path into an absolute URL.
#
# @param url [String] URL or path to resolve
# @param cdn [String, nil] host to prepend; defaults to Discourse.asset_host.
#   A protocol-relative value ("//host") is given an explicit "https:" scheme.
# @return [String] +cdn+ (or Discourse.base_url_no_prefix when +cdn+ is nil)
#   followed by +url+ when +url+ starts with exactly one "/"; otherwise +url+
#   is returned unchanged
def self.absolute(url, cdn = Discourse.asset_host)
  if cdn && cdn =~ %r{\A//}
    cdn = "https:#{cdn}"
  end

  # Only "/x..." style paths are rewritten; "//host/..." and full URLs pass through.
  return url unless url =~ %r{\A/[^/]}

  host = cdn || Discourse.base_url_no_prefix
  host + url
end

.absolute_without_cdn(url) ⇒ Object



80
81
82
# File 'lib/url_helper.rb', line 80

# Same as .absolute, but always passes nil as the CDN host so the default
# (Discourse.asset_host) is never used.
#
# @param url [String] URL or path to resolve
# @return [String] whatever .absolute returns for (url, nil)
def self.absolute_without_cdn(url)
  absolute(url, nil)
end

.cook_url(url, secure: false, local: nil) ⇒ Object



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/url_helper.rb', line 126

# Rewrites a local upload/asset URL into the form used when rendering content.
#
# Non-local URLs are returned untouched. When SiteSetting.secure_uploads is on
# and +secure+ is true, the URL is routed through secure_proxy_without_cdn and
# never moved to a CDN host. Otherwise the URL is made absolute, optionally
# rewritten to the CDN, and returned with any leading "http:" removed.
#
# @param url [String] the URL to cook
# @param secure [Boolean] whether the upload behind the URL is a secure upload
# @param local [Boolean, nil] whether the URL is local; when nil it is computed
#   with is_local(url)
# @return [String] the cooked URL; if URI parsing fails, the URL as it stood
#   at that point is returned instead
def self.cook_url(url, secure: false, local: nil)
  is_secure = SiteSetting.secure_uploads && secure
  local = is_local(url) if local.nil?
  return url unless local

  url = is_secure ? secure_proxy_without_cdn(url) : absolute_without_cdn(url)

  # we always want secure uploads to come from
  # Discourse.base_url_no_prefix/secure-uploads
  # to avoid asset_host mixups
  return schemaless(url) if is_secure

  # PERF: avoid parsing url except for extreme conditions
  # this is a hot path used on home page
  filename = url.include?("?") ? File.basename(URI.parse(url).path) : url

  # this technically requires a filename, but will work with a URL as long as it ends with the
  # extension and has no query params
  is_attachment = !FileHelper.is_supported_media?(filename)

  # Attachments are kept off the CDN when either of these settings is on.
  # NOTE(review): the first setting name was missing from this line in the
  # reviewed source; `login_required` is assumed - confirm against the repository.
  no_cdn = SiteSetting.login_required || SiteSetting.prevent_anons_from_downloading_files
  unless is_attachment && no_cdn
    url = Discourse.store.cdn_url(url)
    url = local_cdn_url(url) if Discourse.store.external?
  end

  schemaless(url)
rescue URI::Error
  url
end

.encode(url) ⇒ Object



54
55
56
# File 'lib/url_helper.rb', line 54

# Encodes a URL by delegating to Addressable::URI.encode.
#
# @param url [String] the URL to encode
# @return [Object] whatever Addressable::URI.encode returns for +url+
def self.encode(url)
  Addressable::URI.encode(url)
end

.encode_and_parse(url) ⇒ Object



50
51
52
# File 'lib/url_helper.rb', line 50

# Encodes +url+ with Addressable::URI.encode, then parses the encoded string
# with Ruby's URI.parse.
#
# @param url [String] the URL to encode and parse
# @return [URI::Generic] the result of URI.parse on the encoded URL
def self.encode_and_parse(url)
  encoded = Addressable::URI.encode(url)
  URI.parse(encoded)
end

.encode_component(url_component) ⇒ Object



62
63
64
# File 'lib/url_helper.rb', line 62

# Encodes a single URL component by delegating to
# Addressable::URI.encode_component.
#
# @param url_component [String] the component to encode
# @return [Object] whatever Addressable::URI.encode_component returns
def self.encode_component(url_component)
  Addressable::URI.encode_component(url_component)
end

.is_local(url) ⇒ Object



66
67
68
69
70
71
72
73
# File 'lib/url_helper.rb', line 66

# Reports whether +url+ points at this site: an upload known to the store, a
# path under "<base_path>/(assets|plugins|images)/", or a URL that starts with
# the asset host (falling back to Discourse.base_url_no_prefix when no asset
# host is configured).
#
# @param url [String, nil] the URL to check
# @return [Boolean] false for a blank URL; otherwise the first truthy result
#   of the three checks above, or false if none match
def self.is_local(url)
  return false unless url.present?

  # Anchor with \A (not ^) so text after a newline can never make a foreign
  # URL look like a local asset path, and escape base_path so characters such
  # as "." in it are matched literally.
  asset_path = %r{\A#{Regexp.escape(Discourse.base_path.to_s)}/(assets|plugins|images)/}

  Discourse.store.has_been_uploaded?(url) || url.match?(asset_path) ||
    url.start_with?(Discourse.asset_host || Discourse.base_url_no_prefix)
end

.is_valid_url?(url) ⇒ Boolean

Heuristic checks to determine if the URL string is a valid absolute URL, path or anchor

Returns:

  • (Boolean)


28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/url_helper.rb', line 28

# Heuristic check that +url+ is a valid absolute URL, path or anchor.
#
# Accepted inputs:
# * any parseable string starting with "/" (a path) or "#" (an anchor)
# * mailto: URIs
# * http, https, ftp and ldap URIs written as "scheme://" followed by a
#   character other than "/"
#
# @param url [String] the candidate URL
# @return [Boolean] true when one of the rules above matches; false otherwise,
#   including when URI.parse raises URI::InvalidURIError
def self.is_valid_url?(url)
  uri = URI.parse(url)

  return true if url.start_with?("/", "#")
  return false unless uri.scheme
  return true if uri.is_a?(URI::MailTo)

  # URI.parse lowercases the scheme, so compare case-insensitively against the
  # raw string ("HTTP://example.com" is a valid URL). The scheme is escaped
  # because "+", "." and "-" are legal scheme characters.
  has_authority = url.match?(%r{\A#{Regexp.escape(uri.scheme)}://[^/]}i)

  # URI::HTTPS is a subclass of URI::HTTP, so it is covered by the HTTP check.
  has_authority && [URI::HTTP, URI::FTP, URI::LDAP].any? { |klass| uri.is_a?(klass) }
rescue URI::InvalidURIError
  false
end

.local_cdn_url(url) ⇒ Object



161
162
163
164
165
166
167
168
# File 'lib/url_helper.rb', line 161

# Points +url+ at Discourse.asset_host.
#
# @param url [String] an upload path or a URL
# @return [String] +url+ unchanged when no asset host is configured; the asset
#   host prepended when +url+ starts with "/<upload_path>/"; otherwise +url+
#   with the first occurrence of Discourse.base_url_no_prefix replaced by the
#   asset host
def self.local_cdn_url(url)
  asset_host = Discourse.asset_host
  return url if asset_host.blank?

  upload_prefix = "/#{Discourse.store.upload_path}/"
  return "#{asset_host}#{url}" if url.start_with?(upload_prefix)

  url.sub(Discourse.base_url_no_prefix, asset_host)
end

.normalized_encode(uri) ⇒ Object



92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/url_helper.rb', line 92

# Normalizes and encodes a URL, preferring Ruby's URI and falling back to
# Addressable.
#
# @param uri [#to_s] the URL (string or URI-like object) to normalize
# @return [Object] the result of normalize_with_ruby_uri, or of
#   normalize_with_addressable when the fallback path is taken
# @raise [ArgumentError] when the URL is longer than MAX_URL_LENGTH
def self.normalized_encode(uri)
  url = uri.to_s

  if url.length > MAX_URL_LENGTH
    raise ArgumentError, "URL starting with #{url[0..100]} is too long"
  end

  # Ideally we would go straight to `Addressable::URI.normalized_encode`, but that
  # implementation has edge-case issues such as
  # https://github.com/sporkmonger/addressable/issues/472.
  # To temporarily work around those for the majority of cases, we try `::URI` first
  # and fall back to Addressable if that fails (e.g. due to non-ASCII characters).
  # Hopefully this can be simplified back to `Addressable::URI.normalized_encode`
  # in the future.

  # Two cases skip `::URI` entirely:
  # * mailto: links - we expect mailto:test%40test.com to normalize to
  #   mailto:test@test.com
  # * anything that does not match URI.regexp - it is definitely not going to
  #   parse with URI.parse
  needs_addressable = url.start_with?("mailto:") || !url.match?(/\A#{URI.regexp}\z/)
  return normalize_with_addressable(url) if needs_addressable

  begin
    normalize_with_ruby_uri(url)
  rescue URI::Error
    normalize_with_addressable(url)
  end
end

.rails_route_from_url(url) ⇒ Object



119
120
121
122
123
124
# File 'lib/url_helper.rb', line 119

# Resolves the Rails route for the path portion of +url+.
#
# @param url [String] the URL whose path should be recognized
# @return [Object, nil] the result of
#   Rails.application.routes.recognize_path for the encoded URL's path, or nil
#   when Addressable::URI::InvalidURIError or URI::InvalidComponentError is raised
def self.rails_route_from_url(url)
  encoded = encode(url)
  request_path = URI.parse(encoded).path
  Rails.application.routes.recognize_path(request_path)
rescue Addressable::URI::InvalidURIError, URI::InvalidComponentError
  nil
end

.relaxed_parse(url) ⇒ Object

Currently this handles invalid URLs that a browser's address bar accepts, where a second # is not encoded

In the longer term, we could add support for simpleidn and encode Unicode domains



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/url_helper.rb', line 10

# Parses +url+ leniently: everything after the first "#" is treated as the
# fragment, and if that fragment itself contains a "#" it is encoded before
# being attached to the parsed URI.
#
# @param url [String] the URL to parse
# @return [URI::Generic, nil] the parsed URI with its fragment set, or nil
#   when a URI::Error is raised
def self.relaxed_parse(url)
  base, fragment = url.split("#", 2)
  parsed = URI.parse(base)
  return unless parsed

  if fragment&.include?("#")
    # Addressable::URI::CharacterClasses::UNRESERVED is used here because without it
    # the # in the fragment is not encoded
    fragment =
      Addressable::URI.encode_component(
        fragment,
        Addressable::URI::CharacterClasses::UNRESERVED,
      )
  end

  parsed.fragment = fragment
  parsed
rescue URI::Error
  nil
end

.schemaless(url) ⇒ Object



84
85
86
# File 'lib/url_helper.rb', line 84

# Removes a leading "http:" (any letter case) from +url+, leaving a
# protocol-relative URL. Other schemes, including "https:", are left alone.
#
# @param url [String] the URL to strip
# @return [String] a copy of +url+ without the leading "http:"
def self.schemaless(url)
  http_scheme = /\Ahttp:/i
  url.sub(http_scheme, "")
end

.secure_proxy_without_cdn(url) ⇒ Object



88
89
90
# File 'lib/url_helper.rb', line 88

# Converts an upload URL to its secure-uploads URL via
# Upload.secure_uploads_url_from_upload_url, then resolves it with .absolute
# using a nil CDN host.
#
# @param url [String] the upload URL
# @return [String] whatever .absolute returns for the secure-uploads URL
def self.secure_proxy_without_cdn(url)
  secure_url = Upload.secure_uploads_url_from_upload_url(url)
  absolute(secure_url, nil)
end

.unencode(url) ⇒ Object



58
59
60
# File 'lib/url_helper.rb', line 58

# Reverses URL encoding by delegating to Addressable::URI.unencode.
#
# @param url [String] the encoded URL
# @return [Object] whatever Addressable::URI.unencode returns for +url+
def self.unencode(url)
  Addressable::URI.unencode(url)
end