Module: Onebox::Helpers

Defined in:
lib/onebox/helpers.rb

Defined Under Namespace

Classes: DownloadTooLarge

Class Method Summary collapse

Class Method Details

.audio_placeholder_html ⇒ Object



290
291
292
# File 'lib/onebox/helpers.rb', line 290

# Returns the fixed placeholder markup whose inner span carries the
# `placeholder-icon audio` classes.
#
# @return [String] HTML fragment
def self.audio_placeholder_html
  %(<div class='onebox-placeholder-container'><span class='placeholder-icon audio'></span></div>)
end

.blank?(value) ⇒ Boolean

Returns:

  • (Boolean)


193
194
195
196
197
198
199
200
201
# File 'lib/onebox/helpers.rb', line 193

# Reports whether +value+ carries no meaningful content.
#
# * nil is blank.
# * A String is blank when it is empty or contains only whitespace.
# * Anything else that responds to +empty?+ is blank when +empty?+ is truthy.
# * Any remaining value is blank only when it is falsy.
#
# @param value [Object]
# @return [Boolean]
def self.blank?(value)
  case value
  when nil
    true
  when String
    # Blank unless at least one non-whitespace character is present.
    value.empty? || !value.match?(/[[:^space:]]/)
  else
    return !value unless value.respond_to?(:empty?)

    value.empty? ? true : false
  end
end

.clean(html) ⇒ Object



23
24
25
# File 'lib/onebox/helpers.rb', line 23

# Replaces every `<...>` tag-like run in +html+ with a single space and then
# drops all newline characters.
#
# @param html [String]
# @return [String] a new string; +html+ itself is not modified
def self.clean(html)
  without_tags = html.gsub(/<[^>]+>/, " ")
  without_tags.delete("\n")
end

.click_to_scroll_div(width = 690, height = 400) ⇒ Object



189
190
191
# File 'lib/onebox/helpers.rb', line 189

# Builds a transparent, relatively positioned <div> of the given size whose
# inline onClick handler sets its own `pointerEvents` style to 'none'.
#
# @param width [Integer] width in pixels (default 690)
# @param height [Integer] height in pixels; also used for `top` and the
#   negative `margin-top` (default 400)
# @return [String] HTML fragment
def self.click_to_scroll_div(width = 690, height = 400)
  declarations = [
    "background:transparent",
    "position:relative",
    "width:#{width}px",
    "height:#{height}px",
    "top:#{height}px",
    "margin-top:-#{height}px",
  ]
  inline_style = declarations.map { |declaration| "#{declaration};" }.join

  %(<div style="#{inline_style}" onClick="style.pointerEvents='none'"></div>)
end

.fetch_content_length(location) ⇒ Object



152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# File 'lib/onebox/helpers.rb', line 152

# Sends a HEAD request for +location+ and reports the response's
# +content_length+.
#
# @param location [String] URL to probe; it is passed to +URI()+
# @return [Object, nil] the value of +response.content_length+, or nil when
#   the status guard below rejects the response
def self.fetch_content_length(location)
  uri = URI(location)

  FinalDestination::HTTP.start(
    uri.host,
    uri.port,
    open_timeout: Onebox.options.connect_timeout,
    use_ssl: uri.is_a?(URI::HTTPS),
  ) do |http|
    http.read_timeout = Onebox.options.timeout
    if uri.is_a?(URI::HTTPS)
      http.use_ssl = true
      # NOTE(review): certificate verification is disabled for HTTPS targets —
      # confirm this is intentional.
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE
    end

    # NOTE(review): joining with "?" leaves a trailing "?" on the request
    # target when uri.query is nil.
    http.request_head([uri.path, uri.query].join("?")) do |response|
      code = response.code.to_i
      # nil is returned here only when the status is not 200 AND a content
      # length is present. A non-200 response whose content length is blank
      # falls through and returns that blank value.
      # NOTE(review): confirm whether `&& !blank?` was the intended condition.
      return nil unless code === 200 || Onebox::Helpers.blank?(response.content_length)
      return response.content_length
    end
  end
end

.fetch_html_doc(url, headers = nil, body_cacher = nil) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/onebox/helpers.rb', line 27

# Fetches +url+ and parses the body with Nokogiri. Unless canonical links are
# ignored for this page, a `<link rel="canonical">` pointing at a different
# host/path is resolved, fetched and parsed in place of the first document.
#
# @param url [String] page to fetch
# @param headers [Hash, nil] forwarded to +fetch_response+
# @param body_cacher [Object, nil] forwarded to +fetch_response+
# @return [Object] the document produced by +Nokogiri.HTML+
def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
  # Any StandardError raised while fetching is swallowed; the response is
  # then nil and is handed to Nokogiri.HTML as-is.
  response =
    (
      begin
        fetch_response(url, headers: headers, body_cacher: body_cacher)
      rescue StandardError
        nil
      end
    )
  doc = Nokogiri.HTML(response)
  uri = Addressable::URI.parse(url)

  # Canonical handling is skipped either by an og:ignore_canonical meta tag
  # whose content is "true", or when the hostname matches an entry in
  # IGNORE_CANONICAL_DOMAINS (constant defined outside this listing).
  ignore_canonical_tag = doc.at('meta[property="og:ignore_canonical"]')
  should_ignore_canonical =
    IGNORE_CANONICAL_DOMAINS.map { |hostname| uri.hostname.match?(hostname) }.any?

  if !(ignore_canonical_tag && ignore_canonical_tag["content"].to_s == "true") &&
       !should_ignore_canonical
    # prefer canonical link
    canonical_link = doc.at('//link[@rel="canonical"]/@href')
    canonical_uri = Addressable::URI.parse(canonical_link)
    # Only follow the canonical link when its host+path differs from the
    # requested URL's host+path (scheme, query and fragment are not compared).
    if canonical_link && canonical_uri &&
         "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
      # `uri` is reassigned to whatever FinalDestination#resolve returns.
      uri =
        FinalDestination.new(
          canonical_link,
          Oneboxer.get_final_destination_options(canonical_link),
        ).resolve
      if uri.present?
        response =
          (
            begin
              fetch_response(uri.to_s, headers: headers, body_cacher: body_cacher)
            rescue StandardError
              nil
            end
          )
        # Keep the first document when the canonical fetch produced nothing.
        doc = Nokogiri.HTML(response) if response
      end
    end
  end

  doc
end

.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil) ⇒ Object

Raises:

  • (Net::HTTPError)


72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/onebox/helpers.rb', line 72

# Performs a GET for +location+, following redirects, and returns the body.
#
# @param location [String] absolute URL, or a host-less reference that is
#   joined onto +domain+
# @param redirect_limit [Integer] remaining redirects allowed; capped at
#   +Onebox.options.redirect_limit+
# @param domain [String, nil] base used when +location+ has no host
# @param headers [Hash, nil] request headers
# @param body_cacher [Object, nil] optional cache; used only when it responds
#   to +fetch_cached_response_body+
# @return [String, Object] the downloaded body, or the cached body returned
#   by +body_cacher+
# @raise [Net::HTTPError] when +redirect_limit+ has reached 0
# @raise [DownloadTooLarge] when the body exceeds
#   +Onebox.options.max_download_kb+ kilobytes
# @raise [Timeout::Error] when reading the body takes longer than
#   +Onebox.options.timeout+
def self.fetch_response(
  location,
  redirect_limit: 5,
  domain: nil,
  headers: nil,
  body_cacher: nil
)
  # Never allow more redirects than the configured maximum.
  redirect_limit = Onebox.options.redirect_limit if redirect_limit >
    Onebox.options.redirect_limit

  raise Net::HTTPError.new("HTTP redirect too deep", location) if redirect_limit == 0

  uri = Addressable::URI.parse(location)
  # Host-less locations (e.g. from a redirect) are resolved against +domain+.
  uri = Addressable::URI.join(domain, uri) if !uri.host

  use_body_cacher = body_cacher && body_cacher.respond_to?("fetch_cached_response_body")
  if use_body_cacher
    response_body = body_cacher.fetch_cached_response_body(uri.to_s)

    # A cache hit short-circuits the network request entirely.
    return response_body if response_body.present?
  end

  result = StringIO.new
  FinalDestination::HTTP.start(
    uri.host,
    uri.port,
    open_timeout: Onebox.options.connect_timeout,
    use_ssl: uri.normalized_scheme == "https",
  ) do |http|
    http.read_timeout = Onebox.options.timeout
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE # Work around path building bugs

    headers ||= {}

    # NOTE(review): when the caller supplied a headers hash, the User-Agent
    # is written into that same hash object.
    if Onebox.options.user_agent && !headers["User-Agent"]
      headers["User-Agent"] = Onebox.options.user_agent
    end

    request = Net::HTTP::Get.new(uri.request_uri, headers)
    start_time = Time.now

    size_bytes = Onebox.options.max_download_kb * 1024
    http.request(request) do |response|
      if cookie = response.get_fields("set-cookie")
        # HACK: If this breaks again in the future, use HTTP::CookieJar from gem 'http-cookie'
        # See test: it "does not send cookies to the wrong domain"
        redir_header = { "Cookie" => cookie.join("; ") }
      end

      redir_header = nil unless redir_header.is_a? Hash

      code = response.code.to_i
      unless code === 200
        # Anything that is neither 200 nor one of these redirect codes is
        # raised through response.error!.
        response.error! unless [301, 302, 303, 307, 308].include?(code)

        # Follow the redirect with one fewer hop allowed. Only the cookie
        # header (if any) is sent on; the original headers and body_cacher
        # are not forwarded.
        return(
          fetch_response(
            response["location"],
            redirect_limit: redirect_limit - 1,
            domain: "#{uri.scheme}://#{uri.host}",
            headers: redir_header,
          )
        )
      end

      # Stream the body, enforcing the size and elapsed-time limits after
      # each chunk is written.
      response.read_body do |chunk|
        result.write(chunk)
        raise DownloadTooLarge.new if result.size > size_bytes
        raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
      end

      if use_body_cacher && body_cacher.cache_response_body?(uri)
        body_cacher.cache_response_body(uri.to_s, result.string)
      end

      return result.string
    end
  end
end

.generic_placeholder_html ⇒ Object



298
299
300
# File 'lib/onebox/helpers.rb', line 298

# Returns the fixed placeholder markup whose inner span carries the
# `placeholder-icon generic` classes.
#
# @return [String] HTML fragment
def self.generic_placeholder_html
  %(<div class='onebox-placeholder-container'><span class='placeholder-icon generic'></span></div>)
end

.get(meta, attr) ⇒ Object



208
209
210
# File 'lib/onebox/helpers.rb', line 208

# Looks up +attr+ in +meta+ and returns its sanitized value.
#
# @param meta [#[], nil] container to read from
# @param attr [Object] key passed to +meta[]+
# @return [Object, nil] the result of +sanitize+, or nil when +meta+ is
#   falsy or the looked-up value is blank
def self.get(meta, attr)
  return nil unless meta
  return nil if blank?(meta[attr])

  sanitize(meta[attr])
end

.get_absolute_image_url(src, url) ⇒ Object



232
233
234
235
236
237
238
# File 'lib/onebox/helpers.rb', line 232

# Resolves +src+ against the page +url+ to produce an absolute URL string.
#
# @param src [String] image reference, absolute or relative
# @param url [String] URL the reference is resolved against
# @return [String, Object] the merged URL, or +src+ unchanged when parsing or
#   merging raises ArgumentError, URI::BadURIError or URI::InvalidURIError
def self.get_absolute_image_url(src, url)
  base = URI.parse(url)
  base.merge(src).to_s
rescue ArgumentError, URI::BadURIError, URI::InvalidURIError
  src
end

.image_placeholder_html ⇒ Object



282
283
284
# File 'lib/onebox/helpers.rb', line 282

# Returns the fixed placeholder markup whose inner span carries the
# `placeholder-icon image` classes.
#
# @return [String] HTML fragment
def self.image_placeholder_html
  %(<div class='onebox-placeholder-container'><span class='placeholder-icon image'></span></div>)
end

.map_placeholder_html ⇒ Object



294
295
296
# File 'lib/onebox/helpers.rb', line 294

# Returns the fixed placeholder markup whose inner span carries the
# `placeholder-icon map` classes.
#
# @return [String] HTML fragment
def self.map_placeholder_html
  %(<div class='onebox-placeholder-container'><span class='placeholder-icon map'></span></div>)
end

.normalize_url_for_output(url) ⇒ Object



217
218
219
220
221
222
223
224
225
226
227
228
229
230
# File 'lib/onebox/helpers.rb', line 217

# Prepares +url+ for embedding in output: spaces become `%20`, single and
# double quotes become `&apos;` / `&quot;`, and any character outside the
# allow-list below is removed. The caller's string is left untouched.
#
# @param url [String, nil]
# @return [String] the cleaned URL, or "" when +url+ is falsy or the cleaned
#   URL has no host
def self.normalize_url_for_output(url)
  return "" unless url

  # expect properly encoded url, remove any unsafe chars
  substitutions = [
    [" ", "%20"],
    ["'", "&apos;"],
    ['"', "&quot;"],
    [/[^\w\-`.~:\/?#\[\]@!$&'\(\)*+,;=%\p{M}’]/, ""],
  ]

  cleaned = url.dup
  substitutions.each { |pattern, replacement| cleaned.gsub!(pattern, replacement) }

  Addressable::URI.parse(cleaned).host ? cleaned : ""
end

.pretty_filesize(size) ⇒ Object



175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/onebox/helpers.rb', line 175

# Formats a byte count with a binary (1024-based) unit suffix.
#
# Values below 2 * 1024 are printed as-is with "B". Larger values are shown
# with two decimals in the first unit for which the value is below two of
# the next unit up, topping out at "EB".
#
# @param size [Numeric] number of bytes
# @return [String] e.g. "512 B", "2.00 KB", "5.00 MB"
def self.pretty_filesize(size)
  units = %w[B KB MB GB TB PB EB]
  base = 1024

  return "#{size} #{units.first}" if size < 2 * base

  bytes = size.to_f
  largest = units.size - 1
  # Pick the smallest exponent whose "two of the next unit" bound exceeds
  # the value; anything beyond that is reported in the largest unit.
  exponent = (1...largest).find { |power| bytes < 2 * (base**(power + 1)) } || largest

  "#{format("%.2f", bytes / (base**exponent))} #{units[exponent]}"
end

.sanitize(value, length = 50) ⇒ Object



212
213
214
215
# File 'lib/onebox/helpers.rb', line 212

# Runs +value+ through +Sanitize.fragment+ and strips surrounding whitespace.
#
# @param value [Object] text to sanitize
# @param length [Integer] accepted for compatibility; not used by this method
# @return [String, nil] the sanitized text, or nil when +value+ is blank
def self.sanitize(value, length = 50)
  blank?(value) ? nil : Sanitize.fragment(value).strip
end

.symbolize_keys(hash) ⇒ Object



12
13
14
15
16
17
18
19
20
21
# File 'lib/onebox/helpers.rb', line 12

# Returns a copy of +hash+ in which every String key is converted to a
# Symbol, recursing into values that are themselves Hashes. Non-String keys
# and non-Hash values (including arrays of hashes) are carried over as-is.
#
# @param hash [Hash, nil]
# @return [Hash] a new hash; {} when +hash+ is nil
def self.symbolize_keys(hash)
  return {} if hash.nil?

  hash.each_with_object({}) do |(key, value), symbolized|
    symbol_key = key.is_a?(String) ? key.to_sym : key
    symbolized[symbol_key] = value.is_a?(Hash) ? symbolize_keys(value) : value
  end
end

.truncate(string, length = 50) ⇒ Object



203
204
205
206
# File 'lib/onebox/helpers.rb', line 203

# Shortens +string+ to at most +length+ characters plus a "..." suffix.
#
# The cut is made at the last space found at or before index +length+; when
# there is no such space the string is cut at exactly +length+ characters.
#
# @param string [String, nil]
# @param length [Integer] maximum size before truncation applies (default 50)
# @return [String, nil] +string+ itself when nil or short enough, otherwise
#   a new truncated string ending in "..."
def self.truncate(string, length = 50)
  return string if string.nil?
  return string unless string.size > length

  cut_at = string.rindex(" ", length) || length
  string[0...cut_at] + "..."
end

.uri_encode(url) ⇒ Object

Percent-encodes a URI string per RFC3986 - tools.ietf.org/html/rfc3986



241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
# File 'lib/onebox/helpers.rb', line 241

# Percent-encodes a URI string per RFC3986 - tools.ietf.org/html/rfc3986
#
# The URL is parsed, each component (scheme, authority, path, query,
# fragment) is encoded with its own allowed-character set, and the pieces
# are reassembled into a string. "%" is in the allowed set for path, query
# and fragment.
#
# @param url [String, nil]
# @return [String] the encoded URI, or "" when +url+ is falsy
def self.uri_encode(url)
  return "" unless url

  source = Addressable::URI.parse(url)
  classes = Addressable::URI::CharacterClasses
  encode = ->(component, allowed) { Addressable::URI.encode_component(component, allowed) }

  Addressable::URI.new(
    scheme: encode.call(source.scheme, classes::SCHEME),
    authority: encode.call(source.authority, classes::AUTHORITY),
    path: encode.call(source.path, classes::PATH + "\\%"),
    query:
      encode.call(
        source.query,
        "a-zA-Z0-9\\-\\.\\_\\~\\$\\&\\*\\,\\=\\:\\@\\?\\%",
      ),
    fragment:
      encode.call(
        source.fragment,
        "a-zA-Z0-9\\-\\.\\_\\~\\!\\$\\&\\'\\(\\)\\*\\+\\,\\;\\=\\:\\/\\?\\%",
      ),
  ).to_s
end

.uri_unencode(url) ⇒ Object



278
279
280
# File 'lib/onebox/helpers.rb', line 278

# Thin wrapper that delegates to +Addressable::URI.unencode+.
#
# @param url [Object] value handed straight to +Addressable::URI.unencode+
# @return [Object] whatever +Addressable::URI.unencode+ returns
def self.uri_unencode(url)
  Addressable::URI.unencode(url)
end

.video_placeholder_html ⇒ Object



286
287
288
# File 'lib/onebox/helpers.rb', line 286

# Returns the fixed placeholder markup whose inner span carries the
# `placeholder-icon video` classes.
#
# @return [String] HTML fragment
def self.video_placeholder_html
  %(<div class='onebox-placeholder-container'><span class='placeholder-icon video'></span></div>)
end