Module: Onebox::Helpers

Defined in:
lib/onebox/helpers.rb

Defined Under Namespace

Classes: DownloadTooLarge

Class Method Summary

Class Method Details

.audio_placeholder_html ⇒ Object



222
223
224
# File 'lib/onebox/helpers.rb', line 222

# Static placeholder markup for an audio onebox.
#
# @return [String] a container div wrapping an "audio" placeholder icon span
def self.audio_placeholder_html
  icon = "<span class='placeholder-icon audio'></span>"
  "<div class='onebox-placeholder-container'>#{icon}</div>"
end

.blank?(value) ⇒ Boolean

Returns:

  • (Boolean)


143
144
145
146
147
148
149
150
151
# File 'lib/onebox/helpers.rb', line 143

# Reports whether +value+ carries no meaningful content.
#
# * nil is blank.
# * A String is blank when it is empty or contains no non-space character.
# * Anything responding to +empty?+ is blank when it is empty.
# * Every other object is blank only when it is falsy.
#
# @param value [Object] the value to inspect
# @return [Boolean]
def self.blank?(value)
  case value
  when nil
    true
  when String
    # No character outside the POSIX space class => whitespace-only.
    value.empty? || value !~ /[[:^space:]]/
  else
    if value.respond_to?(:empty?)
      !!value.empty?
    else
      !value
    end
  end
end

.clean(html) ⇒ Object



23
24
25
# File 'lib/onebox/helpers.rb', line 23

# Replaces every <...> tag in +html+ with a single space and removes newlines.
#
# @param html [String] markup to flatten
# @return [String] a new string without tags or newline characters
def self.clean(html)
  without_tags = html.gsub(/<[^>]+>/, ' ')
  without_tags.delete("\n")
end

.click_to_scroll_div(width = 690, height = 400) ⇒ Object



139
140
141
# File 'lib/onebox/helpers.rb', line 139

# Builds a transparent overlay div of the given size. The div is shifted down
# by +height+ and pulled back up with an equal negative top margin, and its
# onClick handler sets its own pointer-events to 'none'.
#
# @param width [Integer] overlay width in pixels
# @param height [Integer] overlay height in pixels
# @return [String] the overlay markup
def self.click_to_scroll_div(width = 690, height = 400)
  css = [
    "background:transparent",
    "position:relative",
    "width:#{width}px",
    "height:#{height}px",
    "top:#{height}px",
    "margin-top:-#{height}px"
  ].map { |rule| "#{rule};" }.join

  %(<div style="#{css}" onClick="style.pointerEvents='none'"></div>)
end

.fetch_content_length(location) ⇒ Object



101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/onebox/helpers.rb', line 101

# Sends a HEAD request to +location+ and reports the advertised body size.
#
# @param location [String] URL to probe
# @return [Integer, nil] the response's Content-Length when the status is 200
#   and the header is present; nil otherwise
def self.fetch_content_length(location)
  uri = URI(location)

  # Build the request target explicitly: an empty path becomes "/" and the
  # "?" separator is only added when the URL actually has a query. Joining
  # path and query unconditionally produced "/path?" (or a bare "?").
  request_target = uri.path.to_s.empty? ? "/" : uri.path
  request_target = "#{request_target}?#{uri.query}" if uri.query

  Net::HTTP.start(uri.host, uri.port, open_timeout: Onebox.options.connect_timeout, use_ssl: uri.is_a?(URI::HTTPS)) do |http|
    http.read_timeout = Onebox.options.timeout
    if uri.is_a?(URI::HTTPS)
      http.use_ssl = true
      # NOTE(review): certificate verification is switched off here, and this
      # runs inside the start block, i.e. after the connection was opened.
      # Confirm whether this is intended and whether it has any effect.
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE
    end

    http.request_head(request_target) do |response|
      # Only a 200 response is trusted; content_length is nil when the
      # header is absent, so that case also yields nil.
      return nil unless response.code.to_i == 200
      return response.content_length
    end
  end
end

.fetch_html_doc(url, headers = nil, body_cacher = nil) ⇒ Object



27
28
29
30
# File 'lib/onebox/helpers.rb', line 27

# Fetches +url+ via fetch_response and parses the body with Nokogiri::HTML.
# Any StandardError raised while fetching is swallowed, in which case nil is
# handed to the parser.
#
# @param url [String] address of the page to fetch
# @param headers [Hash, nil] forwarded to fetch_response as +headers:+
# @param body_cacher [Object, nil] forwarded to fetch_response as +body_cacher:+
# @return [Object] whatever Nokogiri::HTML returns for the fetched body
def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
  body =
    begin
      fetch_response(url, headers: headers, body_cacher: body_cacher)
    rescue StandardError
      nil
    end

  Nokogiri::HTML(body)
end

.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil) ⇒ Object

Raises:

  • (Net::HTTPError)


32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/onebox/helpers.rb', line 32

# Downloads the body at +location+ with a GET request, following redirects.
#
# @param location [String] URL to fetch; when it has no host it is joined
#   onto +domain+
# @param redirect_limit [Integer] remaining redirects allowed; capped at
#   Onebox.options.redirect_limit
# @param domain [String, nil] base ("scheme://host") used to resolve a
#   host-less +location+
# @param headers [Hash, nil] request headers; a User-Agent from
#   Onebox.options is added when none is set (a hash passed by the caller is
#   modified in place)
# @param body_cacher [Object, nil] optional cache; only used when it responds
#   to fetch_cached_response_body. cache_response_body? and
#   cache_response_body are also called on it.
# @return [String] the cached body if the cacher supplies one, otherwise the
#   downloaded body
# @raise [Net::HTTPError] when redirect_limit has reached 0
# @raise [DownloadTooLarge] when more than Onebox.options.max_download_kb
#   kilobytes have been received
# @raise [Timeout::Error] when reading the body takes longer than
#   Onebox.options.timeout
def self.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil)
  # Never allow more redirects than the globally configured maximum.
  redirect_limit = Onebox.options.redirect_limit if redirect_limit > Onebox.options.redirect_limit

  raise Net::HTTPError.new('HTTP redirect too deep', location) if redirect_limit == 0

  uri = Addressable::URI.parse(location)
  # A host-less location (e.g. a relative redirect target) is resolved against +domain+.
  uri = Addressable::URI.join(domain, uri) if !uri.host

  use_body_cacher = body_cacher && body_cacher.respond_to?('fetch_cached_response_body')
  if use_body_cacher
    response_body = body_cacher.fetch_cached_response_body(uri.to_s)

    # Cache hit: skip the network entirely.
    # NOTE(review): present? is not core Ruby — presumably ActiveSupport; confirm it is loaded.
    if response_body.present?
      return response_body
    end
  end

  # Accumulates the streamed body chunks.
  result = StringIO.new
  Net::HTTP.start(uri.host, uri.port, open_timeout: Onebox.options.connect_timeout, use_ssl: uri.normalized_scheme == 'https') do |http|
    http.read_timeout = Onebox.options.timeout
    # NOTE(review): this disables certificate verification and is assigned inside
    # the start block — confirm it is intended and that it takes effect here.
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE  # Work around path building bugs

    headers ||= {}

    # Apply the configured User-Agent unless the caller already set one.
    if Onebox.options.user_agent && !headers['User-Agent']
      headers['User-Agent'] = Onebox.options.user_agent
    end

    request = Net::HTTP::Get.new(uri.request_uri, headers)
    # Reference point for the overall download time check below.
    start_time = Time.now

    # Download cap converted from kilobytes to bytes.
    size_bytes = Onebox.options.max_download_kb * 1024
    http.request(request) do |response|

      if cookie = response.get_fields('set-cookie')
        # HACK: If this breaks again in the future, use HTTP::CookieJar from gem 'http-cookie'
        # See test: it "does not send cookies to the wrong domain"
        redir_header = { 'Cookie' => cookie.join('; ') }
      end

      # No Set-Cookie header on this response => no headers for the redirect request.
      redir_header = nil unless redir_header.is_a? Hash

      code = response.code.to_i
      unless code === 200
        # Anything that is neither 200 nor one of these redirect statuses is raised as an error.
        response.error! unless [301, 302, 303, 307, 308].include?(code)

        # Follow the redirect with one fewer hop allowed. Only the cookie header
        # (if any) is sent along; the original headers and body_cacher are not forwarded.
        return fetch_response(
          response['location'],
          redirect_limit: redirect_limit - 1,
          domain: "#{uri.scheme}://#{uri.host}",
          headers: redir_header
        )
      end

      # Stream the body, enforcing the size and time limits after every chunk.
      response.read_body do |chunk|
        result.write(chunk)
        raise DownloadTooLarge.new if result.size > size_bytes
        raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
      end

      # Store the downloaded body when the cacher opts in for this URI.
      if use_body_cacher && body_cacher.cache_response_body?(uri)
        body_cacher.cache_response_body(uri.to_s, result.string)
      end

      return result.string
    end
  end
end

.generic_placeholder_html ⇒ Object



230
231
232
# File 'lib/onebox/helpers.rb', line 230

# Static placeholder markup for a generic onebox.
#
# @return [String] a container div wrapping a "generic" placeholder icon span
def self.generic_placeholder_html
  icon = "<span class='placeholder-icon generic'></span>"
  "<div class='onebox-placeholder-container'>#{icon}</div>"
end

.get(meta, attr) ⇒ Object



158
159
160
# File 'lib/onebox/helpers.rb', line 158

# Looks up +attr+ in +meta+ and returns its sanitized value.
#
# @param meta [#[], nil] container to read from; a falsy value yields nil
# @param attr [Object] key passed to meta[]
# @return [Object, nil] sanitize(meta[attr]), or nil when +meta+ is falsy or
#   the entry is blank
def self.get(meta, attr)
  return nil unless meta
  return nil if blank?(meta[attr])

  sanitize(meta[attr])
end

.get_absolute_image_url(src, url) ⇒ Object



182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/onebox/helpers.rb', line 182

# Turns an image reference found on a page into an absolute URL.
#
# * nil is returned unchanged.
# * A protocol-relative +src+ ("//host/...") gets the page's scheme.
# * A +src+ that already starts with http:// or https:// is returned as is.
# * Any other +src+ is attached to the page's scheme, host and port. When
#   +src+ does not start with "/" and the page URL has a path, it is appended
#   to that path; otherwise it is placed at the root.
#
# @param src [String, nil] image reference as found in the document
# @param url [String] URL of the page the reference came from
# @return [String, nil] the absolute image URL, or nil when +src+ is nil
def self.get_absolute_image_url(src, url)
  if src && !!(src =~ /^\/\//)
    uri = URI(url)
    src = "#{uri.scheme}:#{src}"
  elsif src && src.match(/^https?:\/\//i).nil?
    uri = URI(url)

    # Keep an explicit non-default port (e.g. http://localhost:3000); without
    # it the image would be requested from the wrong server.
    port_suffix = (uri.port && uri.port != uri.default_port) ? ":#{uri.port}" : ""
    origin = "#{uri.scheme}://#{uri.host.sub(/\/$/, '')}#{port_suffix}"
    relative = src.sub(/^\//, '')

    # NOTE(review): a path-relative src is appended to the page's full path
    # (minus a trailing slash), not to its parent directory — confirm intended.
    src = if !src.start_with?("/") && !uri.path.to_s.empty?
      "#{origin}#{uri.path.sub(/\/$/, '')}/#{relative}"
    else
      "#{origin}/#{relative}"
    end
  end
  src
end

.map_placeholder_html ⇒ Object



226
227
228
# File 'lib/onebox/helpers.rb', line 226

# Static placeholder markup for a map onebox.
#
# @return [String] a container div wrapping a "map" placeholder icon span
def self.map_placeholder_html
  icon = "<span class='placeholder-icon map'></span>"
  "<div class='onebox-placeholder-container'>#{icon}</div>"
end

.normalize_url_for_output(url) ⇒ Object



167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/onebox/helpers.rb', line 167

# Prepares a URL for embedding in generated markup.
#
# Spaces become %20, single and double quotes become &apos; / &quot;, and any
# remaining character outside the allowed set is dropped. The input is
# expected to be properly encoded already; it is never modified.
#
# @param url [String, nil] the URL to normalize
# @return [String] the cleaned URL, or "" when +url+ is nil/false or the
#   cleaned result has no host
def self.normalize_url_for_output(url)
  return "" unless url

  # expect properly encoded url, remove any unsafe chars
  cleaned = url
    .gsub(' ', '%20')
    .gsub("'", "&apos;")
    .gsub('"', "&quot;")
    .gsub(/[^\w\-`.~:\/?#\[\]@!$&'\(\)*+,;=%\p{M}’]/, "")

  Addressable::URI.parse(cleaned).host ? cleaned : ""
end

.pretty_filesize(size) ⇒ Object



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/onebox/helpers.rb', line 121

# Formats a byte count as a human-readable size using 1024-based units.
#
# Values below 2048 are printed unchanged with a "B" suffix. Larger values
# are shown with two decimals in the largest unit for which the value is
# still at least 2 of that unit, up to "EB".
#
# @param size [Numeric] size in bytes
# @return [String] e.g. "512 B", "2.00 KB", "5.00 GB"
def self.pretty_filesize(size)
  units = %w[B KB MB GB TB PB EB]
  base = 1024

  return "#{size} #{units.first}" if size < 2 * base

  bytes = size.to_f
  # Pick the first unit whose next-larger unit would show a value below 2;
  # anything beyond that falls back to the largest unit.
  exponent = (1..5).find { |power| bytes < 2 * (base**(power + 1)) } || 6

  "#{format('%.2f', bytes / (base**exponent))} #{units[exponent]}"
end

.sanitize(value, length = 50) ⇒ Object



162
163
164
165
# File 'lib/onebox/helpers.rb', line 162

# Runs +value+ through Sanitize.fragment and strips surrounding whitespace.
#
# @param value [String, nil] text to sanitize
# @param length [Integer] accepted but not used by this method
# @return [String, nil] the sanitized text, or nil when +value+ is blank
def self.sanitize(value, length = 50)
  blank?(value) ? nil : Sanitize.fragment(value).strip
end

.symbolize_keys(hash) ⇒ Object



12
13
14
15
16
17
18
19
20
21
# File 'lib/onebox/helpers.rb', line 12

# Returns a copy of +hash+ whose String keys are converted to Symbols.
# Hash values are converted recursively; other keys and values are kept as is.
#
# @param hash [Hash, nil] the hash to convert
# @return [Hash] a new hash; {} when +hash+ is nil
def self.symbolize_keys(hash)
  return {} if hash.nil?

  hash.each_with_object({}) do |(key, value), symbolized|
    symbol_key = key.is_a?(String) ? key.to_sym : key
    symbolized[symbol_key] = value.is_a?(Hash) ? symbolize_keys(value) : value
  end
end

.truncate(string, length = 50) ⇒ Object



153
154
155
156
# File 'lib/onebox/helpers.rb', line 153

# Shortens +string+ to at most +length+ characters followed by "...".
# The cut is made at the last space at or before index +length+ when there is
# one, otherwise exactly at +length+. Strings that already fit (and nil) are
# returned unchanged.
#
# @param string [String, nil] text to shorten
# @param length [Integer] maximum number of characters to keep
# @return [String, nil]
def self.truncate(string, length = 50)
  return string if string.nil?
  return string unless string.size > length

  cut_at = string.rindex(" ", length) || length
  string[0...cut_at] + "..."
end

.uri_encode(url) ⇒ Object

Percent-encodes a URI string per RFC3986 - tools.ietf.org/html/rfc3986



198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# File 'lib/onebox/helpers.rb', line 198

# Percent-encodes a URI string per RFC3986 - tools.ietf.org/html/rfc3986
#
# The URL is parsed with Addressable, each component is encoded with its own
# character class, and the URI is rebuilt from the encoded parts. "%" is added
# to the path, query and fragment classes (presumably so that existing
# percent-escapes are not encoded a second time).
#
# @param url [String, nil] the URL to encode
# @return [String] the encoded URL, or "" when +url+ is nil/false
def self.uri_encode(url)
  return "" unless url

  parsed = Addressable::URI.parse(url)
  classes = Addressable::URI::CharacterClasses

  scheme = Addressable::URI.encode_component(parsed.scheme, classes::SCHEME)
  authority = Addressable::URI.encode_component(parsed.authority, classes::AUTHORITY)
  path = Addressable::URI.encode_component(parsed.path, classes::PATH + "\\%")
  query = Addressable::URI.encode_component(parsed.query, "a-zA-Z0-9\\-\\.\\_\\~\\$\\&\\*\\,\\=\\:\\@\\?\\%")
  fragment = Addressable::URI.encode_component(parsed.fragment, "a-zA-Z0-9\\-\\.\\_\\~\\!\\$\\&\\'\\(\\)\\*\\+\\,\\;\\=\\:\\/\\?\\%")

  Addressable::URI.new(
    scheme: scheme,
    authority: authority,
    path: path,
    query: query,
    fragment: fragment
  ).to_s
end

.uri_unencode(url) ⇒ Object



214
215
216
# File 'lib/onebox/helpers.rb', line 214

# Thin wrapper that delegates to Addressable::URI.unencode.
#
# @param url [Object] value passed straight through to Addressable::URI.unencode
# @return [Object] whatever Addressable::URI.unencode returns for +url+
def self.uri_unencode(url)
  Addressable::URI.unencode(url)
end

.video_placeholder_html ⇒ Object



218
219
220
# File 'lib/onebox/helpers.rb', line 218

# Static placeholder markup for a video onebox.
#
# @return [String] a container div wrapping a "video" placeholder icon span
def self.video_placeholder_html
  icon = "<span class='placeholder-icon video'></span>"
  "<div class='onebox-placeholder-container'>#{icon}</div>"
end