Module: BrowserCrawler::UrlTools

Defined in:
lib/browser_crawler/url_tools.rb

Class Method Summary

Class Method Details

.full_url(uri:) ⇒ Object



16
17
18
19
20
21
22
23
# File 'lib/browser_crawler/url_tools.rb', line 16

# Builds an absolute URL string from a parsed URI.
#
# The result consists of scheme, host, optional port, path and query
# string (as produced by get_path_query). Userinfo and fragment are not
# included. Trailing slashes at the very end of the result are removed.
#
# @param uri [URI::Generic] parsed URI (must respond to scheme, host, port,
#   default_port, path and query)
# @return [String] the rebuilt URL
def full_url(uri:)
  path_query = get_path_query(uri: uri)

  # Drop the port only when it is the default for this URI's own scheme.
  # Checking for 80/443 regardless of scheme would turn e.g.
  # "https://host:80" into "https://host", which addresses a different port.
  authority = uri.port == uri.default_port ? uri.host : "#{uri.host}:#{uri.port}"

  # The strip runs on the whole string, so a slash ending the query string
  # is removed as well as one ending the path.
  "#{uri.scheme}://#{authority}#{uri.path}#{path_query}".sub(%r{/+$}, '')
end

.get_path_query(uri:) ⇒ Object



25
26
27
28
# File 'lib/browser_crawler/url_tools.rb', line 25

# Formats the query component of a URI for appending to a path.
#
# @param uri [#query] object exposing the query string (e.g. a URI)
# @return [String, nil] the query prefixed with "?", or nil when the query
#   is nil or an empty string
def get_path_query(uri:)
  query = uri.query
  return nil if query.nil? || query == ''

  "?#{query}"
end

.uri(url:) ⇒ Object



3
4
5
6
7
# File 'lib/browser_crawler/url_tools.rb', line 3

# Non-raising counterpart of uri!.
#
# @param url [Object] value forwarded unchanged to uri!
# @return [Object, nil] whatever uri! returns, or nil when uri! raises
#   URI::InvalidURIError
def uri(url:)
  begin
    uri!(url: url)
  rescue URI::InvalidURIError
    # An invalid URL is reported to the caller as nil rather than an error.
    nil
  end
end

.uri!(url:) ⇒ Object

Raises:

  • (URI::InvalidURIError)


9
10
11
12
13
14
# File 'lib/browser_crawler/url_tools.rb', line 9

# Strictly parses a value as an absolute http/https URL.
#
# @param url [#to_s] candidate URL; it is converted with to_s before checking
# @return [URI::Generic] the object returned by Kernel#URI for the string
# @raise [URI::InvalidURIError] when the whole string is not an http or
#   https URI (Kernel#URI may also raise it while parsing)
def uri!(url:)
  string_url = url.to_s

  # URI::DEFAULT_PARSER.make_regexp is what the obsolete URI.regexp delegates
  # to; calling it directly avoids the "URI.regexp is obsolete" warning.
  # \A and \z force the pattern to cover the entire string.
  http_pattern = /\A#{URI::DEFAULT_PARSER.make_regexp(%w[http https])}\z/
  unless string_url.match?(http_pattern)
    raise URI::InvalidURIError, "not an absolute http(s) URL: #{string_url.inspect}"
  end

  URI(string_url)
end