Class: Retriever::Link

Inherits:
Object
  • Object
show all
Defined in:
lib/retriever/link.rb

Constant Summary collapse

HTTP_RE =
Regexp.new(/^http/i).freeze
SLASH_RE =
Regexp.new(%r(^/{1}[^/])).freeze
DOUBLE_SLASH_RE =
Regexp.new(%r(^/{2}[^/])).freeze
WWW_DOT_RE =
Regexp.new(/^www\./i).freeze

Instance Method Summary collapse

Constructor Details

#initialize(target_scheme, target_host, this_link) ⇒ Link

Returns a new instance of Link.



10
11
12
13
14
15
16
17
18
19
20
# File 'lib/retriever/link.rb', line 10

# Builds a Link from a raw link string found on a target site.
#
# The raw link is percent-encoded, parsed and normalized with Addressable.
# When Addressable rejects it as an invalid URI, the link is replaced by the
# resolved path of a Link built from the target host itself.
#
# NOTE(review): the fallback constructs another Link using target_host as the
# link text, so a target_host that is itself unparseable would re-enter this
# rescue branch again — confirm callers always pass a valid host.
#
# @param target_scheme [String] scheme stored in @scheme
# @param target_host [String] host stored in @host
# @param this_link [String] raw link text to parse
def initialize(target_scheme, target_host, this_link)
  @link_uri =
    begin
      encoded = Addressable::URI.encode(this_link)
      Addressable::URI.parse(encoded).normalize
    rescue Addressable::URI::InvalidURIError
      fallback = Retriever::Link.new(target_scheme, target_host, target_host)
      Addressable::URI.parse(fallback.path)
    end

  @scheme = target_scheme
  @host = target_host
  # Keep the string form of whichever URI was settled on above.
  @this_link = @link_uri.to_s
end

Instance Method Details

#path ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/retriever/link.rb', line 22

# Expands this link into a full URL string using the stored scheme and host.
#
# Resolution order:
# 1. an absolute URI is returned untouched;
# 2. a link starting with "www." gets only the scheme prepended;
# 3. a link starting with a single "/" gets scheme and host prepended;
# 4. a link starting with "//" gets only "scheme:" prepended;
# 5. any other relative link is appended to "scheme://host/".
#
# @return [String, nil] the expanded URL; nil only if no branch applies
def path
  return this_link if link_uri.absolute?

  case this_link
  when WWW_DOT_RE
    "#{@scheme}://#{this_link}"
  when SLASH_RE
    "#{@scheme}://#{host}#{this_link}"
  when DOUBLE_SLASH_RE
    # link begins with '//'
    "#{@scheme}:#{this_link}"
  else
    # link uses relative path with no slashes at all
    "#{@scheme}://#{host}/#{this_link}" if link_uri.relative?
  end
end