Class: Retriever::Link

Inherits:
Object
  • Object
show all
Defined in:
lib/retriever/link.rb

Constant Summary collapse

SLASH_RE =
Regexp.new(%r(^/{1}[^/])).freeze

HTTP_RE = Regexp.new(/^http/i).freeze
DOUBLE_SLASH_RE =
Regexp.new(%r(^/{2}[^/])).freeze
WWW_DOT_RE =
Regexp.new(/^www\./i).freeze

Instance Method Summary collapse

Constructor Details

#initialize(target_scheme, target_host, this_link, current_url) ⇒ Link

Returns a new instance of Link.



10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/retriever/link.rb', line 10

# Wraps a raw link found on a crawled page.
#
# @param target_scheme [String] scheme of the crawl target, e.g. "http"
# @param target_host [String] bare host of the crawl target, e.g. "example.com"
# @param this_link [String] the link text to parse
# @param current_url [String] URL of the page the link was found on
def initialize(target_scheme, target_host, this_link, current_url)
  @scheme = target_scheme
  @host = target_host
  @current_page_url = current_url
  @link_uri =
    begin
      # Encoding the link first is deliberately skipped: it was breaking links.
      Addressable::URI.parse(this_link)
    rescue Addressable::URI::InvalidURIError
      # Unparseable link: fall back to the target site's root. Building the
      # root URL directly (rather than via a throwaway Link resolved against
      # the bare host) avoids results like "example.com/example.com" and
      # cannot recurse if the host itself is malformed.
      Addressable::URI.parse("#{target_scheme}://#{target_host}")
    end
  @this_link = @link_uri.to_s
end

Instance Method Details

#path ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/retriever/link.rb', line 24

# Resolves this link to a full URL string.
#
# Resolution order:
#   1. absolute URIs are returned unchanged;
#   2. links starting with "www." get the target scheme prepended;
#   3. root-relative links ("/", "/about") are joined to the target host;
#   4. links starting with "//" get the target scheme prepended;
#   5. any other relative link is appended to the current page URL.
#
# @return [String, nil] the resolved URL (nil only if the URI reports itself
#   as neither absolute nor relative)
def path
  return this_link if link_uri.absolute?

  return "#{@scheme}://#{this_link}" if WWW_DOT_RE =~ this_link

  # SLASH_RE requires a character after the slash, so a bare "/" (site root)
  # would otherwise fall through and be glued onto the current page URL.
  return "#{@scheme}://#{host}#{this_link}" if this_link == '/' || SLASH_RE =~ this_link

  # link begins with '//'
  return "#{@scheme}:#{this_link}" if DOUBLE_SLASH_RE =~ this_link

  return unless link_uri.relative?

  # Relative link: append to the current page URL with exactly one
  # separating slash.
  separator = @current_page_url.end_with?('/') ? '' : '/'
  "#{@current_page_url}#{separator}#{this_link}"
end