Module: UrlUtils

Included in:
Spider
Defined in:
lib/url_utils.rb

Overview

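URL tools: helpers for telling relative URLs from absolute ones, resolving a relative URL against a base URL, and checking whether two URLs share the same scheme and host.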

Instance Method Summary

Instance Method Details

#create_abs_url_from_ctx(potential_base, relative_url) ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/url_utils.rb', line 37

# Resolves +relative_url+ against the directory context of +potential_base+.
#
# The base is interpreted as follows:
# * it ends with "/"                   - treated as a directory; the relative URL is appended.
# * it contains no "/" at all          - treated as a bare host; "/" is inserted.
# * its last "/" is the end of "://"   - bare host such as "http://example.com"; "/" is inserted.
# * there is no "." after the last "/" - last segment treated as a directory; "/" is inserted.
# * otherwise                          - last segment treated as a file and replaced.
#
# @param potential_base [String] base URL to resolve against
# @param relative_url [String] URL to resolve; make_absolute routes URLs that
#   do not start with "/" here
# @return [String] the combined URL
def create_abs_url_from_ctx(potential_base, relative_url)
  return potential_base + relative_url if potential_base.end_with?('/')

  last_slash = potential_base.rindex('/')
  # No slash anywhere: there is no path segment to strip, so just join with "/".
  return potential_base + '/' + relative_url if last_slash.nil?

  # The last slash being the second one of "://" means the base has no path.
  bare_host = last_slash >= 2 && potential_base[last_slash - 2, 2] == ':/'

  # A dot after the last slash is taken as a file extension. A base without
  # any dot (e.g. "http://localhost/docs") has no file segment either.
  last_dot = potential_base.rindex('.')
  last_segment_is_file = !last_dot.nil? && last_dot > last_slash

  if bare_host || !last_segment_is_file
    potential_base + '/' + relative_url
  else
    # Drop the file name but keep the trailing slash of its directory.
    potential_base[0, last_slash + 1] + relative_url
  end
end

#create_absolute_url_from_base(potential_base, relative_url) ⇒ Object



23
24
25
# File 'lib/url_utils.rb', line 23

# Builds an absolute URL by appending +relative_url+ to whatever
# remove_extra_paths returns for +potential_base+.
#
# @param potential_base [String] URL passed to remove_extra_paths
# @param relative_url [String] URL to append; make_absolute routes URLs that
#   start with "/" here
# @return [String] the concatenated URL
def create_absolute_url_from_base(potential_base, relative_url)
  site_root = remove_extra_paths(potential_base)
  site_root + relative_url
end

#get_domain(url) ⇒ Object



19
20
21
# File 'lib/url_utils.rb', line 19

# Returns the result of remove_extra_paths for +url+. For a URL such as
# "http://example.com/a/b" that is "http://example.com", so the scheme is
# part of the returned value.
#
# @param url [String] URL to reduce
# @return [String] the URL with everything from the first path slash removed
def get_domain(url)
  domain = remove_extra_paths(url)
  domain
end

#make_absolute(potential_base, relative_url) ⇒ Object



7
8
9
10
11
12
13
# File 'lib/url_utils.rb', line 7

# Turns +relative_url+ into an absolute URL using +potential_base+.
#
# When +relative_url+ matches /^\// (a line starting with "/") it is resolved
# with create_absolute_url_from_base; every other URL is resolved with
# create_abs_url_from_ctx.
#
# @param potential_base [String] base URL to resolve against
# @param relative_url [String] URL to resolve
# @return [String] the value returned by the chosen helper
def make_absolute(potential_base, relative_url)
  root_relative = relative_url =~ /^\//
  return create_absolute_url_from_base(potential_base, relative_url) if root_relative

  create_abs_url_from_ctx(potential_base, relative_url)
end

#relative?(url) ⇒ Boolean

Returns:

  • (Boolean)


3
4
5
# File 'lib/url_utils.rb', line 3

# Tells whether +url+ is relative, i.e. does not begin with an "http://" or
# "https://" scheme.
#
# The check is anchored to the start of the string (not to the start of any
# line), requires the "://" separator so that paths such as
# "httpdocs/index.html" still count as relative, and ignores case because URL
# schemes are case-insensitive.
#
# @param url [String] URL to classify
# @return [Boolean] true when +url+ has no leading http(s) scheme
def relative?(url)
  !url.match(%r{\Ahttps?://}i)
end

#remove_extra_paths(potential_base) ⇒ Object



27
28
29
30
31
32
33
34
35
# File 'lib/url_utils.rb', line 27

# Strips the path, query and fragment from +potential_base+, leaving the
# scheme (if any) and the host part.
#
# The host is taken to start right after the first "://". When the string has
# no "://" the search starts at the beginning instead of failing. The host
# ends at the first "/", "?" or "#" that follows.
#
# @param potential_base [String] URL to reduce
# @return [String] the leading portion of the URL up to (not including) the
#   first "/", "?" or "#" after the scheme, or +potential_base+ itself when
#   no such character exists
def remove_extra_paths(potential_base)
  scheme_separator = potential_base.index('://')
  host_start = scheme_separator ? scheme_separator + 3 : 0

  host_end = potential_base.index(%r{[/?#]}, host_start)
  return potential_base unless host_end

  potential_base[0, host_end]
end

#urls_on_same_domain?(url1, url2) ⇒ Boolean

Returns:

  • (Boolean)


15
16
17
# File 'lib/url_utils.rb', line 15

# Compares the get_domain results of the two URLs.
#
# @param url1 [String] first URL
# @param url2 [String] second URL
# @return [Boolean] true when get_domain returns equal values for both URLs
def urls_on_same_domain?(url1, url2)
  first_domain = get_domain(url1)
  second_domain = get_domain(url2)
  first_domain == second_domain
end