Class: URLGrey

Inherits:
Object
  • Object
show all
Defined in:
lib/url_grey.rb,
lib/url_grey/version.rb

Constant Summary collapse

# Characters "/", "\", "?" and "#".
# NOTE(review): not referenced by any method shown on this page; presumably
# consumed by the parser to find the end of the authority section -- confirm.
AUTHORITY_TERMINATORS =
"/\\?#"
# Input that #fixed returns as-is instead of rebuilding it from components.
ABOUT_BLANK_URL =
"about:blank"
# Characters that #fixed_path copies to its output unchanged.
PATH_PASS_CHARS =
"!$&'()*+,/:;=@[]"
# Further characters that #fixed_path copies to its output unchanged.
PATH_UNESCAPE_CHARS =
"-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~"
# NOTE(review): not referenced by any method shown on this page -- confirm
# whether it is still needed.
HOST_ESCAPE_CHARS =
" !\"\#$&'()*,<=>@`{|}"
# Characters that #fixed_host keeps verbatim in an all-ASCII host; every other
# ASCII character is percent-escaped there.
HOST_NORMAL_CHARS =
"+-.0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz"
# Host that #fixed_host substitutes when the host is empty and the scheme is
# "about" or "chrome".
HOST_CHROME_DEFAULT =
"version"
# Characters whose bytes #fixed_query emits unchanged; all other bytes are
# percent-escaped.
QUERY_NORMAL_CHARS =
"!$%&()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
# Scheme (as a Symbol) => port number. #fixed_port omits a port that equals
# the entry for the current scheme.
DEFAULT_PORTS =
{
  ftp:    21,
  gopher: 70,
  http:   80,
  https:  443,
  ws:     80,
  wss:    443,
}
# Schemes that #fixed_scheme always renders with "://" and for which
# #fixed_path guarantees a leading "/" ("about" and "chrome" are added to this
# list at both call sites).
STANDARD_SCHEMES =
['http', 'https', 'file', 'ftp', 'gopher', 'ws', 'wss', 'filesystem']
# Version string of the library.
VERSION =
"1.0.0"

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(_original) ⇒ URLGrey

Returns a new instance of URLGrey.



28
29
30
31
32
# File 'lib/url_grey.rb', line 28

# Stores the input with its leading whitespace removed, then parses it.
#
# @param _original [String] the raw URL text
def initialize(_original)
  without_leading_space = _original.sub(/\A\s*/, '')
  self.original = without_leading_space

  parse!
end

Instance Attribute Details

#coerced ⇒ Object

Returns the value of attribute coerced.



24
25
26
# File 'lib/url_grey.rb', line 24

# Reader for the +coerced+ attribute; returns whatever is stored in @coerced.
# NOTE(review): no method shown on this page reads or writes it -- confirm
# where it is populated.
def coerced
  @coerced
end

#host ⇒ Object

Returns the value of attribute host.



25
26
27
# File 'lib/url_grey.rb', line 25

# Reader for the +host+ attribute (the value in @host).
# #fixed_host calls String methods on it, so it is expected to be a String.
def host
  @host
end

#original ⇒ Object

Returns the value of attribute original.



24
25
26
# File 'lib/url_grey.rb', line 24

# Reader for the +original+ attribute (the value in @original).
# The constructor assigns the input text minus its leading whitespace, and
# #fixed compares this value with ABOUT_BLANK_URL.
def original
  @original
end

#password ⇒ Object

Returns the value of attribute password.



25
26
27
# File 'lib/url_grey.rb', line 25

# Reader for the +password+ attribute (the value in @password).
# #fixed_credentials calls +empty?+ on it, so it must not be nil there.
def password
  @password
end

#path ⇒ Object

Returns the value of attribute path.



25
26
27
# File 'lib/url_grey.rb', line 25

# Reader for the +path+ attribute (the value in @path).
# #fixed_path indexes it and iterates over its characters, so it is expected
# to be a String.
def path
  @path
end

#port ⇒ Object

Returns the value of attribute port.



25
26
27
# File 'lib/url_grey.rb', line 25

# Reader for the +port+ attribute (the value in @port).
# #fixed_port calls +empty?+ and +to_i+ on it, i.e. treats it as port text
# rather than an Integer.
def port
  @port
end

#query ⇒ Object

Returns the value of attribute query.



25
26
27
# File 'lib/url_grey.rb', line 25

# Reader for the +query+ attribute (the value in @query).
# #fixed_query treats nil as "no query string".
def query
  @query
end

#ref ⇒ Object

Returns the value of attribute ref.



25
26
27
# File 'lib/url_grey.rb', line 25

# Reader for the +ref+ attribute (the value in @ref).
# #fixed_ref emits it after a "#" and treats nil as "no fragment".
def ref
  @ref
end

#scheme ⇒ Object

Returns the value of attribute scheme.



25
26
27
# File 'lib/url_grey.rb', line 25

# Reader for the +scheme+ attribute (the value in @scheme).
# Compared against scheme names such as "about" and "http" by the fixed_*
# methods; #fixed_port also calls +to_sym+ on it.
def scheme
  @scheme
end

#slashes ⇒ Object

Returns the value of attribute slashes.



26
27
28
# File 'lib/url_grey.rb', line 26

# Reader for the +slashes+ attribute (the value in @slashes).
# #fixed_scheme appends it after "scheme:" for schemes that are not rendered
# with a fixed "://".
def slashes
  @slashes
end

#username ⇒ Object

Returns the value of attribute username.



25
26
27
# File 'lib/url_grey.rb', line 25

# Reader for the +username+ attribute (the value in @username).
# #fixed_credentials calls +empty?+ on it, so it must not be nil there.
def username
  @username
end

Instance Method Details

#fixed ⇒ Object



47
48
49
50
51
# File 'lib/url_grey.rb', line 47

# Assembles the fixed-up URL from its individually fixed components.
#
# An input that is exactly ABOUT_BLANK_URL is returned unchanged rather than
# being rebuilt from components.
#
# @return [String] the fixed URL; always a fresh String the caller may mutate
def fixed
  # Hand out a copy: returning the constant itself would let a caller that
  # mutates the result (e.g. with <<) silently change ABOUT_BLANK_URL.
  return ABOUT_BLANK_URL.dup if original == ABOUT_BLANK_URL

  "#{fixed_scheme}#{fixed_credentials}#{fixed_host}#{fixed_port}#{fixed_path}#{fixed_query}#{fixed_ref}"
end

#fixed_credentials ⇒ Object



53
54
55
56
57
# File 'lib/url_grey.rb', line 53

# Builds the credentials prefix that precedes the host.
#
# @return [String] "" when username and password are both empty,
#   "username@" when only the password is empty, and "username:password@"
#   otherwise (including an empty username with a non-empty password)
def fixed_credentials
  user = username
  secret = password
  user_missing = user.empty?
  secret_missing = secret.empty?

  if secret_missing
    user_missing ? "" : "#{user}@"
  else
    "#{user}:#{secret}@"
  end
end

#fixed_host ⇒ Object

from components/url_formatter/url_fixer.cc FixupHost



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/url_grey.rb', line 60

# Normalizes the host component (modelled on Chromium's FixupHost in
# components/url_formatter/url_fixer.cc).
#
# Steps, in order:
# 1. remove all whitespace and lower-case the host;
# 2. unless the host consists only of dots (or is empty), drop leading dots
#    and collapse a run of trailing dots to a single dot;
# 3. an empty host under the "about"/"chrome" schemes becomes
#    HOST_CHROME_DEFAULT;
# 4. an all-ASCII host keeps the characters listed in HOST_NORMAL_CHARS and
#    percent-escapes every other character; a host with non-ASCII characters
#    is handed to SimpleIDN.to_ascii instead.
#
# @return [String] the normalized host
def fixed_host
  cleaned = host.gsub(%r{\s}, '').downcase
  unless cleaned.match(%r{^\.*$})
    cleaned = cleaned.sub(%r{^\.*}, '')
    cleaned = cleaned.sub(%r{(?<=\.)\.*$}, '')
  end
  if cleaned.empty? && ["about", "chrome"].include?(scheme)
    cleaned = HOST_CHROME_DEFAULT
  end

  return SimpleIDN.to_ascii(cleaned) unless cleaned.match(%r{^[[:ascii:]]*$})

  cleaned.chars.map do |char|
    if HOST_NORMAL_CHARS.include?(char)
      char
    else
      # Always two hex digits: a bare to_s(16) would turn U+0001 into the
      # malformed escape "%1".
      format("%%%02X", char.ord)
    end
  end.join
end

#fixed_path ⇒ Object

from url/url_canon_path.cc CanonicalizePath



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/url_grey.rb', line 85

# Normalizes the path component (modelled on Chromium's CanonicalizePath in
# url/url_canon_path.cc).
#
# For the standard schemes plus "about" and "chrome", a path that does not
# start with "/" gets one prepended. Characters listed in PATH_PASS_CHARS or
# PATH_UNESCAPE_CHARS, and ".", are copied through; every other character is
# percent-escaped byte by byte in the string's own encoding, with two hex
# digits per byte.
#
# @return [String] the normalized path
def fixed_path
  raw = path
  slash_schemes = STANDARD_SCHEMES + ["about", "chrome"]
  raw = '/' + raw if raw[0] != '/' && slash_schemes.include?(scheme)

  raw.chars.map do |char|
    # TODO: if the dot is preceded by a slash, do directory stuff:
    # google.com/abc/.././def -> google.com/def
    if char == "." || PATH_PASS_CHARS.include?(char) || PATH_UNESCAPE_CHARS.include?(char)
      char
    else
      # Escape the encoded bytes, not the code point: U+65E5 must become
      # "%E6%97%A5" (not "%65E5") and U+0001 must become "%01" (not "%1").
      char.bytes.map { |byte| format("%%%02X", byte) }.join
    end
  end.join
end

#fixed_port ⇒ Object



106
107
108
109
# File 'lib/url_grey.rb', line 106

# Renders the port suffix of the URL.
#
# @return [String] "" when the port is empty or equals the DEFAULT_PORTS
#   entry for the current scheme, otherwise ":" followed by the port text
def fixed_port
  port_text = port
  redundant = port_text.empty? || port_text.to_i == DEFAULT_PORTS[scheme.to_sym]

  if redundant
    ""
  else
    ":#{port_text}"
  end
end

#fixed_query ⇒ Object



111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/url_grey.rb', line 111

# Renders the query component, percent-escaping every byte that is not listed
# in QUERY_NORMAL_CHARS.
#
# @return [String] "" when the query is nil, otherwise "?" followed by the
#   escaped query (an empty query yields just "?")
def fixed_query
  raw = query
  return "" if raw.nil?

  # Computed once up front instead of once per input byte.
  normal_bytes = QUERY_NORMAL_CHARS.unpack("U*")
  escaped = raw.bytes.map do |byte|
    if normal_bytes.include?(byte)
      [byte].pack("U")
    else
      # Always two hex digits: a bare to_s(16) would turn a tab (0x09) into
      # the malformed escape "%9".
      format("%%%02X", byte)
    end
  end.join('')
  "?#{escaped}"
end

#fixed_ref ⇒ Object



124
125
126
127
# File 'lib/url_grey.rb', line 124

# Renders the fragment ("ref") component.
#
# @return [String] "" when the ref is nil, otherwise "#" followed by the ref
#   (an empty ref yields just "#")
def fixed_ref
  fragment = ref
  if fragment.nil?
    ""
  else
    "\##{fragment}"
  end
end

#fixed_scheme ⇒ Object



129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/url_grey.rb', line 129

# Renders the scheme prefix of the URL.
#
# The "about" scheme is rewritten to "chrome". Standard schemes (and
# "about"/"chrome") are always followed by "://"; any other scheme is
# followed by ":" plus whatever +slashes+ holds.
#
# @return [String] the scheme together with its separator
def fixed_scheme
  current = scheme
  name = current == "about" ? "chrome" : current
  slash_schemes = STANDARD_SCHEMES + ["about", "chrome"]

  return "#{name}://" if slash_schemes.include?(name)

  "#{name}:#{slashes}"
end

#parts ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/url_grey.rb', line 34

# Collects the parsed URL components into a Hash.
#
# @return [Hash{Symbol => Object}] the current values of scheme, username,
#   password, host, port, path, query and ref, keyed in that order
def parts
  names  = [:scheme, :username, :password, :host, :port, :path, :query, :ref]
  values = [scheme, username, password, host, port, path, query, ref]
  Hash[names.zip(values)]
end