Class: UrlParser::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/url_parser/parser.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(uri, options = {}) ⇒ Parser

Returns a new instance of Parser.



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/url_parser/parser.rb', line 33

# Builds a parser for +uri+; nothing is parsed yet.
#
# The recognised keys (:base_uri, :default_scheme, :embedded_params,
# :scheme_map, :raw) are pulled out into instance variables. Whatever is
# left over is kept in @options (for example :host, which #parse reads).
#
# @param uri [Object] the value to parse, stored as given
# @param options [Hash] parser settings; the caller's hash is not modified
# @return [Parser]
def initialize(uri, options = {})
  # Work on a copy: Hash#delete below would otherwise strip the recognised
  # keys out of the hash object the caller passed in.
  remaining = options.dup

  @uri             = uri
  @domain          = nil
  @base_uri        = remaining.delete(:base_uri)
  # The block form falls back to the global configuration only when the key
  # is absent, so an explicit nil/false from the caller is preserved.
  @default_scheme  = remaining.delete(:default_scheme) do
    UrlParser.configuration.default_scheme
  end
  @embedded_params = remaining.delete(:embedded_params) do
    UrlParser.configuration.embedded_params
  end
  @scheme_map      = remaining.delete(:scheme_map) do
    UrlParser.configuration.scheme_map
  end
  @raw             = remaining.delete(:raw) { false }
  @options         = remaining
end

Instance Attribute Details

#default_scheme ⇒ Object (readonly)

Returns the value of attribute default_scheme.



26
27
28
# File 'lib/url_parser/parser.rb', line 26

# Reader for @default_scheme, which #initialize takes from the
# :default_scheme option or, when that key is absent, from
# UrlParser.configuration.default_scheme.
#
# @return [Object] the stored default scheme (may be nil/false)
def default_scheme
  @default_scheme
end

#domain ⇒ Object (readonly)

Returns the value of attribute domain.



26
27
28
# File 'lib/url_parser/parser.rb', line 26

# Reader for @domain. It is nil after #initialize and is assigned a
# UrlParser::Domain by #parse.
#
# @return [UrlParser::Domain, nil]
def domain
  @domain
end

#options ⇒ Object (readonly)

Returns the value of attribute options.



26
27
28
# File 'lib/url_parser/parser.rb', line 26

# Reader for @options: the options hash as stored by #initialize after the
# keys it recognises have been removed.
#
# @return [Hash]
def options
  @options
end

#scheme_map ⇒ Object (readonly)

Returns the value of attribute scheme_map.



26
27
28
# File 'lib/url_parser/parser.rb', line 26

# Reader for @scheme_map, set by #initialize from the :scheme_map option or
# UrlParser.configuration.scheme_map. #parse replaces a URI's scheme with
# the mapped value when the scheme is a key of this map.
#
# @return [Object] the stored scheme map
def scheme_map
  @scheme_map
end

#uri ⇒ Object (readonly)

Returns the value of attribute uri.



26
27
28
# File 'lib/url_parser/parser.rb', line 26

# Reader for @uri: the value given to #initialize until one of the bang
# methods (parse!, unescape!, raw!, ...) replaces it with its result.
#
# @return [Object] the current uri value
def uri
  @uri
end

Class Method Details

.call(uri, options = {}, &blk) ⇒ Object Also known as: parse



9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/url_parser/parser.rb', line 9

# Convenience entry point: builds a parser for +uri+, runs it and returns
# the outcome.
#
# When a block is given, the new parser is yielded so the caller decides
# which steps to run; otherwise parse! is invoked on it.
#
# @param uri [Object] value to parse; nil/false short-circuits to nil
# @param options [Hash] handed on to .new
# @yieldparam parser [Parser] the freshly built parser
# @return [Object, nil] parser.raw when the parser is in raw mode,
#   otherwise parser.uri
def call(uri, options = {}, &blk)
  return unless uri

  parser = new(uri, options)
  if block_given?
    yield parser
  else
    parser.parse!
  end

  return parser.raw if parser.raw?

  parser.uri
end

Instance Method Details

#==(uri) ⇒ Object



197
198
199
200
201
202
203
# File 'lib/url_parser/parser.rb', line 197

# Compares this parser with +uri+ by fully cleaning both and comparing the
# SHA1 digests of the results.
#
# The receiver itself is not cleaned; a duplicate is. The other side is a
# new parser built from +uri+ with this parser's options and raw: false.
#
# @param uri [Object] the value to compare against
# @return [Boolean] true when both sides yield the same sha1
def ==(uri)
  other_options = options.merge(raw: false)

  mine = dup
  mine.clean!

  theirs = self.class.new(uri, other_options)
  theirs.clean!

  mine.sha1 == theirs.sha1
end

#base_uri ⇒ Object



50
51
52
# File 'lib/url_parser/parser.rb', line 50

# String form of the :base_uri given at construction, falling back to the
# current uri when no base was supplied.
#
# @return [String]
def base_uri
  (@base_uri || uri).to_s
end

#canonicalize ⇒ Object Also known as: c14n



150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/url_parser/parser.rb', line 150

# Returns the parsed URI with listed query parameters stripped.
#
# A parameter is dropped when its key appears in UrlParser::DB[:global], or
# in a UrlParser::DB[:hosts] entry whose host string occurs in the URI's
# host. URIs without query values are returned as parsed.
#
# The object returned by #parse is modified in place.
#
# @return [Addressable::URI]
def canonicalize
  parsed = parse
  return parsed unless parsed.query_values

  pairs = parsed.query_values(Array)

  # Parameters filtered out regardless of host.
  pairs.delete_if { |key, _value| UrlParser::DB[:global].include?(key) }

  # Parameters filtered out only for matching hosts (substring match on the
  # escaped host string).
  pairs.delete_if do |key, _value|
    UrlParser::DB[:hosts].find do |host, keys|
      parsed.host =~ Regexp.new(Regexp.escape(host)) && keys.include?(key)
    end
  end

  parsed.query_values = pairs
  parsed
end

#canonicalize! ⇒ Object Also known as: c14n!



170
171
172
# File 'lib/url_parser/parser.rb', line 170

# Replaces @uri with the result of #canonicalize.
#
# @return [Object] the new value of @uri
def canonicalize!
  @uri = canonicalize
end

#clean! ⇒ Object



188
189
190
191
192
193
194
195
# File 'lib/url_parser/parser.rb', line 188

# Runs the whole pipeline on @uri, each step replacing @uri with its result:
# unescape, parse, unembed, canonicalize, normalize and, in raw mode only,
# conversion back to a String.
#
# @return [String, nil] the result of raw! in raw mode, otherwise nil
def clean!
  unescape!
  parse!
  unembed!
  canonicalize!
  normalize!
  # Only collapse back to a String when the parser was built with raw: true.
  raw! if raw?
end

#embedded_params ⇒ Object



54
55
56
# File 'lib/url_parser/parser.rb', line 54

# Returns @embedded_params passed through UrlParser.wrap.
# NOTE(review): wrap presumably coerces the value to an Array (the result is
# queried with include? in #unembed) -- confirm against UrlParser.wrap.
#
# @return [Object] the wrapped embedded-parameter names
def embedded_params
  UrlParser.wrap(@embedded_params)
end

#normalize ⇒ Object



134
135
136
137
138
139
140
141
142
143
144
# File 'lib/url_parser/parser.rb', line 134

# Returns the parsed URI tidied up:
#
# * runs of "/" in the path are collapsed to one;
# * a trailing "/" is dropped unless the path is a single character;
# * an empty query is removed and a present query is stripped of
#   surrounding whitespace;
# * the fragment is discarded;
# * finally the URI's own normalize! is applied.
#
# The object returned by #parse is modified in place.
#
# @return [Addressable::URI]
def normalize
  result = parse

  result.path = result.path.squeeze('/')
  result.path = result.path.chomp('/') unless result.path.size == 1

  result.query = nil if result.query && result.query.empty?
  result.query &&= result.query.strip

  result.fragment = nil
  result.normalize!

  result
end

#normalize! ⇒ Object



146
147
148
# File 'lib/url_parser/parser.rb', line 146

# Replaces @uri with the result of #normalize.
#
# @return [Object] the new value of @uri
def normalize!
  @uri = normalize
end

#parse ⇒ Object



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/url_parser/parser.rb', line 74

# Turns the current uri into an Addressable::URI.
#
# A uri that already is an Addressable::URI is returned unchanged. Otherwise
# base_uri is parsed (with uri joined onto it when a :base_uri option was
# given) and host, scheme and @domain are filled in:
#
# * options[:host], when present, overrides the host;
# * otherwise the host -- or, failing that, the first path segment -- is
#   handed to UrlParser::Domain and, if that reports valid?, becomes the
#   host while the remaining segments become the path;
# * a scheme that is a key of scheme_map is replaced by the mapped value;
# * a host without a scheme receives default_scheme, when one is set.
#
# Side effect: assigns @domain.
#
# @return [Addressable::URI]
def parse
  return uri if uri.is_a?(Addressable::URI)

  Addressable::URI.parse(base_uri).tap do |parsed_uri|
    parsed_uri.join!(uri) if @base_uri

    if options[:host]
      parsed_uri.host = options[:host]
    else
      parts     = parsed_uri.path.to_s.split(/[\/:]/)
      hostname  = parsed_uri.host || parts.first
      @domain   = UrlParser::Domain.new(hostname)
      if @domain.valid?
        # Rebuild the path without the leading host segment; the trailing
        # regex re-appends a final "/" that split/join would otherwise lose.
        parsed_uri.path = '/' +
          parts.drop(1).join('/') +
          parsed_uri.path[/(?<=\/).*(\/)\s*$/, 1].to_s
        parsed_uri.host = @domain.name
      end
    end

    if scheme_map.key?(parsed_uri.scheme)
      parsed_uri.scheme = scheme_map[parsed_uri.scheme]
    end

    if set_default_scheme? && parsed_uri.host && !parsed_uri.scheme
      parsed_uri.scheme = default_scheme
    end

    # Only reachable via the options[:host] branch (the other branch always
    # assigns @domain). Use the host actually set on the URI: the `hostname`
    # local is never assigned on this path and would be nil.
    if parsed_uri.host && !domain
      @domain = UrlParser::Domain.new(parsed_uri.host)
    end
  end
end

#parse! ⇒ Object



108
109
110
# File 'lib/url_parser/parser.rb', line 108

# Replaces @uri with the result of #parse.
#
# @return [Object] the new value of @uri
def parse!
  @uri = parse
end

#raw ⇒ Object



175
176
177
# File 'lib/url_parser/parser.rb', line 175

# String form of the current uri (uri.to_s); @uri itself is left as it is.
#
# @return [String]
def raw
  uri.to_s
end

#raw! ⇒ Object



179
180
181
# File 'lib/url_parser/parser.rb', line 179

# Replaces @uri with its String form (the result of #raw).
#
# @return [String] the new value of @uri
def raw!
  @uri = raw
end

#raw? ⇒ Boolean

Returns:

  • (Boolean)


58
59
60
# File 'lib/url_parser/parser.rb', line 58

# Whether the parser was built in raw mode, i.e. with a truthy :raw option.
#
# @return [Boolean]
def raw?
  @raw ? true : false
end

#set_default_scheme? ⇒ Boolean

Returns:

  • (Boolean)


62
63
64
# File 'lib/url_parser/parser.rb', line 62

# Whether a default scheme is configured, i.e. @default_scheme is truthy.
# #parse only fills in a missing scheme when this is true.
#
# @return [Boolean]
def set_default_scheme?
  @default_scheme ? true : false
end

#sha1 ⇒ Object Also known as: hash



183
184
185
# File 'lib/url_parser/parser.rb', line 183

# SHA1 hex digest of the String form of the current uri (see #raw).
# Used by #== to compare two cleaned parsers.
#
# NOTE(review): the generated docs list `hash` as an alias of this method.
# Ruby's Hash expects #hash to return an Integer, so confirm that aliasing
# it to a hex String is intended before using parsers as Hash keys.
#
# @return [String] 40-character hexadecimal digest
def sha1
  Digest::SHA1.hexdigest(raw)
end

#unembed ⇒ Object Also known as: embedded



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/url_parser/parser.rb', line 112

# Extracts a URL embedded in the query string, if there is one.
#
# Query values whose key is listed in embedded_params and whose value
# matches Addressable::URI::URIREGEX are candidates; the first candidate
# that parses to an http/https URI with a host wins and is run through
# self.class.call (keeping this parser's raw setting). When nothing
# qualifies, the parsed original is returned.
#
# @return [Object] the result for the embedded URL, or the parsed original
def unembed
  original = parse
  query = original.query_values
  return original unless query

  candidates = query.select do |key, value|
    embedded_params.include?(key) && value =~ Addressable::URI::URIREGEX
  end.values

  embed = candidates.find do |candidate|
    parsed = Addressable::URI.parse(candidate)
    %w(http https).include?(parsed.scheme) && parsed.host
  end

  if embed
    self.class.call(embed, raw: raw?)
  else
    original
  end
end

#unembed! ⇒ Object Also known as: embedded!



129
130
131
# File 'lib/url_parser/parser.rb', line 129

# Replaces @uri with the result of #unembed.
#
# @return [Object] the new value of @uri
def unembed!
  @uri = unembed
end

#unescape ⇒ Object



66
67
68
# File 'lib/url_parser/parser.rb', line 66

# Passes the current uri to UrlParser.unescape and returns the result;
# @uri itself is not reassigned here (see #unescape! for that).
#
# @return [Object] whatever UrlParser.unescape returns
def unescape
  UrlParser.unescape(uri)
end

#unescape! ⇒ Object



70
71
72
# File 'lib/url_parser/parser.rb', line 70

# Replaces @uri with the result of #unescape.
#
# @return [Object] the new value of @uri
def unescape!
  @uri = unescape
end