Class: UrlNormalizer
- Inherits:
-
Object
show all
- Defined in:
- lib/url_normalizer.rb,
lib/url_normalizer/proxy.rb,
lib/url_normalizer/version.rb,
lib/url_normalizer/linkedin.rb,
lib/url_normalizer/newyorker.rb,
lib/url_normalizer/new_york_times.rb,
lib/url_normalizer/think_progress.rb
Defined Under Namespace
Classes: LinkedIn, NewYorkTimes, Newyorker, Proxy, ThinkProgress
Constant Summary
collapse
- VERSION =
"0.0.2"
- @@normalizer_for =
Hash.new(UrlNormalizer)
Class Method Summary
collapse
Instance Method Summary
collapse
Constructor Details
Returns a new instance of UrlNormalizer.
20
21
22
|
# File 'lib/url_normalizer.rb', line 20
# Wraps an already-parsed URI so that #normalize can clean it up later.
#
# @param uri [Object] the URI to normalize; stored as-is in @uri
def initialize(uri)
  @uri = uri
end
|
Class Method Details
.normalize(url) ⇒ Object
12
13
14
15
16
17
18
|
# File 'lib/url_normalizer.rb', line 12
# Normalizes +url+ using the normalizer class looked up by the URL's host.
#
# A trailing fragment is dropped before parsing, unless it starts with "#!"
# (the pattern's negative lookahead skips those). The fragment is removed
# from a copy, so the caller's string is never modified and frozen strings
# are accepted.
#
# @param url [String] the URL to normalize
# @return [Object] whatever the selected normalizer's #normalize returns
def self.normalize(url)
  # String#sub! would edit the argument in place and raise on frozen input,
  # so the non-destructive form is used here.
  without_fragment = url.sub(/#(?!\!)[^#]*$/, '')
  uri = Addressable::URI.parse(without_fragment)
  @@normalizer_for[uri.host].new(uri).normalize
end
|
.normalize_for(domain) ⇒ Object
8
9
10
|
# File 'lib/url_normalizer.rb', line 8
# Registers the receiving class as the normalizer to use for +domain+.
#
# @param domain [String] host name used as the lookup key in .normalize
# @return [Class] the class that was registered (self)
def self.normalize_for(domain)
  @@normalizer_for.store(domain, self)
end
|
Instance Method Details
#build_query(params) ⇒ Object
64
65
66
67
68
69
70
71
72
73
74
75
76
|
# File 'lib/url_normalizer.rb', line 64
# Serializes a name => values mapping into a query string.
#
# Every value of a name produces one "name=value" pair; a name whose value
# list is empty produces just the bare name. Names and values are passed
# through #encode_component and the pieces are joined with "&".
#
# @param params [Hash] maps each parameter name to a list of values
#   (presumably the shape produced by CGI.parse - confirm against callers)
# @return [String] the assembled query string ("" for an empty mapping)
def build_query(params)
  pieces = params.flat_map do |key, vals|
    name = encode_component(key)
    if vals.empty?
      ["#{name}"]
    else
      vals.map { |val| "#{name}=#{encode_component(val)}" }
    end
  end
  pieces.join("&")
end
|
#clean_query(query) ⇒ Object
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
# File 'lib/url_normalizer.rb', line 42
def clean_query query
return unless query
uri_params = CGI.parse(query)
if forbidden_uri_params
forbidden_params = forbidden_uri_params.map(&:to_s)
uri_params.reject! {|k,v| forbidden_params.include? k}
end
if whitelisted_uri_params
allowed_params = whitelisted_uri_params.map(&:to_s)
uri_params.select! {|k,v| allowed_params.include? k}
end
build_query(uri_params)
end
|
#encode_component(component) ⇒ Object
60
61
62
|
# File 'lib/url_normalizer.rb', line 60
# Percent-encodes a single query-string name or value.
#
# Only RFC 3986 unreserved characters (letters, digits, "-", ".", "_", "~")
# are passed through. Addressable's default character class also passes
# reserved characters such as "&", "=", "+" and "#" through unescaped, so a
# decoded value containing one of them would change the structure of the
# rebuilt query string (e.g. "q=a%26b" would come back as "q=a&b").
#
# @param component [String] a decoded parameter name or value
# @return [String] the percent-encoded component
def encode_component(component)
  Addressable::URI.encode_component(
    component,
    Addressable::URI::CharacterClasses::UNRESERVED
  )
end
|
#forbidden_uri_params ⇒ Object
34
35
36
|
# File 'lib/url_normalizer.rb', line 34
# Names of the query parameters that #clean_query removes from a URL.
#
# @return [Array<Symbol>] the four utm_* tracking parameter names
def forbidden_uri_params
  %i[utm_source utm_content utm_medium utm_campaign]
end
|
#normalize ⇒ Object
24
25
26
27
28
29
30
31
32
|
# File 'lib/url_normalizer.rb', line 24
# Produces the normalized string form of the wrapped URI.
#
# The query is replaced with its cleaned version (see #clean_query), the URI
# is normalized in place, and a "?" left dangling at the end of the
# resulting string is removed.
#
# @return [String] the normalized URL
def normalize
  @uri.query = clean_query(@uri.query)
  @uri.normalize!
  @uri.to_s.sub(/\?$/, '')
end
|
#whitelisted_uri_params ⇒ Object
38
39
40
|
# File 'lib/url_normalizer.rb', line 38
# Names of the only query parameters #clean_query should keep.
#
# The nil returned here makes #clean_query skip its whitelist step.
#
# @return [nil] no whitelist
def whitelisted_uri_params
  nil
end
|