Class: Linkhum::URL

Inherits:
Object
  • Object
show all
Includes:
IDN
Defined in:
lib/linkhum/url.rb

Class Method Summary collapse

Class Method Details

.parse(url) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/linkhum/url.rb', line 8

# Parses +url+ with Addressable::URI and renders it twice: once from a set of
# "human readable" parts and once from a set of "URL encoded" parts.
#
# @param url the URL to parse; handed unchanged to Addressable::URI.parse
# @return [Hash] a Hash with the keys +:human_readable+ and +:url_encoded+,
#   each holding whatever generate_url returns for the matching set of parts
def self.parse(url)
  uri = Addressable::URI.parse(url)
  ascii_only = /\A[\x00-\x7F]*\z/

  # Hosts carrying a punycode ("xn--") label are converted with
  # Idna.toUnicode for the readable form; any other host is used as is.
  host = uri.host
  display_host = (host =~ /\bxn--/) ? Idna.toUnicode(host) : host

  # Both renderings share the same port suffix (empty when no port is set).
  port_suffix = uri.port ? ":#{uri.port}" : ""

  display_path = unencode_component(uri.path, false)
  # Workaround for a bug in Addressable::URI (up to 2.5.0): it converts
  # paths to Unicode NFKC, although it should only do so for hostnames.
  # A patch to Addressable::URI is pending.
  raw_path = uri.path.dup
  raw_path = unencode_component(raw_path) if raw_path =~ ascii_only
  raw_path.force_encoding(Encoding::ASCII_8BIT)
  encoded_path = encode_component(raw_path)
  # If the unencoded path is not valid UTF-8, use the path as parsed instead.
  unless display_path.dup.force_encoding(Encoding::UTF_8).valid_encoding?
    display_path = uri.path
  end

  query = uri.query
  display_query = unencode_component(query, false)
  raw_query = nil
  if query
    # Same fallback as for the path: keep the parsed query when the
    # unencoded form is not valid UTF-8.
    unless display_query.dup.force_encoding(Encoding::UTF_8).valid_encoding?
      display_query = query
    end

    # Same Addressable::URI workaround as for the path.
    raw_query = query.dup
    raw_query = unencode_component(raw_query) if raw_query =~ ascii_only
    raw_query.force_encoding(Encoding::ASCII_8BIT)
  end
  encoded_query = encode_component(raw_query)

  # Fragments do not need to be encoded, so both forms use the parsed value.
  human_readable = {
    scheme: uri.scheme,
    userinfo: uri.userinfo,
    host: display_host,
    port: port_suffix,
    path: display_path,
    query: display_query,
    fragment: uri.fragment
  }
  url_encoded = {
    scheme: uri.scheme,
    userinfo: uri.userinfo,
    host: uri.normalized_host,
    port: port_suffix,
    path: encoded_path,
    query: encoded_query,
    fragment: uri.fragment
  }

  {
    human_readable: generate_url(human_readable),
    url_encoded: generate_url(url_encoded)
  }
end