Module: SimpleIDN::Punycode

Defined in:
lib/simpleidn.rb

Constant Summary collapse

# Starting value of the code point counter n in both encode and decode.
INITIAL_N =
0x80
# Starting bias, used until adapt computes a new one.
INITIAL_BIAS =
72
# Code point of '-' (hyphen-minus): encode appends it after the basic code
# points, decode looks for its last occurrence.
DELIMITER =
0x2D
# Number of digit values; decode_digit returns BASE for a non-digit.
BASE =
36
# Divisor applied to delta by adapt on the first adaptation.
DAMP =
700
# Lower clamp for the per-digit threshold t.
TMIN =
1
# Upper clamp for the per-digit threshold t.
TMAX =
26
# Term added to the denominator of adapt's final bias formula.
SKEW =
38
# Upper bound used by the overflow checks in encode and decode.
MAXINT =
0x7FFFFFFF
# Largest code point treated as basic (copied through unchanged).
ASCII_MAX =
0x7F
# Frozen UTF-8 empty string used as the join separator when building output.
EMPTY =
''.encode(Encoding::UTF_8).freeze

Class Method Summary collapse

Class Method Details

.adapt(delta, numpoints, firsttime) ⇒ Object

Bias adaptation function



43
44
45
46
47
48
49
50
51
52
53
# File 'lib/simpleidn.rb', line 43

# Computes the next bias from the delta that was just encoded or decoded.
#
# @param delta [Integer] the delta that was just processed
# @param numpoints [Integer] number of code points handled so far, including this one
# @param firsttime [Boolean] true for the first adaptation, which divides by DAMP
#   instead of halving
# @return [Integer] the new bias
def adapt(delta, numpoints, firsttime)
  scaled = firsttime ? scaled_first(delta) : delta >> 1
  scaled += scaled / numpoints

  span = BASE - TMIN
  limit = (span * TMAX) / 2

  # Each division by span accounts for one more full digit (BASE) of bias.
  offset = 0
  until scaled <= limit
    scaled /= span
    offset += BASE
  end

  offset + (span + 1) * scaled / (scaled + SKEW)
end

# First-time scaling step of adapt: integer division of delta by DAMP.
def scaled_first(delta)
  delta / DAMP
end

.decode(input) ⇒ Object

Main decode



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/simpleidn.rb', line 56

# Decodes a Punycode string into the text it represents.
#
# @param input [String] Punycode text; it is transcoded to UTF-8 before processing
# @return [String] the decoded text, transcoded back to the encoding of input
# @raise [ConversionError] when a code point above ASCII_MAX appears before the
#   last delimiter, when a digit sequence is truncated or contains a non-digit,
#   or when one of the overflow checks trips
def decode(input)
  source_encoding = input.encoding
  codepoints = input.encode(Encoding::UTF_8).codepoints.to_a

  # Everything before the last delimiter is copied literally; with no
  # delimiter (or a delimiter at index 0) nothing is copied.
  basic_count = codepoints.rindex(DELIMITER) || 0
  result = codepoints[0, basic_count]
  raise(ConversionError, "Illegal input >= 0x80") if result.any? { |cp| cp > ASCII_MAX }

  n = INITIAL_N
  i = 0
  bias = INITIAL_BIAS

  # Step over the delimiter only when basic code points were copied.
  pos = basic_count.zero? ? 0 : basic_count + 1

  until pos >= codepoints.length
    # Read one generalized variable-length integer. It is accumulated
    # directly into i (which keeps the overflow check simple); the delta is
    # recovered afterwards as i - start_i.
    start_i = i
    weight = 1
    k = BASE

    loop do
      raise(ConversionError, "punycode_bad_input(1)") if pos >= codepoints.length

      digit = decode_digit(codepoints[pos])
      pos += 1

      raise(ConversionError, "punycode_bad_input(2)") if digit >= BASE
      raise(ConversionError, "punycode_overflow(1)") if digit > (MAXINT - i) / weight

      i += digit * weight

      threshold =
        if k <= bias
          TMIN
        elsif k >= bias + TMAX
          TMAX
        else
          k - bias
        end
      # A digit below the threshold terminates the integer.
      break if digit < threshold

      radix = BASE - threshold
      raise(ConversionError, "punycode_overflow(2)") if weight > MAXINT / radix

      weight *= radix
      k += BASE
    end

    slots = result.length + 1
    bias = adapt(i - start_i, slots, start_i.zero?)

    # i should have wrapped from slots back to 0, bumping n on every wrap;
    # apply all of those wraps at once.
    wraps = i / slots
    raise(ConversionError, "punycode_overflow(3)") if wraps > MAXINT - n

    n += wraps
    i %= slots

    # Place code point n at position i of the output.
    result.insert(i, n)
    i += 1
  end

  result.map { |cp| cp.chr(Encoding::UTF_8) }.join(EMPTY).encode(source_encoding)
end

.decode_digit(cp) ⇒ Object

decode_digit(cp) returns the numeric value of a basic code point (for use in representing integers) in the range 0 to base-1, or base if cp does not represent a value.



29
30
31
# File 'lib/simpleidn.rb', line 29

# Returns the numeric value of a basic code point used as a Punycode digit.
#
# The ranges are tested explicitly: a chain of "cp - 48 < 10"-style checks
# only works with unsigned arithmetic, and with Ruby's signed integers it
# would accept every code point below '0' (and the gaps between the
# digit/letter ranges) as a digit.
#
# @param cp [Integer] code point to interpret
# @return [Integer] 0..25 for 'A'..'Z' or 'a'..'z', 26..35 for '0'..'9',
#   and BASE when cp is not a digit
def decode_digit(cp)
  case cp
  when 48..57 then cp - 22   # '0'..'9' => 26..35
  when 65..90 then cp - 65   # 'A'..'Z' => 0..25
  when 97..122 then cp - 97  # 'a'..'z' => 0..25
  else BASE
  end
end

.encode(input) ⇒ Object

Main encode function



128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/simpleidn.rb', line 128

# Encodes a string as Punycode.
#
# @param input [String] text to encode; it is transcoded to UTF-8 before processing
# @return [String] the Punycode form, transcoded back to the encoding of input
# @raise [ConversionError] when one of the overflow checks trips
def encode(input)
  source_encoding = input.encoding
  codepoints = input.encode(Encoding::UTF_8).codepoints.to_a

  n = INITIAL_N
  delta = 0
  bias = INITIAL_BIAS

  # Basic code points are emitted first, followed by a delimiter when there
  # is at least one of them.
  result = codepoints.reject { |cp| cp > ASCII_MAX }
  basic_count = result.length
  handled = basic_count
  result << DELIMITER unless basic_count.zero?

  until handled >= codepoints.length
    # Every non-basic code point below n is already handled; pick the
    # smallest remaining one.
    m = codepoints.reduce(MAXINT) { |lowest, cp| cp >= n && cp < lowest ? cp : lowest }

    # Advance delta far enough to move the decoder's <n,i> state to <m,0>,
    # guarding against overflow.
    gap = m - n
    raise(ConversionError, "punycode_overflow (1)") if gap > (MAXINT - delta) / (handled + 1)

    delta += gap * (handled + 1)
    n = m

    codepoints.each do |cp|
      if cp < n
        delta += 1
        raise(ConversionError, "punycode_overflow(2)") if delta > MAXINT
      elsif cp == n
        # Emit delta as a generalized variable-length integer.
        q = delta
        k = BASE
        loop do
          threshold =
            if k <= bias
              TMIN
            elsif k >= bias + TMAX
              TMAX
            else
              k - bias
            end
          break if q < threshold

          q, remainder = (q - threshold).divmod(BASE - threshold)
          result << encode_digit(threshold + remainder)
          k += BASE
        end
        result << encode_digit(q)

        # The first adaptation is the one made while only basic code points
        # have been handled.
        bias = adapt(delta, handled + 1, handled == basic_count)
        delta = 0
        handled += 1
      end
    end

    delta += 1
    n += 1
  end

  result.map { |cp| cp.chr(Encoding::UTF_8) }.join(EMPTY).encode(source_encoding)
end

.encode_digit(d) ⇒ Object

encode_digit(d) returns the basic code point whose value (when used for representing integers) is d, which needs to be in the range 0 to base-1.



36
37
38
39
40
# File 'lib/simpleidn.rb', line 36

# Returns the basic code point that represents digit value d.
#
# @param d [Integer] digit value, expected to be in 0..35
# @return [Integer] code point of 'a'..'z' for 0..25, of '0'..'9' for 26..35
def encode_digit(d)
  # 'a' is 97, so 0..25 shift by 97; '0' is 48, so 26..35 shift by 22.
  d < 26 ? d + 97 : d + 22
end