Module: SimpleIDN::Punycode
- Defined in:
- lib/simpleidn.rb
Constant Summary collapse
- INITIAL_N =
0x80
- INITIAL_BIAS =
72
- DELIMITER =
0x2D
- BASE =
36
- DAMP =
700
- TMIN =
1
- TMAX =
26
- SKEW =
38
- MAXINT =
0x7FFFFFFF
- ASCII_MAX =
0x7F
- EMPTY =
''.encode(Encoding::UTF_8).freeze
Class Method Summary collapse
-
.adapt(delta, numpoints, firsttime) ⇒ Object
Bias adaptation function.
-
.decode(input) ⇒ Object
Main decode.
-
.decode_digit(cp) ⇒ Object
decode_digit(cp) returns the numeric value of a basic code point (for use in representing integers) in the range 0 to base-1, or base if cp does not represent a value.
-
.encode(input) ⇒ Object
Main encode function.
-
.encode_digit(d) ⇒ Object
encode_digit(d) returns the basic code point whose value (when used for representing integers) is d, which needs to be in the range 0 to base-1.
Class Method Details
.adapt(delta, numpoints, firsttime) ⇒ Object
Bias adaptation function
43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/simpleidn.rb', line 43
#
# Bias adaptation function: computes the next bias from the delta that was
# just processed.
#
# delta     - the delta value just encoded or decoded.
# numpoints - divisor used for the proportional increase of the scaled
#             delta (must be non-zero).
# firsttime - when truthy the delta is divided by DAMP; otherwise it is
#             halved.
#
# Returns the new bias.
def adapt(delta, numpoints, firsttime)
  span = BASE - TMIN
  ceiling = (span * TMAX) / 2

  scaled = firsttime ? delta / DAMP : delta >> 1
  scaled += scaled / numpoints

  # Shrink the scaled delta until it fits under the ceiling, accumulating
  # one BASE of offset per division step.
  offset = 0
  until scaled <= ceiling
    scaled /= span
    offset += BASE
  end

  offset + ((span + 1) * scaled) / (scaled + SKEW)
end
.decode(input) ⇒ Object
Main decode
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
# File 'lib/simpleidn.rb', line 56
#
# Main decode: converts a Punycode-encoded string back into the code points
# it represents.
#
# input - the encoded String. It is transcoded to UTF-8 internally.
#
# Returns the decoded String, transcoded back to the encoding of input.
#
# Raises ConversionError when a code point before the last delimiter is
# above ASCII_MAX, when the input ends in the middle of a variable-length
# integer, when a character is not a valid digit, when an intermediate
# value would exceed MAXINT, or when the decoded value is not a code point
# that can be represented in UTF-8 (above 0x10FFFF or a surrogate).
def decode(input)
  input_encoding = input.encoding
  input = input.encode(Encoding::UTF_8).codepoints.to_a
  output = []

  # Initialize the state:
  n = INITIAL_N
  i = 0
  bias = INITIAL_BIAS

  # Handle the basic code points: Let basic be the number of input code
  # points before the last delimiter, or 0 if there is none, then
  # copy the first basic code points to the output.
  basic = input.rindex(DELIMITER) || 0

  input[0, basic].each do |char|
    raise(ConversionError, "Illegal input >= 0x80") if char > ASCII_MAX
    output << char
  end

  # Main decoding loop: Start just after the last delimiter if any
  # basic code points were copied; start at the beginning otherwise.
  # ic is the index of the next character to be consumed.
  ic = basic > 0 ? basic + 1 : 0
  while ic < input.length
    # Decode a generalized variable-length integer into delta,
    # which gets added to i. The overflow checking is easier
    # if we increase i as we go, then subtract off its starting
    # value at the end to obtain delta.
    oldi = i
    w = 1
    k = BASE
    loop do
      raise(ConversionError, "punycode_bad_input(1)") if ic >= input.length

      digit = decode_digit(input[ic])
      ic += 1

      raise(ConversionError, "punycode_bad_input(2)") if digit >= BASE
      raise(ConversionError, "punycode_overflow(1)") if digit > (MAXINT - i) / w

      i += digit * w

      # Threshold for this digit position: k - bias limited to TMIN..TMAX.
      t = if k <= bias
            TMIN
          elsif k >= bias + TMAX
            TMAX
          else
            k - bias
          end
      break if digit < t

      raise(ConversionError, "punycode_overflow(2)") if w > MAXINT / (BASE - t)

      w *= BASE - t
      k += BASE
    end

    out = output.length + 1
    bias = adapt(i - oldi, out, oldi == 0)

    # i was supposed to wrap around from out to 0,
    # incrementing n each time, so we'll fix that now:
    raise(ConversionError, "punycode_overflow(3)") if (i / out) > MAXINT - n

    n += (i / out)
    i %= out

    # Integer#chr raises a bare RangeError for values outside the Unicode
    # range or inside the surrogate block; report those as malformed input
    # the same way as every other decoding failure.
    if n > 0x10FFFF || (n >= 0xD800 && n <= 0xDFFF)
      raise(ConversionError, "punycode_bad_input(3)")
    end

    # Insert n at position i of the output:
    output.insert(i, n)
    i += 1
  end

  output.map { |c| c.chr(Encoding::UTF_8) }.join(EMPTY).encode(input_encoding)
end
.decode_digit(cp) ⇒ Object
decode_digit(cp) returns the numeric value of a basic code point (for use in representing integers) in the range 0 to base-1, or base if cp does not represent a value.
29 30 31 |
# File 'lib/simpleidn.rb', line 29
#
# decode_digit(cp) returns the numeric value of a basic code point (for use
# in representing integers) in the range 0 to BASE-1, or BASE if cp does
# not represent a value.
#
# cp - the code point to interpret, as an Integer.
#
# The ranges are matched explicitly. A chain of "cp - 48 < 10" style
# comparisons only works with unsigned arithmetic; with Ruby's signed
# Integers every code point below the tested range would satisfy the
# comparison and be mapped to a bogus (possibly negative) digit.
#
# Returns an Integer in 0..BASE.
def decode_digit(cp)
  case cp
  when 48..57  then cp - 22 # '0'..'9' => 26..35
  when 65..90  then cp - 65 # 'A'..'Z' => 0..25
  when 97..122 then cp - 97 # 'a'..'z' => 0..25
  else BASE
  end
end
.encode(input) ⇒ Object
Main encode function
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
# File 'lib/simpleidn.rb', line 128
#
# Main encode function: converts a string into its Punycode representation.
#
# input - the String to encode. It is transcoded to UTF-8 internally.
#
# Returns the encoded String, transcoded back to the encoding of input.
# When input contains at least one code point <= ASCII_MAX, those code
# points come first, followed by the delimiter.
#
# Raises ConversionError when delta would exceed MAXINT.
def encode(input)
  input_encoding = input.encoding
  input = input.encode(Encoding::UTF_8).codepoints.to_a

  # Initialize the state:
  n = INITIAL_N
  delta = 0
  bias = INITIAL_BIAS

  # Handle the basic code points:
  output = input.select { |char| char <= ASCII_MAX }

  # h is the number of code points that have been handled, b is the
  # number of basic code points.
  h = b = output.length

  output << DELIMITER if b > 0

  # Main encoding loop:
  while h < input.length
    # All non-basic code points < n have been
    # handled already. Find the next larger one:
    m = input.select { |char| char >= n }.push(MAXINT).min

    # Increase delta enough to advance the decoder's
    # <n,i> state to <m,0>, but guard against overflow:
    raise(ConversionError, "punycode_overflow (1)") if m - n > (MAXINT - delta) / (h + 1)

    delta += (m - n) * (h + 1)
    n = m

    input.each do |char|
      if char < n
        delta += 1
        raise(ConversionError, "punycode_overflow(2)") if delta > MAXINT
      end

      next unless char == n

      # Represent delta as a generalized variable-length integer:
      q = delta
      k = BASE
      loop do
        # Threshold for this digit position: k - bias limited to TMIN..TMAX.
        t = if k <= bias
              TMIN
            elsif k >= bias + TMAX
              TMAX
            else
              k - bias
            end
        break if q < t

        output << encode_digit(t + (q - t) % (BASE - t))
        q = (q - t) / (BASE - t)
        k += BASE
      end

      output << encode_digit(q)
      bias = adapt(delta, h + 1, h == b)
      delta = 0
      h += 1
    end

    delta += 1
    n += 1
  end

  output.map { |c| c.chr(Encoding::UTF_8) }.join(EMPTY).encode(input_encoding)
end
.encode_digit(d) ⇒ Object
encode_digit(d) returns the basic code point whose value (when used for representing integers) is d, which needs to be in the range 0 to base-1.
36 37 38 39 40 |
# File 'lib/simpleidn.rb', line 36
#
# encode_digit(d) returns the basic code point whose value (when used for
# representing integers) is d, which needs to be in the range 0 to BASE-1.
#
#   0..25  map to ASCII a..z
#   26..35 map to ASCII 0..9
def encode_digit(d)
  if d < 26
    d + 97
  else
    d + 22
  end
end