Class: ByteCharacteristics

Inherits:
Characteristics show all
Defined in:
lib/characteristics/byte.rb

Constant Summary collapse

# Pattern matching encoding names that start with "ISO-8859-" or "TIS-620".
# NOTE(review): presumably consulted by the encoding_has_c1? helper (not
# visible here) to decide whether bytes 0x80..0x9F are C1 controls - confirm.
HAS_C1 =
/^(ISO-8859-|TIS-620)/
# Lookup table: byte value => pattern of encoding names in which that byte
# has no character assigned. Patterns are anchored at the start of the name,
# so they act as prefix matches. #assigned? reports a non-control byte as
# assigned when the pattern stored for its ordinal does NOT match the
# encoding name; bytes without an entry are therefore always assigned.
UNASSIGNED =
{
  0x80 => /^(IBM869)/,
  0x81 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258)|IBM869|Windows-874)/,
  0x82 => /^(IBM869|Windows-874)/,
  0x83 => /^(Windows-(1250|1257)|IBM869|Windows-874)/,
  0x84 => /^(IBM869|Windows-874)/,
  0x85 => /^(IBM869)/,
  0x86 => /^(Windows-874)/,
  0x87 => /^(IBM869|Windows-874)/,
  0x88 => /^(Windows-(1250|1253|1257)|Windows-874)/,
  0x89 => /^(Windows-874)/,
  0x8A => /^(Windows-(1253|1255|1257|1258)|Windows-874)/,
  0x8B => /^(Windows-874)/,
  0x8C => /^(Windows-(1253|1255|1257)|Windows-874)/,
  0x8D => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/,
  0x8E => /^(Windows-(1253|1254|1255|1258)|Windows-874)/,
  0x8F => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/,

  0x90 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258)|macThai|Windows-874)/,
  0x93 => /^(IBM869)/,
  0x94 => /^(IBM869)/,
  0x98 => /^(Windows-(1250|1251|1253|1257)|Windows-874)/,
  0x99 => /^(Windows-874)/,
  0x9A => /^(Windows-(1253|1255|1257|1258)|Windows-874)/,
  0x9B => /^(IBM864|Windows-874)/,
  0x9C => /^(Windows-(1253|1255|1257)|IBM864|Windows-874)/,
  0x9D => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/,
  0x9E => /^(Windows-(1253|1254|1255|1258)|Windows-874)/,
  0x9F => /^(Windows-(1253|1255|1257)|IBM864|macThai|Windows-874)/,

  0xA0 => /^(TIS-620)/,
  0xA1 => /^(ISO-8859-(6|8)|Windows-(1257))/,
  0xA2 => /^(ISO-8859-(6))/,
  0xA3 => /^(ISO-8859-(6))/,
  0xA5 => /^(ISO-8859-(3|6)|Windows-(1257))/,
  0xA6 => /^(ISO-8859-(6)|IBM864)/,
  0xA7 => /^(ISO-8859-(6))/,
  0xA8 => /^(ISO-8859-(6))/,
  0xA9 => /^(ISO-8859-(6))/,
  0xAA => /^(ISO-8859-(6)|Windows-(1253))/,
  0xAB => /^(ISO-8859-(6))/,
  0xAE => /^(ISO-8859-(3|6|7))/,
  0xAF => /^(ISO-8859-(6))/,

  0xB0 => /^(ISO-8859-(6))/,
  0xB1 => /^(ISO-8859-(6))/,
  0xB2 => /^(ISO-8859-(6))/,
  0xB3 => /^(ISO-8859-(6))/,
  0xB4 => /^(ISO-8859-(6))/,
  0xB5 => /^(ISO-8859-(6))/,
  0xB6 => /^(ISO-8859-(6))/,
  0xB7 => /^(ISO-8859-(6))/,
  0xB8 => /^(ISO-8859-(6))/,
  0xB9 => /^(ISO-8859-(6))/,
  0xBA => /^(ISO-8859-(6))/,
  0xBC => /^(ISO-8859-(6))/,
  0xBD => /^(ISO-8859-(6))/,
  0xBE => /^(ISO-8859-(3|6))/,
  0xBF => /^(ISO-8859-(8))/,

  0xC0 => /^(ISO-8859-(6|8))/,
  0xC1 => /^(ISO-8859-(8))/,
  0xC2 => /^(ISO-8859-(8))/,
  0xC3 => /^(ISO-8859-(3|8))/,
  0xC4 => /^(ISO-8859-(8))/,
  0xC5 => /^(ISO-8859-(8))/,
  0xC6 => /^(ISO-8859-(8))/,
  0xC7 => /^(ISO-8859-(8))/,
  0xC8 => /^(ISO-8859-(8))/,
  0xC9 => /^(ISO-8859-(8))/,
  0xCA => /^(ISO-8859-(8))/,
  0xCB => /^(ISO-8859-(8))/,
  0xCC => /^(ISO-8859-(8))/,
  0xCD => /^(ISO-8859-(8))/,
  0xCE => /^(ISO-8859-(8))/,
  0xCF => /^(ISO-8859-(8))/,

  0xD0 => /^(ISO-8859-(3|8))/,
  0xD1 => /^(ISO-8859-(8))/,
  0xD2 => /^(ISO-8859-(7|8)|Windows-(1253))/,
  0xD3 => /^(ISO-8859-(8))/,
  0xD4 => /^(ISO-8859-(8))/,
  0xD5 => /^(ISO-8859-(8)|IBM857)/, # IBM857: Ruby does not support euro sign
  0xD6 => /^(ISO-8859-(8))/,
  0xD7 => /^(ISO-8859-(8))/,
  0xD8 => /^(ISO-8859-(8))/,
  0xD9 => /^(ISO-8859-(8)|Windows-(1255))/,
  0xDA => /^(ISO-8859-(8)|Windows-(1255))/,
  0xDB => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
  0xDC => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
  0xDD => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
  0xDE => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
  0xDF => /^(ISO-8859-(6)|Windows-(1255))/,

  0xE3 => /^(ISO-8859-(3))/,
  0xE7 => /^(IBM857)/,

  0xF0 => /^(ISO-8859-(3))/, # mac: Treating F0 as always assigned
  0xF2 => /^(IBM857)/,
  0xF3 => /^(ISO-8859-(6))/,
  0xF4 => /^(ISO-8859-(6))/,
  0xF5 => /^(ISO-8859-(6)|macTurkish)/,
  0xF6 => /^(ISO-8859-(6))/,
  0xF7 => /^(ISO-8859-(6))/,
  0xF8 => /^(ISO-8859-(6))/,
  0xF9 => /^(ISO-8859-(6))/,
  0xFA => /^(ISO-8859-(6))/,
  0xFB => /^(ISO-8859-(6|8)|Windows-(1255))/,
  0xFC => /^(ISO-8859-(6|8|11)|Windows-(1255)|macThai|TIS-620|Windows-874)/,
  0xFD => /^(ISO-8859-(6|11)|macThai|TIS-620|Windows-874)/,
  0xFE => /^(ISO-8859-(6|11)|macThai|TIS-620|Windows-874)/,
  0xFF => /^(ISO-8859-(6|7|8|11)|Windows-(1253|1255)|IBM864|macGreek|macThai|TIS-620|Windows-874)/, # macGreek: Ruby does not know of soft hyphen at FF
}.freeze
# Byte values that #blank? accepts without looking at the encoding name
# (ASCII TAB and SPACE).
BLANKS = [0x09, 0x20].freeze
# Byte values that #separator? (and, through it, #blank?) accepts without
# looking at the encoding name: the contiguous run 0x0A..0x0D
# (ASCII LF, VT, FF, CR).
SEPARATORS = (0x0A..0x0D).to_a.freeze
# Lookup table: byte value => pattern of encoding names in which that byte
# counts as an additional blank. #blank? consults it after the
# encoding-independent BLANKS and SEPARATORS lists; bytes without an entry
# are never extra blanks.
EXTRA_BLANKS =
{
  0xA0 => /^(ISO-8859-|Windows-125|macThai|Windows-874)/,
  0xA1 => /^IBM864/,
  0xAD => /^(ISO-8859-(?!11)|Windows-125)/,
  0x9A => /^KOI8-/,
  0x9D => /^Windows-(1256)/,
  0x9E => /^Windows-(1256)/,
  0xCA => /^mac(?!Thai)/,
  0xDB => /^macThai/,
  0xDC => /^macThai/,
  0xF0 => /^(IBM(?!437|737|86)|IBM869|CP)/,
  0xFF => /^(IBM(?!864)|CP)/, # |macGreek, but is unassigned in Ruby
}.freeze
# Lookup table: byte value => pattern of encoding names in which that byte
# is a format character. Read by #format? (and so by #bidi_control?, which
# delegates to it).
# NOTE(review): 0xFD/0xFE are presumably the left-to-right / right-to-left
# marks of these Hebrew/Arabic code pages - confirm against the code charts.
FORMATS =
{
  0xFD => /^(ISO-8859-8|Windows-(1255|1256))/,
  0xFE => /^(ISO-8859-8|Windows-(1255|1256))/,
}.freeze

Constants inherited from Characteristics

Characteristics::UNICODE_VERSION, Characteristics::VERSION

Instance Attribute Summary

Attributes inherited from Characteristics

#encoding

Instance Method Summary collapse

Methods inherited from Characteristics

create, create_for_type, #docomo?, #kddi?, #softbank?, type_from_encoding_name, #valid?

Constructor Details

#initialize(char) ⇒ ByteCharacteristics

Returns a new instance of ByteCharacteristics.



151
152
153
154
# File 'lib/characteristics/byte.rb', line 151

# Builds the characteristics object for a single byte.
#
# @param char [#ord] the one-byte character to inspect
#
# NOTE(review): the predicates in this class read @encoding_name, which is
# not assigned here - presumably Characteristics#initialize sets it; confirm.
def initialize(char)
  super # bare super forwards +char+ unchanged to the parent constructor
  @ord = char.ord # cache the numeric byte value used by every predicate
end

Instance Method Details

#assigned? ⇒ Boolean

Returns:

  • (Boolean)


160
161
162
# File 'lib/characteristics/byte.rb', line 160

# Tells whether the byte has a meaning in the current encoding.
#
# Control bytes always count as assigned (the value of #control? is passed
# through as-is). Any other byte is assigned unless the UNASSIGNED pattern
# registered for its ordinal matches the encoding name; bytes without an
# entry are assigned.
#
# @return [Boolean]
def assigned?
  control_result = control?
  return control_result if control_result

  # nil =~ name is nil, so a byte missing from the table yields true here.
  !(UNASSIGNED[@ord] =~ @encoding_name)
end

#bidi_control? ⇒ Boolean

Returns:

  • (Boolean)


194
195
196
# File 'lib/characteristics/byte.rb', line 194

# Tells whether the byte is a bidirectional control character.
# For single-byte encodings this is the same answer as #format?: the method
# simply delegates to it.
#
# @return [Boolean] whatever #format? returns
def bidi_control?
  format?
end

#blank? ⇒ Boolean

Returns:

  • (Boolean)


180
181
182
183
184
# File 'lib/characteristics/byte.rb', line 180

# Tells whether the byte is blank (whitespace-like) in the current encoding.
#
# A byte is blank when it is listed in BLANKS or SEPARATORS (both
# encoding-independent), or when the EXTRA_BLANKS pattern registered for its
# ordinal matches the encoding name.
#
# Always returns a strict true/false; the raw result of Regexp#=~
# (a match offset or nil) is never leaked to the caller.
#
# @return [Boolean]
def blank?
  return true if BLANKS.include?(@ord) || SEPARATORS.include?(@ord)

  pattern = EXTRA_BLANKS[@ord]
  # No table entry means the byte is never an encoding-specific blank.
  !pattern.nil? && !(pattern =~ @encoding_name).nil?
end

#c0? ⇒ Boolean

Returns:

  • (Boolean)


168
169
170
# File 'lib/characteristics/byte.rb', line 168

# Tells whether the byte is a C0 control: its value is below 0x20 and the
# encoding_has_c0? helper agrees for the current encoding.
#
# @return [Boolean] false for bytes >= 0x20, otherwise the helper's answer
def c0?
  return false unless @ord < 0x20

  encoding_has_c0?
end

#c1? ⇒ Boolean

Returns:

  • (Boolean)


172
173
174
# File 'lib/characteristics/byte.rb', line 172

# Tells whether the byte is a C1 control: its value lies in 0x80..0x9F and
# the encoding_has_c1? helper agrees for the current encoding.
#
# @return [Boolean] false outside 0x80..0x9F, otherwise the helper's answer
def c1?
  return false unless @ord.between?(0x80, 0x9F)

  encoding_has_c1?
end

#control? ⇒ Boolean

Returns:

  • (Boolean)


164
165
166
# File 'lib/characteristics/byte.rb', line 164

# Tells whether the byte is any kind of control character: C0, C1 or DELETE.
# The checks run in that order and stop at the first truthy answer, which is
# returned unchanged; if none is truthy the result of #delete? is returned.
#
# @return [Boolean]
def control?
  verdict = c0?
  verdict ||= c1?
  verdict || delete?
end

#delete? ⇒ Boolean

Returns:

  • (Boolean)


176
177
178
# File 'lib/characteristics/byte.rb', line 176

# Tells whether the byte is the DELETE control: its value is exactly 0x7F
# and the encoding_has_delete? helper agrees for the current encoding.
#
# @return [Boolean] false for any other byte, otherwise the helper's answer
def delete?
  return false unless @ord == 0x7F

  encoding_has_delete?
end

#format? ⇒ Boolean

Returns:

  • (Boolean)


190
191
192
# File 'lib/characteristics/byte.rb', line 190

# Tells whether the byte is a format character in the current encoding,
# i.e. whether the FORMATS pattern registered for its ordinal matches the
# encoding name.
#
# Always returns a strict true/false; the raw result of Regexp#=~
# (a match offset or nil) is never leaked to the caller.
#
# @return [Boolean]
def format?
  pattern = FORMATS[@ord]
  # Bytes without a table entry are never format characters.
  !pattern.nil? && !(pattern =~ @encoding_name).nil?
end

#separator? ⇒ Boolean

Returns:

  • (Boolean)


186
187
188
# File 'lib/characteristics/byte.rb', line 186

# Tells whether the byte is one of the SEPARATORS values. The check does not
# depend on the encoding name.
#
# @return [Boolean]
def separator?
  SEPARATORS.include?(@ord)
end

#unicode? ⇒ Boolean

Returns:

  • (Boolean)


156
157
158
# File 'lib/characteristics/byte.rb', line 156

# Byte characteristics never describe a Unicode encoding, so this is a
# constant answer.
#
# @return [Boolean] always false
def unicode?
  false
end