Module: Analects::Encoding

Extended by:
Encoding
Included in:
Encoding
Defined in:
lib/analects/encoding.rb

Constant Summary collapse

GB =
::Encoding::GB18030
BIG5 =
::Encoding::BIG5_UAO

Instance Method Summary collapse

Instance Method Details

#from_big5(str) ⇒ Object



18
19
20
# File 'lib/analects/encoding.rb', line 18

# Converts a Big5 string to UTF-8.
#
# Delegates to #recode with the module's BIG5 constant (Big5-UAO).
#
# @param str [String] bytes to be interpreted as BIG5
# @return [String] the UTF-8 result produced by #recode
def from_big5(str)
  source_encoding = BIG5
  recode(source_encoding, str)
end

#from_gb(str) ⇒ Object



14
15
16
# File 'lib/analects/encoding.rb', line 14

# Converts a GB string to UTF-8.
#
# Delegates to #recode with the module's GB constant (GB18030).
#
# @param str [String] bytes to be interpreted as GB
# @return [String] the UTF-8 result produced by #recode
def from_gb(str)
  source_encoding = GB
  recode(source_encoding, str)
end

#ratings(str) ⇒ Object

Crude way to guess which encoding a string is in: scores each candidate encoding and returns the candidates best-first.



34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/analects/encoding.rb', line 34

# Crude heuristic for guessing which CJK encoding +str+ is in.
#
# Every encoding accepted by #valid_cjk is scored by decoding +str+ with it
# and checking each resulting codepoint against
# Analects::Models::Zi.codepoint_ranges. A codepoint that falls inside the
# range at index +idx+ contributes <tt>6 - idx</tt>, so ranges listed earlier
# weigh more. An empty string scores 0 for every candidate.
#
# @param str [String] raw bytes whose encoding is unknown
# @return [Array<Array>] +[encoding, score]+ pairs, highest score first
def ratings(str)
  # NOTE(review): read once on the assumption that the range list does not
  # change while this method runs.
  ranges = Analects::Models::Zi.codepoint_ranges

  scored = valid_cjk(str).map do |enc|
    score = recode(enc, str).codepoints.sum do |point|
      # `sum` (unlike `inject(:+)`) yields 0 rather than nil for empty input.
      ranges.each_with_index.sum { |range, idx| range.include?(point) ? 6 - idx : 0 }
    end
    [enc, score]
  end

  scored.sort_by(&:last).reverse
end

#recode(enc, str) ⇒ Object



10
11
12
# File 'lib/analects/encoding.rb', line 10

# Reinterprets the bytes of +str+ as +enc+ and transcodes them to UTF-8.
#
# Works on a copy so the caller's string keeps its original encoding tag and
# frozen strings are accepted (String#force_encoding mutates its receiver).
#
# @param enc [Encoding, String] the encoding the bytes are assumed to be in
# @param str [String] the raw input; left untouched
# @return [String] a new UTF-8 string
# @raise [Encoding::InvalidByteSequenceError] if the bytes are not valid in +enc+
# @raise [Encoding::UndefinedConversionError] if a character has no UTF-8 mapping
def recode(enc, str)
  str.dup.force_encoding(enc).encode('UTF-8')
end

#valid_cjk(str) ⇒ Object



22
23
24
25
26
27
28
29
30
31
# File 'lib/analects/encoding.rb', line 22

# Lists the candidate encodings (GB, then BIG5) under which +str+ can be
# transcoded to UTF-8 by #recode without a conversion error.
#
# @param str [String] raw bytes whose encoding is unknown
# @return [Array<Encoding>] the subset of +[GB, BIG5]+ that decodes cleanly,
#   in that order; empty when neither does
def valid_cjk(str)
  [GB, BIG5].select do |candidate|
    begin
      recode(candidate, str)
      true
    rescue ::Encoding::UndefinedConversionError, ::Encoding::InvalidByteSequenceError
      # Not decodable under this candidate; drop it from the result.
      false
    end
  end
end