Class: CharDet::UTF8Prober
Instance Attribute Summary
#active
Instance Method Summary
collapse
#filter_high_bit_only, #filter_with_english_letters, #filter_without_english_letters, #get_state
Constructor Details
Returns a new instance of UTF8Prober.
Instance Method Details
#feed(aBuf) ⇒ Object
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
|
# File 'lib/rchardet/utf8prober.rb', line 49
# Runs every byte of +aBuf+ through the coding state machine and updates
# the prober state accordingly.
#
# Each byte is converted to a one-character string before being handed to
# @codingSM.next_state. Scanning stops at the first byte that yields
# EError (state becomes ENotMe) or EItsMe (state becomes EFoundIt). Each
# time the machine reports EStart with a current character length of at
# least 2, @numOfMBChar is incremented.
#
# After scanning, if the prober is still in EDetecting and get_confidence
# exceeds SHORTCUT_THRESHOLD, the state is promoted to EFoundIt.
#
# @param aBuf an object responding to each_byte (presumably a String -- confirm with callers)
# @return the value of get_state after processing
def feed(aBuf)
  aBuf.each_byte do |byte|
    sm_state = @codingSM.next_state(byte.chr)

    if sm_state == EError
      @state = ENotMe
      break
    end

    if sm_state == EItsMe
      @state = EFoundIt
      break
    end

    # Only a return to EStart can contribute to the multi-byte counter.
    next unless sm_state == EStart

    @numOfMBChar += 1 if @codingSM.get_current_charlen >= 2
  end

  # Shortcut: stop detecting once the confidence passes the threshold.
  @state = EFoundIt if get_state == EDetecting && get_confidence > SHORTCUT_THRESHOLD

  get_state
end
|
#get_charset_name ⇒ Object
45
46
47
|
# File 'lib/rchardet/utf8prober.rb', line 45
# Name of the charset this prober reports.
#
# @return [String] always "utf-8"
def get_charset_name
  "utf-8"
end
|
#get_confidence ⇒ Object
75
76
77
78
79
80
81
82
83
|
# File 'lib/rchardet/utf8prober.rb', line 75
# Computes a confidence value from the multi-byte character counter.
#
# With fewer than 6 counted characters the result is
# 1.0 - 0.99 * ONE_CHAR_PROB ** @numOfMBChar; from 6 onwards it is the
# fixed value 0.99.
#
# @return [Float] the confidence value
def get_confidence
  ceiling = 0.99
  return ceiling unless @numOfMBChar < 6

  1.0 - ceiling * (ONE_CHAR_PROB ** @numOfMBChar)
end
|
#reset ⇒ Object
39
40
41
42
43
|
# File 'lib/rchardet/utf8prober.rb', line 39
# Restores the prober to its initial condition.
#
# Delegates to the parent implementation first, then resets the coding
# state machine and zeroes the multi-byte character counter.
def reset
  super()
  @codingSM.reset
  # Kept as the final statement so the method's return value stays 0.
  @numOfMBChar = 0
end
|