Class: CharDet::MultiByteCharSetProber
Instance Attribute Summary
#active
Instance Method Summary
collapse
#filter_high_bit_only, #filter_with_english_letters, #filter_without_english_letters, #get_state
Constructor Details
Returns a new instance of MultiByteCharSetProber.
33
34
35
36
37
38
|
# File 'lib/rchardet/mbcharsetprober.rb', line 33
# Builds a prober with no distribution analyzer and no coding state machine
# assigned yet, and a two-character carry-over buffer holding NUL characters.
def initialize
  super
  @_mDistributionAnalyzer = nil
  @_mCodingSM = nil
  # #feed writes into this buffer in place, so it must be a mutable String
  # even when string literals are frozen; hence the explicit copy.
  @_mLastChar = "\x00\x00".dup
end
|
Instance Method Details
#feed(aBuf) ⇒ Object
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
# File 'lib/rchardet/mbcharsetprober.rb', line 54
# Runs one chunk of input through the coding state machine, one element at a
# time, and passes each completed character to the distribution analyzer.
#
# aBuf - the chunk to examine. It is indexed and sliced like a String; the
#        caller is presumably expected to pass byte-oriented data (confirm).
#
# An EError state marks the prober as ENotMe, and an EItsMe state marks it as
# EFoundIt; either one stops processing of the chunk. While the prober is
# still EDetecting, it is promoted to EFoundIt once the analyzer reports
# enough data and the confidence exceeds SHORTCUT_THRESHOLD.
#
# Returns the value of get_state after the chunk has been processed.
def feed(aBuf)
  aLen = aBuf.length
  (0...aLen).each do |i|
    codingState = @_mCodingSM.next_state(aBuf[i, 1])
    if codingState == EError
      $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
      @_mState = ENotMe
      break
    elsif codingState == EItsMe
      @_mState = EFoundIt
      break
    elsif codingState == EStart
      charLen = @_mCodingSM.get_current_charlen
      if i == 0
        # The character may have started in the previous chunk: pair the
        # carried-over element with the first element of this chunk.
        @_mLastChar[1] = aBuf[0, 1]
        @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
      else
        @_mDistributionAnalyzer.feed(aBuf[i - 1, 2], charLen)
      end
    end
  end
  # Remember the final element for the next call. An empty chunk has none;
  # slicing it yields nil, which cannot be assigned into the buffer.
  @_mLastChar[0] = aBuf[aLen - 1, 1] if aLen > 0
  if get_state == EDetecting && @_mDistributionAnalyzer.got_enough_data && get_confidence > SHORTCUT_THRESHOLD
    @_mState = EFoundIt
  end
  get_state
end
|
#get_charset_name ⇒ Object
51
52
|
# File 'lib/rchardet/mbcharsetprober.rb', line 51
# Placeholder with no charset name of its own: always evaluates to nil.
# NOTE(review): presumably overridden by concrete probers - confirm.
def get_charset_name
  nil
end
|
#get_confidence ⇒ Object
85
86
87
|
# File 'lib/rchardet/mbcharsetprober.rb', line 85
# Reports the confidence value computed by the distribution analyzer.
def get_confidence
  @_mDistributionAnalyzer.get_confidence
end
|
#reset ⇒ Object
40
41
42
43
44
45
46
47
48
49
|
# File 'lib/rchardet/mbcharsetprober.rb', line 40
# Returns the prober to its initial condition: runs the inherited reset,
# resets the coding state machine and the distribution analyzer when they
# are set, and restores the two-NUL carry-over buffer.
def reset
  super
  @_mCodingSM.reset if @_mCodingSM
  @_mDistributionAnalyzer.reset if @_mDistributionAnalyzer
  # #feed writes into this buffer in place, so it must be a mutable String
  # even when string literals are frozen; hence the explicit copy.
  @_mLastChar = "\x00\x00".dup
end
|