Class: CharDet::EUCJPProber
Instance Attribute Summary
#active
Instance Method Summary
collapse
#filter_high_bit_only, #filter_with_english_letters, #filter_without_english_letters, #get_state
Constructor Details
Returns a new instance of EUCJPProber.
Instance Method Details
#feed(aBuf) ⇒ Object
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
|
# File 'lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb', line 48
# Pushes every byte of +aBuf+ through the coding state machine, hands
# two-byte windows to the context and distribution analyzers, and returns
# the prober state (the value of get_state) afterwards.
#
# For each byte the state machine's answer is handled as follows:
# * EError  - the state becomes ENotMe and scanning stops.
# * EItsMe  - the state becomes EFoundIt and scanning stops.
# * EStart  - the current character length is read from the state machine
#   and both analyzers are fed the two bytes ending at the current
#   position. For the first byte of the buffer that window is
#   @_mLastChar, whose slot 0 holds the final byte of the previous call.
# * anything else - nothing is fed; scanning continues.
#
# After the scan, while the state is still EDetecting, the state is
# promoted to EFoundIt once the context analyzer reports enough data and
# get_confidence exceeds SHORTCUT_THRESHOLD.
#
# aBuf:: chunk to examine; it must support +length+ and range slicing
#        (presumably a String of raw bytes - confirm against callers).
#
# An empty +aBuf+ leaves @_mLastChar untouched. Previously the carry-over
# slot was assigned aBuf[-1..-1], which is nil for an empty String.
def feed(aBuf)
  aLen = aBuf.length
  (0...aLen).each do |i|
    codingState = @_mCodingSM.next_state(aBuf[i..i])
    if codingState == EError
      $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
      @_mState = ENotMe
      break
    elsif codingState == EItsMe
      @_mState = EFoundIt
      break
    elsif codingState == EStart
      charLen = @_mCodingSM.get_current_charlen()
      if i == 0
        # The window straddles two calls: slot 0 was stored by the previous
        # feed, slot 1 is the first byte of this buffer.
        @_mLastChar[1] = aBuf[0..0]
        @_mContextAnalyzer.feed(@_mLastChar, charLen)
        @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
      else
        @_mContextAnalyzer.feed(aBuf[i-1...i+1], charLen)
        @_mDistributionAnalyzer.feed(aBuf[i-1...i+1], charLen)
      end
    end
  end

  # Remember the final byte for the next call; an empty buffer has none.
  @_mLastChar[0] = aBuf[aLen-1..aLen-1] if aLen > 0

  if get_state() == EDetecting
    if @_mContextAnalyzer.got_enough_data() && (get_confidence() > SHORTCUT_THRESHOLD)
      @_mState = EFoundIt
    end
  end
  get_state()
end
|
#get_charset_name ⇒ Object
44
45
46
|
# File 'lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb', line 44
# Name of the charset this prober reports: always the literal "EUC-JP".
def get_charset_name
  "EUC-JP"
end
|
#get_confidence ⇒ Object
83
84
85
86
|
# File 'lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb', line 83
# Confidence of this prober: the larger of the context analyzer's and the
# distribution analyzer's own get_confidence values.
def get_confidence
  context_confidence = @_mContextAnalyzer.get_confidence
  distribution_confidence = @_mDistributionAnalyzer.get_confidence
  [context_confidence, distribution_confidence].max
end
|
#reset ⇒ Object
39
40
41
42
|
# File 'lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb', line 39
# Resets the prober: first runs the inherited reset (explicitly with no
# arguments), then resets the context analyzer. The value returned is
# whatever the context analyzer's reset returns.
def reset
  super()
  @_mContextAnalyzer.reset
end
|