Class: CharDet::EUCJPProber
Instance Attribute Summary
#active
Instance Method Summary
collapse
#filter_high_bit_only, #filter_with_english_letters, #filter_without_english_letters, #result, #state
Constructor Details
Returns a new instance of EUCJPProber.
Instance Method Details
#charset_name ⇒ Object
44
45
46
|
# File 'lib/rchardet/eucjpprober.rb', line 44
# Name of the character set this prober recognises.
#
# @return [String] always "EUC-JP"
def charset_name
  "EUC-JP"
end
|
#confidence ⇒ Object
83
84
85
86
|
# File 'lib/rchardet/eucjpprober.rb', line 83
# Overall confidence for this prober.
#
# @return [Object] whichever is larger: the context analyzer's confidence
#   or the distribution analyzer's confidence
def confidence
  [@_mContextAnalyzer.confidence, @_mDistributionAnalyzer.confidence].max
end
|
#feed(aBuf) ⇒ Object
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
|
# File 'lib/rchardet/eucjpprober.rb', line 48
# Runs a chunk of input through the coding state machine one element at a
# time and forwards every completed character to the context and
# distribution analyzers.
#
# Scanning stops early when the state machine reports EError (state becomes
# ENotMe) or EItsMe (state becomes EFoundIt). After scanning, if the prober
# is still in EDetecting, the context analyzer reports enough data and the
# confidence exceeds SHORTCUT_THRESHOLD, the state is promoted to EFoundIt.
#
# @param aBuf [String] chunk to examine (presumably raw bytes; it is sliced
#   with single-element ranges). An empty chunk is accepted and leaves the
#   remembered trailing element untouched.
# @return [Object] the value of +state+ after this chunk has been processed
def feed(aBuf)
  aLen = aBuf.length
  aLen.times do |i|
    codingState = @_mCodingSM.next_state(aBuf[i..i])
    if codingState == EError
      $stderr << "#{charset_name} prober hit error at byte #{i}\n" if $debug
      @_mState = ENotMe
      break
    elsif codingState == EItsMe
      @_mState = EFoundIt
      break
    elsif codingState == EStart
      # Back at the start state: a full character has just been consumed.
      charLen = @_mCodingSM.current_charlen
      if i == 0
        # The character may have begun in the previous chunk, so pair the
        # remembered trailing element with the first element of this chunk.
        @_mLastChar[1] = aBuf[0..0]
        @_mContextAnalyzer.feed(@_mLastChar, charLen)
        @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
      else
        @_mContextAnalyzer.feed(aBuf[i - 1...i + 1], charLen)
        @_mDistributionAnalyzer.feed(aBuf[i - 1...i + 1], charLen)
      end
    end
  end

  # Remember the trailing element for the next call. For an empty chunk the
  # slice would be nil, which would clobber the remembered element (or raise
  # if @_mLastChar is a String), so skip the update in that case.
  @_mLastChar[0] = aBuf[aLen - 1..aLen - 1] unless aLen == 0

  if state == EDetecting
    if @_mContextAnalyzer.got_enough_data && (confidence > SHORTCUT_THRESHOLD)
      @_mState = EFoundIt
    end
  end

  state
end
|
#reset ⇒ Object
39
40
41
42
|
# File 'lib/rchardet/eucjpprober.rb', line 39
# Restores the prober to its initial condition: first lets the parent class
# reset itself, then resets the context analyzer.
#
# @return [Object] whatever the context analyzer's +reset+ returns
def reset
  super()
  @_mContextAnalyzer.reset
end
|