Class: UniversalDetector::EUCJPProber
Instance Method Summary
collapse
#filter_high_bit_only, #filter_with_english_letters, #filter_without_english_letters, #get_state
Constructor Details
Returns a new instance of EUCJPProber.
Instance Method Details
#feed(aBuf) ⇒ Object
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
# File 'lib/EUCJPProber.rb', line 55
# Runs one chunk of input through the coding state machine and the two
# analyzers, then reports the prober state.
#
# For every position in aBuf the element is handed to
# @_mCodingSM.next_state:
#   :Error -> @_mState becomes :NotMe and scanning stops
#   :ItsMe -> @_mState becomes :FoundIt and scanning stops
#   :Start -> the current character length is read from the state machine
#             and both analyzers are fed a window ending at this position
#
# After scanning, the last element of the chunk is stored in
# @_mLastChar[0] so the next call can build a window across the chunk
# boundary. An empty chunk leaves @_mLastChar untouched: aBuf[-1] is nil
# for an empty buffer, and storing it would raise or clobber the carry-over.
#
# aBuf - indexable buffer responding to #length and #[]
#        (presumably a String of raw bytes -- confirm against callers).
#
# Returns the value of get_state() after processing.
def feed(aBuf)
  aLen = aBuf.length

  (0...aLen).each do |i|
    codingState = @_mCodingSM.next_state(aBuf[i])

    if codingState == :Error
      # NOTE(review): '\n' is single-quoted, so the message ends with a
      # literal backslash-n rather than a newline; left unchanged on purpose.
      p(get_charset_name() + ' prober hit error at byte ' + i.to_s + '\n') if DEBUG
      @_mState = :NotMe
      break
    elsif codingState == :ItsMe
      @_mState = :FoundIt
      break
    elsif codingState == :Start
      charLen = @_mCodingSM.get_current_charlen()

      if i == 0
        # The character may have started in the previous chunk: pair the
        # carried-over element in slot 0 with the first element of this chunk.
        @_mLastChar[1] = aBuf[0]
        window = @_mLastChar
      else
        # NOTE(review): i-1..i+1 is an inclusive range, so this window can
        # span three elements; confirm the analyzers only read charLen of them.
        window = aBuf[i-1..i+1]
      end

      @_mContextAnalyzer.feed(window, charLen)
      @_mDistributionAnalyzer.feed(window, charLen)
    end
  end

  # Remember the final element for the next chunk; skip when nothing was fed.
  @_mLastChar[0] = aBuf[aLen - 1] if aLen > 0

  if get_state() == :Detecting
    # Shortcut: stop detecting once the context analyzer has seen enough
    # data and the confidence is above SHORTCUT_THRESHOLD.
    if @_mContextAnalyzer.got_enough_data() && (get_confidence() > SHORTCUT_THRESHOLD)
      @_mState = :FoundIt
    end
  end

  get_state()
end
|
#get_charset_name ⇒ Object
51
52
53
|
# File 'lib/EUCJPProber.rb', line 51
# Name of the charset this prober reports.
#
# Returns the String "EUC-JP".
def get_charset_name
  'EUC-JP'
end
|
#get_confidence ⇒ Object
91
92
93
94
95
|
# File 'lib/EUCJPProber.rb', line 91
# Overall confidence for this prober.
#
# Asks the context analyzer and then the distribution analyzer for their
# confidence values and returns the larger of the two.
def get_confidence
  analyzers = [@_mContextAnalyzer, @_mDistributionAnalyzer]
  analyzers.map { |analyzer| analyzer.get_confidence() }.max
end
|
#reset ⇒ Object
46
47
48
49
|
# File 'lib/EUCJPProber.rb', line 46
# Puts the prober back into its initial state.
#
# Delegates to the superclass reset first, then resets the context
# analyzer held by this prober. Returns whatever the context analyzer's
# reset returns.
def reset
  super()
  @_mContextAnalyzer.reset
end
|