Class: UniversalDetector::MultiByteCharSetProber
Instance Method Summary
collapse
#filter_high_bit_only, #filter_with_english_letters, #filter_without_english_letters, #get_state
Constructor Details
Returns a new instance of MultiByteCharSetProber.
35
36
37
38
39
40
|
# File 'lib/MultiByteCharSetProber.rb', line 35
# Builds a prober with no distribution analyzer and no coding state machine
# attached, and a two-element last-character buffer filled with NUL bytes.
# NOTE(review): presumably concrete subclasses assign the analyzer and the
# state machine after calling this constructor - confirm.
def initialize
  super
  @_mDistributionAnalyzer = nil
  @_mCodingSM = nil
  # Double quotes are required here: in single quotes '\x00' is the four
  # literal characters backslash, x, 0, 0 instead of one NUL byte.
  @_mLastChar = ["\x00", "\x00"]
end
|
Instance Method Details
#feed(aBuf) ⇒ Object
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
|
# File 'lib/MultiByteCharSetProber.rb', line 56
# Runs one chunk of input through the coding state machine and the
# distribution analyzer, then reports the prober state.
#
# For every element of the chunk the state machine is advanced:
# * :Error  - the prober state becomes :NotMe and scanning stops;
# * :ItsMe  - the prober state becomes :FoundIt and scanning stops;
# * :Start  - the analyzer is fed together with the state machine's current
#             character length.
#
# aBuf - the chunk to examine; it must respond to #length and to integer and
#        range indexing. An empty chunk leaves the remembered last byte
#        untouched.
#
# Returns the value of get_state after the chunk has been processed.
def feed(aBuf)
  aLen = aBuf.length
  (0...aLen).each do |i|
    case @_mCodingSM.next_state(aBuf[i])
    when :Error
      if UniversalDetector::DEBUG
        # Interpolation keeps the debug trace from raising when the charset
        # name is nil, and "\n" is a real newline (the single-quoted form is
        # a literal backslash followed by n).
        p("#{get_charset_name} prober hit error at byte #{i}\n")
      end
      @_mState = :NotMe
      break
    when :ItsMe
      @_mState = :FoundIt
      break
    when :Start
      charLen = @_mCodingSM.get_current_charlen
      if i.zero?
        # The character ending at index 0 began in the previous chunk, so it
        # is completed with the byte remembered in @_mLastChar[0].
        @_mLastChar[1] = aBuf[0]
        @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
      else
        # NOTE(review): the inclusive range hands over up to three elements
        # (i-1, i, i+1); confirm whether the analyzer only needs the pair
        # (i-1, i).
        @_mDistributionAnalyzer.feed(aBuf[(i - 1)..(i + 1)], charLen)
      end
    end
  end

  # An empty chunk has no last element: aBuf[-1] would be nil and would wipe
  # out the byte remembered from the previous chunk.
  @_mLastChar[0] = aBuf[aLen - 1] unless aLen.zero?

  if get_state == :Detecting &&
     @_mDistributionAnalyzer.got_enough_data && (get_confidence > SHORTCUT_THRESHOLD)
    @_mState = :FoundIt
  end
  get_state
end
|
#get_charset_name ⇒ Object
53
54
|
# File 'lib/MultiByteCharSetProber.rb', line 53
# Name of the charset handled by this prober. This implementation has no
# charset of its own and yields nil.
# NOTE(review): presumably concrete subclasses override this - confirm.
def get_charset_name
  nil
end
|
#get_confidence ⇒ Object
90
91
92
|
# File 'lib/MultiByteCharSetProber.rb', line 90
# Reports the confidence value computed by the distribution analyzer.
#
# Returns whatever the analyzer's get_confidence returns.
def get_confidence
  analyzer = @_mDistributionAnalyzer
  analyzer.get_confidence
end
|
#reset ⇒ Object
42
43
44
45
46
47
48
49
50
51
|
# File 'lib/MultiByteCharSetProber.rb', line 42
# Puts the prober back into its initial state: runs the inherited reset,
# resets the coding state machine and the distribution analyzer when they
# have been assigned, and refills the last-character buffer with NUL bytes.
def reset
  super
  @_mCodingSM.reset if @_mCodingSM
  @_mDistributionAnalyzer.reset if @_mDistributionAnalyzer
  # Double quotes are required here: in single quotes '\x00' is the four
  # literal characters backslash, x, 0, 0 instead of one NUL byte.
  @_mLastChar = ["\x00", "\x00"]
end
|