Class: UniversalDetector::Latin1Prober
Instance Method Summary
collapse
#filter_high_bit_only, #filter_with_english_letters, #filter_without_english_letters, #get_state
Constructor Details
Returns a new instance of Latin1Prober.
109
110
111
112
|
# File 'lib/Latin1Prober.rb', line 109
# Builds a prober: runs the parent class initializer, then puts this
# prober's own counters into their starting state via #reset.
def initialize
  super
  reset
end
|
Instance Method Details
#feed(aBuf) ⇒ Object
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
|
# File 'lib/Latin1Prober.rb', line 124
# Consumes a buffer and updates the per-category transition counters.
#
# The buffer is first passed through filter_with_english_letters. Each
# element is mapped to a character class, and the (previous class, current
# class) pair is looked up in Latin1ClassModel to get a frequency category.
# A category of 0 marks the prober as :NotMe and stops processing; any other
# category increments its counter.
#
# NOTE(review): `unit[0]` only yields an Integer table index if the filtered
# elements respond to [] that way (e.g. Ruby 1.8 strings) -- confirm what
# filter_with_english_letters yields on the Ruby versions in use.
#
# @param aBuf the raw input buffer
# @return whatever get_state reports after the buffer has been processed
def feed(aBuf)
  filter_with_english_letters(aBuf).each do |unit|
    char_class = Latin1_CharToClass[unit[0]]
    category = Latin1ClassModel[@_mLastCharClass * CLASS_NUM + char_class]

    if category == 0
      # This class transition is ruled out by the model; give up on the buffer.
      @_mState = :NotMe
      break
    end

    @_mFreqCounter[category] += 1
    @_mLastCharClass = char_class
  end

  get_state
end
|
#get_charset_name ⇒ Object
120
121
122
|
# File 'lib/Latin1Prober.rb', line 120
# Name of the charset this prober reports.
#
# @return [String] always "windows-1252"
def get_charset_name
  "windows-1252"
end
|
#get_confidence ⇒ Object
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
|
# File 'lib/Latin1Prober.rb', line 140
# Computes a confidence score from the frequency-category counters.
#
# The score is the fraction of counted transitions in category 3 minus
# twenty times the fraction in category 1, floored at zero and then halved.
#
# @return [Float] 0.01 when get_state reports :NotMe; 0.0 when nothing has
#   been counted yet; otherwise the halved, non-negative score.
def get_confidence
  return 0.01 if get_state == :NotMe

  # Enumerable#reduce takes the initial value first and the operator symbol
  # second; the reversed order raises TypeError.
  total = @_mFreqCounter.reduce(0, :+)
  return 0.0 if total <= 0

  # Force Float arithmetic: Integer / Integer would truncate the ratio to 0.
  favourable = @_mFreqCounter[3].to_f / total
  penalty = @_mFreqCounter[1] * 20.0 / total

  confidence = favourable - penalty
  confidence = 0.0 if confidence < 0.0
  confidence * 0.5
end
|
#reset ⇒ Object
114
115
116
117
118
|
# File 'lib/Latin1Prober.rb', line 114
# Returns the prober to its starting state: the previous character class
# becomes OTH, every frequency-category counter is zeroed, and the parent
# class's reset is then invoked.
def reset
  @_mLastCharClass = OTH
  @_mFreqCounter = Array.new(FREQ_CAT_NUM, 0)
  super
end
|