Class: CharDet::Latin1Prober
Instance Attribute Summary
#active
Instance Method Summary
collapse
#filter_high_bit_only, #filter_with_english_letters, #filter_without_english_letters, #get_state
Constructor Details
Returns a new instance of Latin1Prober.
95
96
97
98
|
# File 'lib/rchardet/latin1prober.rb', line 95
# Builds a new prober: runs the parent class initializer, then calls
# #reset so the prober starts from its initial state.
def initialize
  super()
  reset
end
|
Instance Method Details
#feed(aBuf) ⇒ Object
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
# File 'lib/rchardet/latin1prober.rb', line 110
# Consumes a chunk of input and updates the prober's statistics.
#
# aBuf is first passed through filter_with_english_letters; each byte of
# the result is mapped to a character class via Latin1_CharToClass, and
# the (previous class, current class) pair is looked up in
# Latin1ClassModel. A model entry of 0 sets the state to ENotMe and stops
# processing the rest of the buffer; any other entry increments the
# matching slot of @_mFreqCounter.
#
# Returns whatever get_state reports after the buffer has been processed.
def feed(aBuf)
  aBuf = filter_with_english_letters(aBuf)
  aBuf.each_byte do |byte|
    # each_byte already yields the Integer byte value. The previous
    # `byte.chr[0]` round-trip only produced an Integer on Ruby 1.8; on
    # 1.9+ String#[] returns a String, which cannot index the table.
    char_class = Latin1_CharToClass[byte]
    freq = Latin1ClassModel[(@_mLastCharClass * CLASS_NUM) + char_class]
    if freq == 0
      @_mState = ENotMe
      break
    end
    @_mFreqCounter[freq] += 1
    @_mLastCharClass = char_class
  end
  get_state
end
|
#get_charset_name ⇒ Object
106
107
108
|
# File 'lib/rchardet/latin1prober.rb', line 106
# Name of the charset this prober reports.
def get_charset_name
  "windows-1252"
end
|
#get_confidence ⇒ Object
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
|
# File 'lib/rchardet/latin1prober.rb', line 127
# Computes a confidence score from the frequency counters.
#
# Returns 0.01 when the state is ENotMe. Otherwise the score is the share
# of samples in counter slot 3 minus 20 times the share in slot 1,
# clamped at 0.0 and then halved. With no samples the result is 0.0.
#
# NOTE(review): the meaning of slots 1 and 3 and the reason for the final
# 0.5 factor are not visible in this block; confirm against the class
# model definition before changing them.
def get_confidence
  return 0.01 if get_state == ENotMe

  # Seeding with 0 keeps an empty counter array from yielding nil.
  total = @_mFreqCounter.inject(0) { |sum, count| sum + count }
  return 0.0 if total < 0.01

  # The counters are Integers, so the first ratio must be forced to Float;
  # plain Integer division truncated it to 0 for almost every input.
  confidence = (@_mFreqCounter[3].to_f / total) - (@_mFreqCounter[1] * 20.0 / total)
  confidence = 0.0 if confidence < 0.0
  confidence * 0.5
end
|
#reset ⇒ Object
100
101
102
103
104
|
# File 'lib/rchardet/latin1prober.rb', line 100
# Restores the prober's own bookkeeping (a zeroed frequency counter with
# FREQ_CAT_NUM slots, and OTH as the last seen character class) and then
# delegates to the parent class's reset.
def reset
  @_mFreqCounter = Array.new(FREQ_CAT_NUM, 0)
  @_mLastCharClass = OTH
  super
end
|