Class: UniversalDetector::Latin1Prober

Inherits:
CharSetProber show all
Defined in:
lib/Latin1Prober.rb

Instance Method Summary collapse

Methods inherited from CharSetProber

#filter_high_bit_only, #filter_with_english_letters, #filter_without_english_letters, #get_state

Constructor Details

#initialize ⇒ Latin1Prober

Returns a new instance of Latin1Prober.



109
110
111
112
# File 'lib/Latin1Prober.rb', line 109

# Builds a prober: runs the parent constructor first, then calls #reset
# so the instance starts from its initial state.
def initialize
    super
    reset
end

Instance Method Details

#feed(aBuf) ⇒ Object



124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/Latin1Prober.rb', line 124

# Feeds a chunk of input to the prober and updates its statistics.
#
# The buffer is first passed through the inherited
# filter_with_english_letters. Each byte of the result is mapped to a
# character class via Latin1_CharToClass, and the (previous class,
# current class) pair is looked up in Latin1ClassModel to obtain a
# frequency category. A category of 0 switches the prober to :NotMe and
# stops processing; any other category increments its counter.
#
# NOTE(review): assumes filter_with_english_letters returns a String
# (anything responding to #each_byte) -- confirm against CharSetProber.
#
# @param aBuf the raw input to analyse
# @return the prober state as reported by get_state
def feed(aBuf)
    aBuf = filter_with_english_letters(aBuf)
    # Walk the buffer byte by byte. `for c in aBuf` relied on String#each,
    # which iterated *lines* on Ruby 1.8 and no longer exists on later
    # versions, and `c[0]` is a String (not an Integer index) there.
    aBuf.each_byte do |byte|
        charClass = Latin1_CharToClass[byte]
        freq = Latin1ClassModel[(@_mLastCharClass * CLASS_NUM) + charClass]
        if freq == 0
            @_mState = :NotMe
            break
        end
        @_mFreqCounter[freq] += 1
        @_mLastCharClass = charClass
    end

    return get_state()
end

#get_charset_name ⇒ Object



120
121
122
# File 'lib/Latin1Prober.rb', line 120

# Name of the charset this prober reports.
#
# @return [String] always "windows-1252"
def get_charset_name
    "windows-1252"
end

#get_confidence ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/Latin1Prober.rb', line 140

# Computes how confident the prober is in its charset guess.
#
# Returns 0.01 when the prober state is :NotMe. Otherwise the score is
# the share of category-3 counts minus twenty times the share of
# category-1 counts (relative to the sum of all counters), floored at
# 0.0 and then halved. With no counts recorded the result is 0.0.
#
# @return [Float] the confidence value
def get_confidence()
    return 0.01 if get_state() == :NotMe

    # Enumerable#reduce takes the initial value first, then the operator.
    total = @_mFreqCounter.reduce(0, :+)
    if total < 0.01
        confidence = 0.0
    else
        # The 20.0 factor makes the numerator a Float, so the division is
        # not truncated the way Integer / Integer would be.
        confidence = (@_mFreqCounter[3] - @_mFreqCounter[1] * 20.0) / total
    end
    confidence = 0.0 if confidence < 0.0
    # lower the confidence of latin1 so that other more accurate detector
    # can take priority.
    confidence * 0.5
end

#reset ⇒ Object



114
115
116
117
118
# File 'lib/Latin1Prober.rb', line 114

# Puts the prober back into its initial state: the last seen character
# class becomes OTH, every frequency counter is zeroed (one slot per
# FREQ_CAT_NUM), and the parent class's reset is then invoked.
#
# @return whatever the parent implementation of reset returns
def reset
    @_mLastCharClass = OTH
    @_mFreqCounter = Array.new(FREQ_CAT_NUM, 0)
    super
end