Class: UniversalDetector::GB2312DistributionAnalysis

Inherits:
CharDistributionAnalysis show all
Defined in:
lib/CharDistributionAnalysis.rb

Constant Summary

Constants inherited from CharDistributionAnalysis

CharDistributionAnalysis::ENOUGH_DATA_THRESHOLD, CharDistributionAnalysis::SURE_NO, CharDistributionAnalysis::SURE_YES

Instance Method Summary collapse

Methods inherited from CharDistributionAnalysis

#feed, #get_confidence, #got_enough_data, #reset

Constructor Details

#initialize ⇒ GB2312DistributionAnalysis

Returns a new instance of GB2312DistributionAnalysis.



151
152
153
154
155
156
# File 'lib/CharDistributionAnalysis.rb', line 151

# Builds a GB2312 distribution analyser.
#
# Runs the inherited initializer first, then stores the GB2312-specific
# frequency-order table, its size, and the typical distribution ratio in
# the instance variables set below.
# NOTE(review): presumably these are read by the inherited #feed and
# #get_confidence -- confirm against CharDistributionAnalysis.
def initialize
    super
    @_mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO
    @_mTableSize = GB2312_TABLE_SIZE
    @_mCharToFreqOrder = GB2312CharToFreqOrder
end

Instance Method Details

#get_order(aStr) ⇒ Object



158
159
160
161
162
163
164
165
166
167
168
# File 'lib/CharDistributionAnalysis.rb', line 158

# Maps a two-byte GB2312 character to its index in the frequency-order table.
#
# For GB2312 we are interested in:
#   first  byte range: 0xb0 -- 0xfe
#   second byte range: 0xa1 -- 0xfe
# Upper bounds are not checked here; the original comment states that the
# state machine has already validated the byte sequence.
#
# @param aStr [String, Array<Integer>] the character's bytes; only the first
#   two are examined
# @return [Integer] 94 * (first - 0xB0) + (second - 0xA1), or -1 when either
#   byte is below its lower bound or fewer than two bytes are available
def get_order(aStr)
    # String#[] returns an Integer on Ruby 1.8 but a one-character String on
    # 1.9+, which cannot be compared with 0xB0. Read raw bytes via getbyte
    # when the receiver offers it; otherwise (byte arrays, very old Rubies)
    # plain indexing already yields Integers.
    if aStr.respond_to?(:getbyte)
        lead = aStr.getbyte(0)
        trail = aStr.getbyte(1)
    else
        lead = aStr[0]
        trail = aStr[1]
    end

    # Truncated input: answer with the same value used for out-of-range bytes
    # instead of raising NoMethodError on nil.
    return -1 if lead.nil? || trail.nil?
    return -1 unless lead >= 0xB0 && trail >= 0xA1

    # Each lead byte covers a row of 94 trail-byte positions.
    94 * (lead - 0xB0) + trail - 0xA1
end