Class: UniversalDetector::SJISProber

Inherits:
MultiByteCharSetProber show all
Defined in:
lib/SJISProber.rb

Instance Method Summary collapse

Methods inherited from CharSetProber

#filter_high_bit_only, #filter_with_english_letters, #filter_without_english_letters, #get_state

Constructor Details

#initialize ⇒ SJISProber

Returns a new instance of SJISProber.



38
39
40
41
42
43
44
# File 'lib/SJISProber.rb', line 38

# Sets up a new prober: runs the inherited initializer, then creates the
# coding state machine (built from SJISSMModel), the distribution analyzer
# and the context analyzer, and finally calls #reset.
def initialize
    super
    @_mCodingSM = CodingStateMachine.new(SJISSMModel)
    @_mDistributionAnalyzer = SJISDistributionAnalysis.new
    @_mContextAnalyzer = SJISContextAnalysis.new
    # Must come last: #reset calls into @_mContextAnalyzer.
    reset
end

Instance Method Details

#feed(aBuf) ⇒ Object



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/SJISProber.rb', line 55

# Runs one chunk of input through the coding state machine and the context /
# distribution analyzers, updating this prober's state as it goes.
#
# The state becomes :NotMe as soon as the state machine reports :Error and
# :FoundIt as soon as it reports :ItsMe; scanning of the chunk stops in both
# cases. Each time the machine reports :Start, the bytes around the current
# position are handed to both analyzers together with the current character
# length.
#
# An empty chunk leaves the byte remembered from the previous call untouched.
#
# @param aBuf [#length, #[]] chunk to scan; it is indexed both by position and
#   by range
# @return [Object] the value of get_state once the chunk has been processed
def feed(aBuf)
    aLen = aBuf.length

    (0...aLen).each do |i|
        codingState = @_mCodingSM.next_state(aBuf[i])
        if codingState == :Error
            if DEBUG
                # NOTE(review): '\n' is single-quoted, so this is a literal
                # backslash + "n", not a newline. Left as-is to keep the
                # debug output unchanged.
                p(get_charset_name() + ' prober hit error at byte ' + i.to_s + '\n')
            end
            @_mState = :NotMe
            break
        elsif codingState == :ItsMe
            @_mState = :FoundIt
            break
        elsif codingState == :Start
            charLen = @_mCodingSM.get_current_charlen
            if i == 0
                # First byte of this chunk: combine it with the byte stored in
                # @_mLastChar[0] by the previous call.
                @_mLastChar[1] = aBuf[0]
                @_mContextAnalyzer.feed(@_mLastChar[2 - charLen..@_mLastChar.length], charLen)
                @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
            else
                # NOTE(review): these inclusive ranges can yield up to three
                # elements; presumably the analyzers only read the first
                # charLen of them -- confirm before tightening.
                @_mContextAnalyzer.feed(aBuf[i + 1 - charLen..i + 3 - charLen], charLen)
                @_mDistributionAnalyzer.feed(aBuf[i - 1..i + 1], charLen)
            end
        end
    end

    # Remember the final byte for the next call. With an empty chunk,
    # aBuf[aLen - 1] would be aBuf[-1] (nil) and clobber the stored byte.
    @_mLastChar[0] = aBuf[aLen - 1] unless aLen.zero?

    # Shortcut: stop detecting once there is enough context data and the
    # confidence exceeds the threshold.
    if get_state() == :Detecting &&
       @_mContextAnalyzer.got_enough_data() &&
       get_confidence() > SHORTCUT_THRESHOLD
        @_mState = :FoundIt
    end

    get_state()
end

#get_charset_name ⇒ Object



51
52
53
# File 'lib/SJISProber.rb', line 51

# Name of the character set this prober reports.
#
# @return [String] always "SHIFT_JIS"
def get_charset_name
    "SHIFT_JIS"
end

#get_confidence ⇒ Object



93
94
95
96
97
# File 'lib/SJISProber.rb', line 93

# Confidence reported by this prober: the larger of the context analyzer's
# and the distribution analyzer's confidence values.
#
# @return [Object] the maximum of the two analyzers' get_confidence results
def get_confidence
    [
        @_mContextAnalyzer.get_confidence,
        @_mDistributionAnalyzer.get_confidence
    ].max
end

#reset ⇒ Object



46
47
48
49
# File 'lib/SJISProber.rb', line 46

# Restores the prober to its starting condition: runs the inherited reset
# first, then resets the context analyzer.
def reset
    super
    @_mContextAnalyzer.reset
end