Class: CharDet::EUCJPProber

Inherits:
MultiByteCharSetProber show all
Defined in:
lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb

Instance Attribute Summary

Attributes inherited from CharSetProber

#active

Instance Method Summary collapse

Methods inherited from CharSetProber

#filter_high_bit_only, #filter_with_english_letters, #filter_without_english_letters, #get_state

Constructor Details

#initialize ⇒ EUCJPProber

Returns a new instance of EUCJPProber.



31
32
33
34
35
36
37
# File 'lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb', line 31

# Builds an EUC-JP prober: after the parent constructor has run, creates the
# coding state machine (driven by EUCJPSMModel), the distribution analyzer
# and the context analyzer, then calls reset.
def initialize
  super
  @_mCodingSM = CodingStateMachine.new(EUCJPSMModel)
  @_mDistributionAnalyzer = EUCJPDistributionAnalysis.new
  @_mContextAnalyzer = EUCJPContextAnalysis.new
  reset
end

Instance Method Details

#feed(aBuf) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb', line 48

# Runs one chunk of input through the coding state machine, one element at a
# time, and forwards data to the context and distribution analyzers whenever
# the state machine reports EStart.
#
# aBuf:: the chunk to examine. It is sliced with aBuf[i..i], so it is
#        presumably a byte-oriented String -- confirm against the callers.
#
# Returns the value of get_state once the chunk has been consumed. An empty
# chunk leaves the remembered trailing element (@_mLastChar[0]) untouched.
def feed(aBuf)
  aLen = aBuf.length

  (0...aLen).each do |i|
    codingState = @_mCodingSM.next_state(aBuf[i..i])

    if codingState == EError
      $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
      @_mState = ENotMe
      break
    elsif codingState == EItsMe
      @_mState = EFoundIt
      break
    elsif codingState == EStart
      charLen = @_mCodingSM.get_current_charlen
      if i == 0
        # No element precedes index 0 in this chunk, so pair the element
        # remembered from the previous call with the first one of this chunk.
        @_mLastChar[1] = aBuf[0..0]
        @_mContextAnalyzer.feed(@_mLastChar, charLen)
        @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
      else
        # Hand over the two-element window ending at the current index.
        @_mContextAnalyzer.feed(aBuf[i-1...i+1], charLen)
        @_mDistributionAnalyzer.feed(aBuf[i-1...i+1], charLen)
      end
    end
  end

  # Remember the trailing element for the next call. For an empty chunk the
  # slice aBuf[-1..-1] is nil, which must not be stored, so keep the old value.
  @_mLastChar[0] = aBuf[aLen-1..aLen-1] unless aLen == 0

  # Shortcut: declare a match once the context analyzer has seen enough data
  # and the confidence exceeds the threshold.
  if get_state == EDetecting && @_mContextAnalyzer.got_enough_data &&
     get_confidence > SHORTCUT_THRESHOLD
    @_mState = EFoundIt
  end

  get_state
end

#get_charset_name ⇒ Object



44
45
46
# File 'lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb', line 44

# Name of the charset this prober detects.
def get_charset_name
  "EUC-JP"
end

#get_confidence ⇒ Object



83
84
85
86
# File 'lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb', line 83

# Confidence of this prober: the larger of the values reported by the context
# analyzer and the distribution analyzer.
def get_confidence
  context_confidence = @_mContextAnalyzer.get_confidence
  distribution_confidence = @_mDistributionAnalyzer.get_confidence
  [context_confidence, distribution_confidence].max
end

#reset ⇒ Object



39
40
41
42
# File 'lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb', line 39

# Resets the prober: runs the parent class's reset first, then resets the
# context analyzer owned by this prober.
def reset
  super
  @_mContextAnalyzer.reset
end