Class: CharDet::SJISProber

Inherits:
MultiByteCharSetProber show all
Defined in:
lib/rchardet/sjisprober.rb

Instance Attribute Summary

Attributes inherited from CharSetProber

#active

Instance Method Summary collapse

Methods inherited from CharSetProber

#filter_high_bit_only, #filter_with_english_letters, #filter_without_english_letters, #get_state

Constructor Details

#initialize ⇒ SJISProber

Returns a new instance of SJISProber.



31
32
33
34
35
36
37
# File 'lib/rchardet/sjisprober.rb', line 31

# Builds a new prober: creates the coding state machine from SJISSMModel,
# creates the distribution and context analyzers, then calls #reset.
def initialize
  super()
  @_mCodingSM = CodingStateMachine.new(SJISSMModel)
  @_mDistributionAnalyzer = SJISDistributionAnalysis.new
  @_mContextAnalyzer = SJISContextAnalysis.new
  reset
end

Instance Method Details

#feed(aBuf) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/rchardet/sjisprober.rb', line 48

# Feeds one chunk of input to the prober and returns the resulting state.
#
# Every byte is pushed through the coding state machine:
# * EError  - the prober gives up (state becomes ENotMe) and stops scanning.
# * EItsMe  - the prober is certain (state becomes EFoundIt) and stops scanning.
# * EStart  - a character just completed; it is handed to the context and
#   distribution analyzers together with its length.
#
# The last byte of the chunk is kept in @_mLastChar[0] so that a character
# completing on the first byte of the next chunk can be rebuilt from it.
# An empty chunk leaves @_mLastChar untouched (slicing an empty buffer
# yields nil, which must not be stored there).
#
# @param aBuf [String] chunk to examine (presumably raw bytes - confirm
#   against the caller)
# @return [Object] whatever get_state reports after processing the chunk
def feed(aBuf)
  length = aBuf.length

  length.times do |i|
    coding_state = @_mCodingSM.next_state(aBuf[i..i])

    if coding_state == EError
      $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
      @_mState = ENotMe
      break
    elsif coding_state == EItsMe
      @_mState = EFoundIt
      break
    elsif coding_state == EStart
      char_len = @_mCodingSM.get_current_charlen
      if i == 0
        # The character may have started in the previous chunk: combine the
        # remembered byte (slot 0) with the first byte of this chunk (slot 1).
        @_mLastChar[1] = aBuf[0..0]
        @_mContextAnalyzer.feed(@_mLastChar[(2 - char_len)..-1], char_len)
        @_mDistributionAnalyzer.feed(@_mLastChar, char_len)
      else
        @_mContextAnalyzer.feed(aBuf[(i + 1 - char_len)...(i + 3 - char_len)], char_len)
        @_mDistributionAnalyzer.feed(aBuf[(i - 1)...(i + 1)], char_len)
      end
    end
  end

  # Remember the final byte for the next call; nothing to remember when the
  # chunk is empty.
  @_mLastChar[0] = aBuf[(length - 1)..(length - 1)] unless length.zero?

  # Shortcut: stop detecting once the context analyzer has seen enough data
  # and the confidence exceeds the threshold.
  if get_state() == EDetecting &&
     @_mContextAnalyzer.got_enough_data &&
     get_confidence() > SHORTCUT_THRESHOLD
    @_mState = EFoundIt
  end

  get_state()
end

#get_charset_name ⇒ Object



44
45
46
# File 'lib/rchardet/sjisprober.rb', line 44

# Name of the character set this prober detects.
#
# @return [String] always "SHIFT_JIS"
def get_charset_name
  "SHIFT_JIS"
end

#get_confidence ⇒ Object



83
84
85
86
# File 'lib/rchardet/sjisprober.rb', line 83

# Overall confidence of this prober: the larger of the confidences reported
# by the context analyzer and the distribution analyzer.
#
# @return [Object] the maximum of the two analyzer confidences
def get_confidence
  context_confidence = @_mContextAnalyzer.get_confidence
  distribution_confidence = @_mDistributionAnalyzer.get_confidence
  [context_confidence, distribution_confidence].max
end

#reset ⇒ Object



39
40
41
42
# File 'lib/rchardet/sjisprober.rb', line 39

# Resets the prober: runs the inherited reset first, then resets the
# context analyzer.
def reset
  super()
  @_mContextAnalyzer.reset
end