Module: Annex29::WordSegmentation

Defined in:
lib/annex_29/word_segmentation.rb

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

._segmenter_eof_trans ⇒ Object

Returns the value of attribute _segmenter_eof_trans.



2779
2780
2781
# File 'lib/annex_29/word_segmentation.rb', line 2779

# Reads the class-level +@_segmenter_eof_trans+ table.
#
# In +call+ the table is indexed by the current state once the end of the
# input has been reached. An entry greater than zero is a transition index
# plus one; any other value means no end-of-input transition is taken.
def _segmenter_eof_trans
  @_segmenter_eof_trans
end

._segmenter_from_state_actions ⇒ Object

Returns the value of attribute _segmenter_from_state_actions.



2770
2771
2772
# File 'lib/annex_29/word_segmentation.rb', line 2770

# Reads the class-level +@_segmenter_from_state_actions+ table.
#
# In +call+ the table is indexed by the current state before a transition is
# looked up; the only value handled there is 4, which records the current
# position as the start of a segment.
def _segmenter_from_state_actions
  @_segmenter_from_state_actions
end

._segmenter_index_offsets ⇒ Object

Returns the value of attribute _segmenter_index_offsets.



1770
1771
1772
# File 'lib/annex_29/word_segmentation.rb', line 1770

# Reads the class-level +@_segmenter_index_offsets+ table.
#
# In +call+ the table is indexed by the current state and yields the base
# position in +_segmenter_indicies+ from which that state's transition slots
# are counted.
def _segmenter_index_offsets
  @_segmenter_index_offsets
end

._segmenter_indicies ⇒ Object

Returns the value of attribute _segmenter_indicies.



1779
1780
1781
# File 'lib/annex_29/word_segmentation.rb', line 1779

# Reads the class-level +@_segmenter_indicies+ table.
#
# In +call+ the slot computed by the key search is looked up here, and the
# result is the transition number used to index +_segmenter_trans_targs+ and
# +_segmenter_trans_actions+.
def _segmenter_indicies
  @_segmenter_indicies
end

._segmenter_key_offsets ⇒ Object

Returns the value of attribute _segmenter_key_offsets.



14
15
16
# File 'lib/annex_29/word_segmentation.rb', line 14

# Reads the class-level +@_segmenter_key_offsets+ table.
#
# In +call+ the table is indexed by the current state and yields the position
# in +_segmenter_trans_keys+ where that state's keys begin.
def _segmenter_key_offsets
  @_segmenter_key_offsets
end

._segmenter_range_lengths ⇒ Object

Returns the value of attribute _segmenter_range_lengths.



1761
1762
1763
# File 'lib/annex_29/word_segmentation.rb', line 1761

# Reads the class-level +@_segmenter_range_lengths+ table.
#
# In +call+ the table is indexed by the current state and yields how many
# (low, high) key pairs that state has in +_segmenter_trans_keys+.
def _segmenter_range_lengths
  @_segmenter_range_lengths
end

._segmenter_single_lengths ⇒ Object

Returns the value of attribute _segmenter_single_lengths.



1752
1753
1754
# File 'lib/annex_29/word_segmentation.rb', line 1752

# Reads the class-level +@_segmenter_single_lengths+ table.
#
# In +call+ the table is indexed by the current state and yields how many
# single (exact-match) keys that state has in +_segmenter_trans_keys+.
def _segmenter_single_lengths
  @_segmenter_single_lengths
end

._segmenter_to_state_actions ⇒ Object

Returns the value of attribute _segmenter_to_state_actions.



2761
2762
2763
# File 'lib/annex_29/word_segmentation.rb', line 2761

# Reads the class-level +@_segmenter_to_state_actions+ table.
#
# In +call+ the table is indexed by the state just entered; the only value
# handled there is 3, which clears the recorded segment start.
def _segmenter_to_state_actions
  @_segmenter_to_state_actions
end

._segmenter_trans_actions ⇒ Object

Returns the value of attribute _segmenter_trans_actions.



2751
2752
2753
# File 'lib/annex_29/word_segmentation.rb', line 2751

# Reads the class-level +@_segmenter_trans_actions+ table.
#
# In +call+ the table is indexed by transition number. A value of 0 means no
# action; the values 1, 2, 5 and 6 select the action run for the transition.
def _segmenter_trans_actions
  @_segmenter_trans_actions
end

._segmenter_trans_keys ⇒ Object

Returns the value of attribute _segmenter_trans_keys.



23
24
25
# File 'lib/annex_29/word_segmentation.rb', line 23

# Reads the class-level +@_segmenter_trans_keys+ table.
#
# In +call+ the entries are compared against input code points. For each
# state the single keys come first, followed by (low, high) pairs describing
# inclusive ranges.
def _segmenter_trans_keys
  @_segmenter_trans_keys
end

._segmenter_trans_targs ⇒ Object

Returns the value of attribute _segmenter_trans_targs.



2741
2742
2743
# File 'lib/annex_29/word_segmentation.rb', line 2741

# Reads the class-level +@_segmenter_trans_targs+ table.
#
# In +call+ the table is indexed by transition number and yields the state
# the machine moves to.
def _segmenter_trans_targs
  @_segmenter_trans_targs
end

.segmenter_en_main ⇒ Object

Returns the value of attribute segmenter_en_main.



2801
2802
2803
# File 'lib/annex_29/word_segmentation.rb', line 2801

# Reads the class-level +@segmenter_en_main+ value.
#
# Not referenced by the +call+ listing on this page.
# NOTE(review): presumably the entry state of the machine's "main" section —
# confirm against the generator's documentation.
def segmenter_en_main
  @segmenter_en_main
end

.segmenter_error ⇒ Object

Returns the value of attribute segmenter_error.



2796
2797
2798
# File 'lib/annex_29/word_segmentation.rb', line 2796

# Reads the class-level +@segmenter_error+ value.
#
# Not referenced by the +call+ listing on this page.
# NOTE(review): presumably the id of the machine's error state — confirm
# against the generator's documentation.
def segmenter_error
  @segmenter_error
end

.segmenter_first_final ⇒ Object

Returns the value of attribute segmenter_first_final.



2792
2793
2794
# File 'lib/annex_29/word_segmentation.rb', line 2792

# Reads the class-level +@segmenter_first_final+ value.
#
# Not referenced by the +call+ listing on this page.
# NOTE(review): presumably the lowest-numbered final state of the machine —
# confirm against the generator's documentation.
def segmenter_first_final
  @segmenter_first_final
end

.segmenter_start ⇒ Object

Returns the value of attribute segmenter_start.



2788
2789
2790
# File 'lib/annex_29/word_segmentation.rb', line 2788

# Reads the class-level +@segmenter_start+ value.
#
# +call+ uses it as the initial state of the machine.
def segmenter_start
  @segmenter_start
end

Class Method Details

.call(input) ⇒ Object



2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
# File 'lib/annex_29/word_segmentation.rb', line 2809

# Splits +input+ into segments ("words") by running a table-driven state
# machine over its code points.
#
# NOTE(review): the module name suggests the boundaries follow Unicode
# Annex #29 word segmentation — confirm against the .rl grammar.
# NOTE(review): everything between the "# line" markers looks like generated
# scanner code (the markers point at lib/annex_29/word_segmentation.rl), so
# changes most likely belong in that grammar rather than here — confirm.
#
# Local variables used by the machine:
#   p / pe - current position in +data+ / position at which to stop
#   eof    - position that counts as end of input (same value as +pe+ here)
#   cs     - current state
#   ts, te - start (inclusive) and end (exclusive) of the segment being built
#
# @param input [#each_char] text to segment; every yielded character must
#   respond to +ord+
# @return [Array<String>] the emitted segments in input order, each rebuilt
#   from its code points with <tt>Array#pack("U*")</tt>
def call(input)
  # The machine compares integers, so convert the text to code points once.
  data = input.each_char.map(&:ord)
  eof = data.length
  words = []


				# line 2816 "lib/annex_29/word_segmentation.rb"
				begin
					# Neither p nor pe is assigned earlier in this method, so ||= simply
					# initialises them.
					p ||= 0
					pe ||= data.length
					cs = segmenter_start
					ts = nil
					te = nil
				end

				# line 181 "lib/annex_29/word_segmentation.rl"

				# line 2828 "lib/annex_29/word_segmentation.rb"
				begin
					_klen, _trans, _keys = nil
					# Goto emulation: each section below is guarded by
					# "_goto_level <= <label>". Setting _goto_level to a label and calling
					# "next" restarts the loop, skips every section with a smaller label and
					# then falls through the remaining ones in order.
					_goto_level = 0
					_resume = 10
					_eof_trans = 15
					_again = 20
					_test_eof = 30
					_out = 40
					while true
# Entry check (first pass only): with nothing to consume, jump straight to the
# end-of-input handling.
if _goto_level <= 0
	if p == pe
		_goto_level = _test_eof
		next
	end
end
# Section "resume": run the from-state action, then find the transition for
# the code point at p.
if _goto_level <= _resume
	case _segmenter_from_state_actions[cs]
	when 4 then
# line 1 "NONE"
		begin
			# Remember where the segment starts.
			ts = p
		end
# line 2852 "lib/annex_29/word_segmentation.rb"
	end # from state action switch

	_keys = _segmenter_key_offsets[cs]
	_trans = _segmenter_index_offsets[cs]
	_klen = _segmenter_single_lengths[cs]
	_break_match = false

	# "begin ... end while false" runs exactly once; it exists so that "break"
	# can leave the search as soon as a key matches.
	begin
		# Binary search over this state's single (exact-match) keys.
		if _klen > 0
			_lower = _keys
			_upper = _keys + _klen - 1

			loop do
				break if _upper < _lower
				_mid = _lower + ( (_upper - _lower) >> 1 )

				if data[p].ord < _segmenter_trans_keys[_mid]
					_upper = _mid - 1
				elsif data[p].ord > _segmenter_trans_keys[_mid]
					_lower = _mid + 1
				else
					_trans += (_mid - _keys)
					_break_match = true
					break
				end
			end # loop
			break if _break_match
			# No single key matched: skip past the single keys and their slots.
			_keys += _klen
			_trans += _klen
		end
		# Binary search over this state's key ranges. Ranges are stored as
		# (low, high) pairs, hence the stride of two and the "& ~1" that keeps
		# _mid on the low element of a pair.
		_klen = _segmenter_range_lengths[cs]
		if _klen > 0
			_lower = _keys
			_upper = _keys + (_klen << 1) - 2
			loop do
				break if _upper < _lower
				_mid = _lower + (((_upper-_lower) >> 1) & ~1)
				if data[p].ord < _segmenter_trans_keys[_mid]
					_upper = _mid - 2
				elsif data[p].ord > _segmenter_trans_keys[_mid+1]
					_lower = _mid + 2
				else
					_trans += ((_mid - _keys) >> 1)
					_break_match = true
					break
				end
			end # loop
			break if _break_match
			# No range matched either: _trans now addresses the slot that follows
			# all of this state's key slots.
			_trans += _klen
		end
	end while false
	# Translate the slot into a transition number.
	_trans = _segmenter_indicies[_trans];
end
# Section "eof_trans": take transition _trans — move to its target state and
# run its action, if any. Also entered directly for end-of-input transitions.
if _goto_level <= _eof_trans
	cs = _segmenter_trans_targs[_trans];

	if _segmenter_trans_actions[_trans] != 0

		case _segmenter_trans_actions[_trans]
		when 2 then
			# line 1 "NONE"
			begin
				# Record a possible segment end just past the current code point.
				te = p+1
			end
		when 5 then
			# line 10 "lib/annex_29/word_segmentation.rl"
			begin
				# The segment ends with the current code point: emit it.
				te = p+1
				begin
					words << data[ts...te].pack("U*")
				end
			end
		when 6 then
			# line 10 "lib/annex_29/word_segmentation.rl"
			begin
				# The segment ends before the current code point: step p back so the
				# increment in the next section lands on this code point again.
				te = p
				p = p - 1
				begin
					words << data[ts...te].pack("U*")
				end
			end
		when 1 then
			# line 10 "lib/annex_29/word_segmentation.rl"
			begin
				# Fall back to the previously recorded end te: rewind p to te - 1 (the
				# next section increments it) and emit the segment.
				begin p = ((te))-1; end
				begin
					words << data[ts...te].pack("U*")
				end
			end
			# line 2941 "lib/annex_29/word_segmentation.rb"
		end # action switch
	end

end
# Section "again": run the to-state action, advance, and loop back to
# "resume" while input remains.
if _goto_level <= _again
	case _segmenter_to_state_actions[cs]
	when 3 then
# line 1 "NONE"
		begin
			# Clear the recorded segment start.
			ts = nil
		end
# line 2952 "lib/annex_29/word_segmentation.rb"
	end

	p += 1
	if p != pe
		_goto_level = _resume
		next
	end
end
# Section "test_eof": at end of input, take the current state's end-of-input
# transition if it has one. Entries hold the transition number plus one, so a
# value of 0 means "none".
if _goto_level <= _test_eof
	if p == eof
		if _segmenter_eof_trans[cs] > 0
			_trans = _segmenter_eof_trans[cs] - 1;
			_goto_level = _eof_trans
			next;
		end
	end

end
# Section "out": leave the machine loop.
if _goto_level <= _out
	break
end
					end
				end

				# line 182 "lib/annex_29/word_segmentation.rl"

  words
end