Module: Annex29::WordSegmentation
- Defined in:
- lib/annex_29/word_segmentation.rb
Class Attribute Summary
-
._segmenter_eof_trans ⇒ Object
Returns the value of attribute _segmenter_eof_trans.
-
._segmenter_from_state_actions ⇒ Object
Returns the value of attribute _segmenter_from_state_actions.
-
._segmenter_index_offsets ⇒ Object
Returns the value of attribute _segmenter_index_offsets.
-
._segmenter_indicies ⇒ Object
Returns the value of attribute _segmenter_indicies.
-
._segmenter_key_offsets ⇒ Object
Returns the value of attribute _segmenter_key_offsets.
-
._segmenter_range_lengths ⇒ Object
Returns the value of attribute _segmenter_range_lengths.
-
._segmenter_single_lengths ⇒ Object
Returns the value of attribute _segmenter_single_lengths.
-
._segmenter_to_state_actions ⇒ Object
Returns the value of attribute _segmenter_to_state_actions.
-
._segmenter_trans_actions ⇒ Object
Returns the value of attribute _segmenter_trans_actions.
-
._segmenter_trans_keys ⇒ Object
Returns the value of attribute _segmenter_trans_keys.
-
._segmenter_trans_targs ⇒ Object
Returns the value of attribute _segmenter_trans_targs.
-
.segmenter_en_main ⇒ Object
Returns the value of attribute segmenter_en_main.
-
.segmenter_error ⇒ Object
Returns the value of attribute segmenter_error.
-
.segmenter_first_final ⇒ Object
Returns the value of attribute segmenter_first_final.
-
.segmenter_start ⇒ Object
Returns the value of attribute segmenter_start.
Class Method Summary
-
.call(input) ⇒ Object
Class Attribute Details
._segmenter_eof_trans ⇒ Object
Returns the value of attribute _segmenter_eof_trans.
2779 2780 2781 |
# File 'lib/annex_29/word_segmentation.rb', line 2779 def _segmenter_eof_trans @_segmenter_eof_trans end |
._segmenter_from_state_actions ⇒ Object
Returns the value of attribute _segmenter_from_state_actions.
2770 2771 2772 |
# File 'lib/annex_29/word_segmentation.rb', line 2770 def _segmenter_from_state_actions @_segmenter_from_state_actions end |
._segmenter_index_offsets ⇒ Object
Returns the value of attribute _segmenter_index_offsets.
1770 1771 1772 |
# File 'lib/annex_29/word_segmentation.rb', line 1770 def _segmenter_index_offsets @_segmenter_index_offsets end |
._segmenter_indicies ⇒ Object
Returns the value of attribute _segmenter_indicies.
1779 1780 1781 |
# File 'lib/annex_29/word_segmentation.rb', line 1779 def _segmenter_indicies @_segmenter_indicies end |
._segmenter_key_offsets ⇒ Object
Returns the value of attribute _segmenter_key_offsets.
14 15 16 |
# File 'lib/annex_29/word_segmentation.rb', line 14 def _segmenter_key_offsets @_segmenter_key_offsets end |
._segmenter_range_lengths ⇒ Object
Returns the value of attribute _segmenter_range_lengths.
1761 1762 1763 |
# File 'lib/annex_29/word_segmentation.rb', line 1761 def _segmenter_range_lengths @_segmenter_range_lengths end |
._segmenter_single_lengths ⇒ Object
Returns the value of attribute _segmenter_single_lengths.
1752 1753 1754 |
# File 'lib/annex_29/word_segmentation.rb', line 1752 def _segmenter_single_lengths @_segmenter_single_lengths end |
._segmenter_to_state_actions ⇒ Object
Returns the value of attribute _segmenter_to_state_actions.
2761 2762 2763 |
# File 'lib/annex_29/word_segmentation.rb', line 2761 def _segmenter_to_state_actions @_segmenter_to_state_actions end |
._segmenter_trans_actions ⇒ Object
Returns the value of attribute _segmenter_trans_actions.
2751 2752 2753 |
# File 'lib/annex_29/word_segmentation.rb', line 2751 def _segmenter_trans_actions @_segmenter_trans_actions end |
._segmenter_trans_keys ⇒ Object
Returns the value of attribute _segmenter_trans_keys.
23 24 25 |
# File 'lib/annex_29/word_segmentation.rb', line 23 def _segmenter_trans_keys @_segmenter_trans_keys end |
._segmenter_trans_targs ⇒ Object
Returns the value of attribute _segmenter_trans_targs.
2741 2742 2743 |
# File 'lib/annex_29/word_segmentation.rb', line 2741 def _segmenter_trans_targs @_segmenter_trans_targs end |
.segmenter_en_main ⇒ Object
Returns the value of attribute segmenter_en_main.
2801 2802 2803 |
# File 'lib/annex_29/word_segmentation.rb', line 2801 def segmenter_en_main @segmenter_en_main end |
.segmenter_error ⇒ Object
Returns the value of attribute segmenter_error.
2796 2797 2798 |
# File 'lib/annex_29/word_segmentation.rb', line 2796 def segmenter_error @segmenter_error end |
.segmenter_first_final ⇒ Object
Returns the value of attribute segmenter_first_final.
2792 2793 2794 |
# File 'lib/annex_29/word_segmentation.rb', line 2792 def segmenter_first_final @segmenter_first_final end |
.segmenter_start ⇒ Object
Returns the value of attribute segmenter_start.
2788 2789 2790 |
# File 'lib/annex_29/word_segmentation.rb', line 2788 def segmenter_start @segmenter_start end |
Class Method Details
.call(input) ⇒ Object
2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 |
# File 'lib/annex_29/word_segmentation.rb', line 2809
#
# Splits +input+ into segments by running the table-driven `segmenter` state
# machine over the input's code points.
#
# Everything between the `# line ...` markers appears to be generated (Ragel
# output for lib/annex_29/word_segmentation.rl, judging by the markers), so
# behavioural changes belong in the .rl grammar rather than in this body.
# NOTE(review): the module name suggests the grammar encodes the word-boundary
# rules of Unicode Standard Annex #29 — the grammar itself is not visible here.
#
# @param input [#each_char] text to segment; every character is turned into
#   an integer with #ord
# @return [Array<String>] the matched segments in the order they were found,
#   each rebuilt from its code points with pack("U*")
def call(input)
  data = input.each_char.map(&:ord)
  eof = data.length
  words = []

  # Scanner state: p is the cursor into data, pe the end index, cs the
  # current state, ts/te the start/end indexes of the token being matched.
  # line 2816 "lib/annex_29/word_segmentation.rb"
  begin
    p ||= 0
    pe ||= data.length
    cs = segmenter_start
    ts = nil
    te = nil
  end

  # line 181 "lib/annex_29/word_segmentation.rl"

  # line 2828 "lib/annex_29/word_segmentation.rb"
  begin
    _klen, _trans, _keys = nil
    # The numbered levels emulate goto labels: each `if _goto_level <= X`
    # section below is one label, and setting _goto_level followed by `next`
    # re-enters the loop at that label, skipping the sections before it.
    _goto_level = 0
    _resume = 10
    _eof_trans = 15
    _again = 20
    _test_eof = 30
    _out = 40
    while true
      if _goto_level <= 0
        # Nothing to scan (empty input): go straight to the end-of-input check.
        if p == pe
          _goto_level = _test_eof
          next
        end
      end
      if _goto_level <= _resume
        case _segmenter_from_state_actions[cs]
        when 4 then
          # line 1 "NONE"
          begin
            # Remember where the token that is about to be matched starts.
            ts = p
          end
          # line 2852 "lib/annex_29/word_segmentation.rb"
        end # from state action switch
        _keys = _segmenter_key_offsets[cs]
        _trans = _segmenter_index_offsets[cs]
        _klen = _segmenter_single_lengths[cs]
        _break_match = false

        # `begin ... end while false` runs exactly once; it exists so that
        # `break` can leave the lookup as soon as a key matches.
        begin
          if _klen > 0
            # Binary search of the state's single keys for the current code point.
            _lower = _keys
            _upper = _keys + _klen - 1

            loop do
              break if _upper < _lower
              _mid = _lower + ( (_upper - _lower) >> 1 )

              if data[p].ord < _segmenter_trans_keys[_mid]
                _upper = _mid - 1
              elsif data[p].ord > _segmenter_trans_keys[_mid]
                _lower = _mid + 1
              else
                _trans += (_mid - _keys)
                _break_match = true
                break
              end
            end # loop
            break if _break_match
            # No single key matched: skip past them in both tables.
            _keys += _klen
            _trans += _klen
          end
          _klen = _segmenter_range_lengths[cs]
          if _klen > 0
            # Binary search of the state's range keys, stored as low/high
            # pairs; `& ~1` keeps _mid on the low bound of a pair.
            _lower = _keys
            _upper = _keys + (_klen << 1) - 2
            loop do
              break if _upper < _lower
              _mid = _lower + (((_upper-_lower) >> 1) & ~1)
              if data[p].ord < _segmenter_trans_keys[_mid]
                _upper = _mid - 2
              elsif data[p].ord > _segmenter_trans_keys[_mid+1]
                _lower = _mid + 2
              else
                _trans += ((_mid - _keys) >> 1)
                _break_match = true
                break
              end
            end # loop
            break if _break_match
            # No range matched either: _trans now points at the slot that
            # follows this state's single and range entries.
            _trans += _klen
          end
        end while false
        _trans = _segmenter_indicies[_trans];
      end
      if _goto_level <= _eof_trans
        # Take the transition: move to its target state, then run its action.
        cs = _segmenter_trans_targs[_trans];
        if _segmenter_trans_actions[_trans] != 0
          case _segmenter_trans_actions[_trans]
          when 2 then
            # line 1 "NONE"
            begin
              # Record a token end just past the current code point without
              # emitting anything yet.
              te = p+1
            end
          when 5 then
            # line 10 "lib/annex_29/word_segmentation.rl"
            begin
              # The token includes the current code point: emit ts...te.
              te = p+1
              begin words << data[ts...te].pack("U*") end
            end
          when 6 then
            # line 10 "lib/annex_29/word_segmentation.rl"
            begin
              # The token ended before the current code point: emit it and
              # step p back so that code point is scanned again.
              te = p
              p = p - 1
              begin words << data[ts...te].pack("U*") end
            end
          when 1 then
            # line 10 "lib/annex_29/word_segmentation.rl"
            begin
              # Rewind p to the last recorded token end, then emit ts...te.
              begin p = ((te))-1; end
              begin words << data[ts...te].pack("U*") end
            end
            # line 2941 "lib/annex_29/word_segmentation.rb"
          end # action switch
        end
      end
      if _goto_level <= _again
        case _segmenter_to_state_actions[cs]
        when 3 then
          # line 1 "NONE"
          begin
            # Forget the token start for the state just entered.
            ts = nil
          end
          # line 2952 "lib/annex_29/word_segmentation.rb"
        end

        # Advance the cursor; keep scanning while input remains.
        p += 1
        if p != pe
          _goto_level = _resume
          next
        end
      end
      if _goto_level <= _test_eof
        if p == eof
          # At end of input, take the current state's EOF transition if it has
          # one (entries are stored offset by one so that 0 means "none").
          if _segmenter_eof_trans[cs] > 0
            _trans = _segmenter_eof_trans[cs] - 1;
            _goto_level = _eof_trans
            next;
          end
        end
      end
      if _goto_level <= _out
        break
      end
    end
  end

  # line 182 "lib/annex_29/word_segmentation.rl"

  words
end