Class: MeCab

Inherits:
Object
  • Object
show all
Defined in:
lib/fluent/plugin/mecab.rb

Instance Method Summary collapse

Constructor Details

#initialize(type, keys) ⇒ MeCab

Returns a new instance of MeCab.



5
6
7
8
9
10
11
# File 'lib/fluent/plugin/mecab.rb', line 5

# Builds the wrapper around a Natto::MeCab tagger.
#
# @param type [String] output format type. A value matching /default/
#   (case-insensitively, the same test result_format uses to pick its parser)
#   is not forwarded to Natto; any other value is passed through as
#   :output_format_type.
# @param keys [Array] record keys; parse reads record[key] for each of them.
def initialize(type, keys)
  @type = type
  @keys = keys
  opt = {}
  # Case-insensitive on purpose: result_format dispatches on /default/i, so
  # "Default" must not be handed to MeCab as an explicit output format.
  opt[:output_format_type] = @type unless type =~ /default/i
  @mecab = Natto::MeCab.new(opt)
end

Instance Method Details

#chansen(str) ⇒ Object



43
44
45
# File 'lib/fluent/plugin/mecab.rb', line 43

# Not implemented yet: ignores +str+ and returns nil.
#
# NOTE(review): the name looks like a misspelling of "chasen" (the type that
# result_format matches with /chasen/i) -- confirm before relying on it.
#
# @param str [String] currently unused
# @return [nil]
def chansen(str)
  # TODO
  nil
end

#default(str) ⇒ Object



33
34
35
36
37
38
39
40
41
# File 'lib/fluent/plugin/mecab.rb', line 33

# Splits one line of default-format output into named fields.
#
# Expected layout (a tab after the surface form, commas between features):
#   surface form\tpart of speech,POS subclass 1,POS subclass 2,POS subclass 3,
#   conjugation form,conjugation type,base form[,reading,pronunciation]
#
# The nine-feature pattern is tried first and the seven-feature pattern is the
# fallback, so the choice depends on how many features the line carries rather
# than on its first character. This keeps seven-feature lines that start with
# a non-ASCII character from being rejected, and keeps nine-feature lines that
# start with an ASCII word character from having their leading features merged
# into part_of_speech by the greedy match.
#
# @param str [String] a single output line
# @return [MatchData, nil] match with named captures (reading and
#   pronunciation are present only for nine-feature lines), or nil when the
#   line fits neither pattern
def default(str)
  base = "(?<word>.+)\t(?<part_of_speech>.+),(?<part_of_speech_subclassification1>.+),(?<part_of_speech_subclassification2>.+),(?<part_of_speech_subclassification3>.+),(?<inflected_forms>.+),(?<utilizing_types>.+),(?<original_word>.*)"
  full = "#{base},(?<reading>.*),(?<pronunciation>.*)"
  str.match(/#{full}/) || str.match(/#{base}/)
end

#parse(record) ⇒ Object



13
14
15
16
17
# File 'lib/fluent/plugin/mecab.rb', line 13

# Runs the tagger over each configured key of +record+.
#
# @param record [#[]] object indexed by the configured keys
# @return [Array<Hash>] one single-entry hash per key, mapping the key to the
#   result_format output for that key's value
def parse(record)
  @keys.map do |field|
    analysed = @mecab.parse(record[field])
    { field => result_format(analysed) }
  end
end

#result_format(result) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/fluent/plugin/mecab.rb', line 19

# Converts raw tagger output into structured data according to @type.
#
# For a type matching /default/i the output is split into lines, the last
# line is discarded (presumably the trailing "EOS" marker -- confirm against
# the tagger output), and every remaining line that +default+ can match
# becomes a hash of capture name => captured text. Lines that do not match
# are skipped. A type matching /chasen/i is not implemented yet and yields
# nil, as does any other type.
#
# @param result [String] raw multi-line tagger output
# @return [Array<Hash>, nil]
def result_format(result)
  case @type
  when /default/i
    lines = result.split(/\n/)[0...-1]
    lines.each_with_object([]) do |line, rows|
      matched = default(line)
      rows << matched.names.zip(matched.captures).to_h if matched
    end
  when /chasen/i
    # TODO
    nil
  end
end