Module: YAZAWA

Defined in:
lib/yazawa.rb,
lib/yazawa/version.rb

Constant Summary collapse

VERSION =
"0.1.2"

Class Method Summary collapse

Class Method Details

.convert(text, options = {}) ⇒ Object

e.g. “空飛ぶ 寿司” => “空飛ぶ 『SUSHI』”



20
21
22
23
24
25
26
27
28
29
# File 'lib/yazawa.rb', line 20

# Converts one word of +text+ and returns the re-joined text.
#
# e.g. "空飛ぶ 寿司" => "空飛ぶ 『SUSHI』"
#
# The text is split into words with separate_words, the word to replace is
# chosen by find_suitable_index_for_replace, and only that word is passed
# through convert_word. All other words are left untouched.
#
# @param text [String] text to convert
# @param options [Hash] forwarded unchanged to find_suitable_index_for_replace
# @return [String] the text with the chosen word converted; "" when the
#   text yields no words at all
def convert(text, options = {})
  words = separate_words(text)

  # Nothing to replace (e.g. empty text). Without this guard words[0] would be
  # nil and convert_word would fail when calling String methods on it.
  return "" if words.empty?

  # Pick the index of the highest-scoring word (see
  # find_suitable_index_for_replace for the scoring rules).
  index_for_replace = find_suitable_index_for_replace(tagger.parse(text), options)

  # Convert specific word only
  words[index_for_replace] = convert_word(words[index_for_replace])
  words.join
end

.convert_word(word) ⇒ Object

e.g. “ジャバ” => “『JABA』”; e.g. “ JAVA” => “ 『JAVA』”



43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/yazawa.rb', line 43

# Converts a single word to upper-case romaji wrapped in 『』 brackets.
#
# e.g. "ジャバ" => "『JABA』"
# e.g. " JAVA" => " 『JAVA』"
#
# Leading whitespace is preserved and kept outside the brackets.
#
# @param word [String] one word, possibly with leading whitespace
# @return [String] leading whitespace + "『" + upper-cased romaji + "』"
def convert_word(word)
  # e.g. " JAVA" => " "
  # \A anchors at the very start of the string; ^ would also match at the
  # start of a later line and could capture whitespace that is not leading.
  left_space = word[/\A\s+/].to_s
  # Drop exactly the captured prefix so left_space + striped_word == word.
  striped_word = word[left_space.length..-1]

  # e.g. "ジャバ" => "JABA"
  # Joins the 8th comma-separated feature field of every parsed morpheme
  # (presumably the katakana reading in the dictionary format in use - TODO
  # confirm). Morphemes without that field contribute nothing to the join.
  katakana = tagger.parse(striped_word).map { |x| x.feature.split(',')[7] }.join
  # Fall back to the word itself when no reading was found.
  katakana = striped_word if katakana == ""

  # Generate a result
  "#{left_space}『#{katakana.romaji.upcase}』"
end

.find_suitable_index_for_replace(parsed_words, options = {}) ⇒ Object



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/yazawa.rb', line 56

# Returns the index of the parsed word that is the best candidate for
# replacement.
#
# Each word is scored from its surface text and the first comma-separated
# field of its feature string:
#   surface contains Japanese  +100
#   surface contains katakana  +10
#   surface contains kanji     +10
#   first field is "形容詞"     +20
#   first field is "名詞"       +10
#   first field is "動詞"       +8
# plus either rand(20) when options[:at_random] is truthy, or the surface
# length otherwise. The earliest word with the highest score above zero wins;
# index 0 is returned when no word scores above zero or there are no words.
#
# @param parsed_words [#each_with_index] items responding to #surface and #feature
# @param options [Hash] :at_random switches the tie-breaker to a random bonus
# @return [Integer] index into parsed_words
def find_suitable_index_for_replace(parsed_words, options = {})
  best_index = 0
  best_score = 0

  parsed_words.each_with_index do |word, position|
    surface = word.surface
    part_of_speech = word.feature.split(',')[0]

    points = 0
    points += 100 if surface.contains_japanese?
    points += 10 if surface.contains_katakana?
    points += 10 if surface.contains_kanji?
    points += case part_of_speech
              when "形容詞" then 20
              when "名詞" then 10
              when "動詞" then 8
              else 0
              end
    points += options[:at_random] ? rand(20) : surface.length

    # Strict comparison: on a tie the earlier word keeps its place.
    next unless points > best_score

    best_score = points
    best_index = position
  end

  best_index
end

.separate_words(text) ⇒ Object

e.g. “空飛ぶ寿司” => [“空”, “飛ぶ”, “寿司”]



37
38
39
# File 'lib/yazawa.rb', line 37

# Splits text into the surface strings of its parsed words.
#
# e.g. "空飛ぶ寿司" => ["空", "飛ぶ", "寿司"]
#
# @param text [String] text to split
# @return [Array<String>] the #surface of every item returned by tagger.parse
def separate_words(text)
  morphemes = tagger.parse(text)
  morphemes.map { |morpheme| morpheme.surface }
end

.tagger ⇒ Object



31
32
33
34
# File 'lib/yazawa.rb', line 31

# Returns the shared MeCab tagger, building it on first use and caching it
# in @tagger for later calls.
#
# @return [MeCab::Light::CustomTagger]
def tagger
  return @tagger if @tagger

  # Specify mecab options for keeping white spaces in parsed text
  mecab_options = '--node-format=%M\t%H\n --unk-format=%M\t%H\n'
  @tagger = MeCab::Light::CustomTagger.new(mecab_options)
end