Module: YAZAWA
- Defined in:
- lib/yazawa.rb,
lib/yazawa/version.rb
Constant Summary collapse
- VERSION =
"0.1.2"
Class Method Summary collapse
-
.convert(text, options = {}) ⇒ Object
e.g. “空飛ぶ 寿司” => “空飛ぶ 『SUSHI』”
-
.convert_word(word) ⇒ Object
e.g. “ジャバ” => “『JABA』”
- .find_suitable_index_for_replace(parsed_words, options = {}) ⇒ Object
-
.separate_words(text) ⇒ Object
e.g. “空飛ぶ寿司” => [“空”, “飛ぶ”, “寿司”]
- .tagger ⇒ Object
Class Method Details
.convert(text, options = {}) ⇒ Object
e.g. “空飛ぶ 寿司” => “空飛ぶ 『SUSHI』”
20 21 22 23 24 25 26 27 28 29 |
# File 'lib/yazawa.rb', line 20
# Replaces one word of +text+ with the result of convert_word and returns
# the re-joined text.
# e.g. "空飛ぶ 寿司" => "空飛ぶ 『SUSHI』"
#
# @param text [String] the text to convert
# @param options [Hash] passed through unchanged to
#   find_suitable_index_for_replace
# @return [String] the words of +text+ joined back together with the chosen
#   word converted; +text+ itself when it yields no words
def convert(text, options = {})
  words = separate_words(text)
  # Nothing to replace; without this guard convert_word would be handed nil.
  return text if words.empty?

  # Choose which word to replace from the tagger's analysis of the text
  index_for_replace = find_suitable_index_for_replace(tagger.parse(text), options)

  # Convert that specific word only
  words[index_for_replace] = convert_word(words[index_for_replace])
  words.join
end
.convert_word(word) ⇒ Object
e.g. “ジャバ” => “『JABA』”; “ JAVA” => “ 『JAVA』” (leading whitespace is kept outside the brackets)
43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/yazawa.rb', line 43
# Wraps a single word in 『』 after converting it to upper-case romaji,
# keeping any leading whitespace in front of the opening bracket.
# e.g. "ジャバ" => "『JABA』"
# e.g. " JAVA" => " 『JAVA』"
#
# @param word [String] the word to convert, possibly with leading whitespace
# @return [String] leading whitespace + "『" + upper-cased romaji + "』"
def convert_word(word)
  # e.g. " JAVA" => " " / "JAVA"
  # The prefix is exactly what lstrip removed, so an indented later line of
  # a multi-line word can never be mistaken for leading whitespace.
  stripped_word = word.lstrip
  left_space = word[0, word.length - stripped_word.length]

  # e.g. "ジャバ" => "JABA"
  # Field 7 of the comma-separated feature is used as the katakana form
  # (presumably the reading column of the dictionary in use; confirm).
  katakana = tagger.parse(stripped_word).map { |morpheme| morpheme.feature.split(',')[7] }.join
  # Fall back to the word itself when the tagger supplied no such field
  katakana = stripped_word if katakana.empty?

  # Generate a result
  "#{left_space}『#{katakana.romaji.upcase}』"
end
.find_suitable_index_for_replace(parsed_words, options = {}) ⇒ Object
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/yazawa.rb', line 56
# Scores every parsed word and returns the index of the best candidate for
# replacement.
#
# Each word earns 100 points for containing Japanese, 10 for katakana, 10 for
# kanji, plus a part-of-speech bonus (adjective 20, noun 10, verb 8). On top
# of that it gets either its surface length or, with +:at_random+, a random
# value below 20.
#
# @param parsed_words [#each_with_index] words responding to +surface+ and
#   +feature+ (a comma-separated string whose first field is the part of speech)
# @param options [Hash] set +:at_random+ to a truthy value to add rand(20)
#   instead of the surface length
# @return [Integer] index of the first word with the highest positive score;
#   0 when there are no words or no word scores above 0
def find_suitable_index_for_replace(parsed_words, options = {})
  part_of_speech_bonus = { "形容詞" => 20, "名詞" => 10, "動詞" => 8 }
  index_for_replace = 0
  max_score = 0

  parsed_words.each_with_index do |result, index|
    surface = result.surface
    part_of_speech = result.feature.split(',')[0]

    # Calculate priority for determining a suitable word
    score = 0
    score += 100 if surface.contains_japanese?
    score += 10 if surface.contains_katakana?
    score += 10 if surface.contains_kanji?
    score += part_of_speech_bonus.fetch(part_of_speech, 0)
    score += options[:at_random] ? rand(20) : surface.length

    # Strict comparison: on a tie the earliest word keeps the lead
    if max_score < score
      max_score = score
      index_for_replace = index
    end
  end

  index_for_replace
end
.separate_words(text) ⇒ Object
e.g. “空飛ぶ寿司” => [“空”, “飛ぶ”, “寿司”]
37 38 39 |
# File 'lib/yazawa.rb', line 37
# Splits text into the surface forms of the words the tagger finds in it.
# e.g. "空飛ぶ寿司" => ["空", "飛ぶ", "寿司"]
def separate_words(text)
  parsed_words = tagger.parse(text)
  parsed_words.map { |parsed_word| parsed_word.surface }
end
.tagger ⇒ Object
31 32 33 34 |
# File 'lib/yazawa.rb', line 31
# Returns the tagger, creating it on first use and reusing it afterwards.
def tagger
  @tagger ||= begin
    # Specify mecab options for keeping white spaces in parsed text
    mecab_options = '--node-format=%M\t%H\n --unk-format=%M\t%H\n'
    MeCab::Light::CustomTagger.new(mecab_options)
  end
end