Module: Dopeness

Defined in: lib/dopeness/parse_surfaces.rb,
lib/dopeness.rb,
lib/dopeness/version.rb,
lib/dopeness/parse_vowels.rb,
lib/dopeness/parse_pronunciation.rb

Overview

sentenceを解析してchunkの配列を返す

Constant Summary collapse

VERSION = "0.1.0"

Class Method Summary collapse

Class Method Details

.dope(verse) ⇒ `Object`

# File 'lib/dopeness.rb', line 10

# Scores how strongly each chunk of +verse+ matches itself and the chunks
# that follow it.
#
# Every chunk i is compared with chunks i .. i+9 (clamped to the end of the
# verse). The score of a pair (i, j) is the sum of four terms:
#
# * trigram similarity of the two vowel strings (a NaN result counts as 0),
# * 1 - normalized Levenshtein distance of the vowel strings,
# * 1 - normalized Levenshtein distance of the pronunciations,
# * a proximity term that falls from 1.0 (same chunk) by 0.1 per chunk of
#   separation.
#
# @param verse [String] text passed to parse_surfaces and parse_pronunciation
# @return [Array] a two-element array: the value of parse_surfaces(verse),
#   and a Hash mapping each chunk index i to a Hash of {j => score} ordered by
#   descending score
def self.dope(verse)
  window = 10 # a chunk is compared with itself and at most the next 9 chunks
  surfaces = parse_surfaces(verse)
  pronunciations = parse_pronunciation(verse)
  # NOTE(review): relies on String#to_roman being provided elsewhere in the
  # project or by a dependency; it is not defined in this listing.
  vowels = pronunciations.map { |pronunciation| parse_vowels(pronunciation.to_roman) }

  chunk_features = {}
  vowels.each_index do |i|
    last = [i + window, vowels.size].min
    scores = {}
    (i...last).each do |j|
      trigram_similarity = Trigram.compare(vowels[i], vowels[j])
      trigram_similarity = 0 if trigram_similarity.nan?
      vowel_similarity = 1 - Levenshtein.normalized_distance(vowels[i], vowels[j])
      pronunciation_similarity = 1 - Levenshtein.normalized_distance(pronunciations[i], pronunciations[j])
      # Float division is required here: with integer division (j - i) / 10
      # is always 0 inside the window, which made this term a constant 1.
      proximity = 1 - ((j - i) / window.to_f)
      scores[j] = trigram_similarity + vowel_similarity + pronunciation_similarity + proximity
    end
    # Best-scoring partner first.
    chunk_features[i] = scores.sort_by { |_, score| -score }.to_h
  end
  [surfaces, chunk_features]
end

.parse_pronunciation(sentence) ⇒ `Object`

# File 'lib/dopeness/parse_pronunciation.rb', line 8

# Parses +sentence+ and returns one pronunciation string per chunk.
#
# For every token in a chunk the last entry of its feature list is used.
# When that entry is "*" (presumably the analyser's marker for "no reading
# available" - confirm against the dictionary in use) the token's own surface
# is used instead. The per-token strings are concatenated per chunk.
#
# @param sentence [String] text handed to Parser#parse
# @return [Array<String>] one concatenated string per chunk, in chunk order
def parse_pronunciation(sentence)
  tree = Parser.new.parse(sentence)
  tree.set_output_layer(OUTPUT_RAW_SENTENCE)
  (0...tree.chunk_size).map do |i|
    chunk = tree.chunk(i)
    (0...chunk.token_size).map do |j|
      token = tree.token(chunk.token_pos + j)
      reading = token.feature_list(token.feature_list_size - 1).force_encoding("UTF-8")
      # Fall back to the surface of this token, not of the chunk's first token.
      reading == "*" ? token.surface.force_encoding("UTF-8") : reading
    end.join("")
  end
end

.parse_surfaces(sentence) ⇒ `Object`

# File 'lib/dopeness/parse_surfaces.rb', line 8

# Parses +sentence+ and returns the surface text of each chunk.
#
# The normalized surfaces of a chunk's tokens are concatenated in order.
# Tokens whose surface is the full stop "。" are dropped.
#
# @param sentence [String] text handed to Parser#parse
# @return [Array<String>] one concatenated surface string per chunk, in
#   chunk order
def parse_surfaces(sentence)
  tree = Parser.new.parse(sentence)
  tree.set_output_layer(OUTPUT_RAW_SENTENCE)
  (0...tree.chunk_size).map do |i|
    chunk = tree.chunk(i)
    token_surfaces = (0...chunk.token_size).map do |j|
      # Always read the current token; a surface that is literally "*" is kept
      # as-is rather than being replaced by the chunk's first token.
      tree.token(chunk.token_pos + j).normalized_surface.force_encoding("UTF-8")
    end
    token_surfaces.reject { |surface| surface == "。" }.join("")
  end
end

.parse_vowels(str) ⇒ `Object`

# File 'lib/dopeness/parse_vowels.rb', line 4

# Extracts the lowercase vowels of +str+, preserving their order.
#
# @param str [String] text to scan character by character
# @return [String] only the characters "a", "i", "u", "e" and "o" from +str+;
#   empty when there are none
def parse_vowels(str)
  vowels = %w[a i u e o]
  str.each_char.select { |letter| vowels.include?(letter) }.join
end

Module: Dopeness

Overview

Constant Summary collapse

Class Method Summary collapse

Class Method Details

.dope(verse) ⇒ Object

.parse_pronunciation(sentence) ⇒ Object

.parse_surfaces(sentence) ⇒ Object

.parse_vowels(str) ⇒ Object

.dope(verse) ⇒ `Object`

.parse_pronunciation(sentence) ⇒ `Object`

.parse_surfaces(sentence) ⇒ `Object`

.parse_vowels(str) ⇒ `Object`