Module: Fonemas

Defined in: lib/fonemas.rb,
lib/fonemas/version.rb

Constant Summary collapse

VERSION =

"0.4.12"

Class Method Summary collapse

.anormalize(t) ⇒ Object
.clean(text) ⇒ Object
.downcase(text) ⇒ Object
.entreVocales(word, i) ⇒ Object
.entreVocalyConsonante(word, i) ⇒ Object
.fonemaLetra(letra) ⇒ Object
.fonemas(word) ⇒ Object
.generateFonemas(fonema, i = 0, current = []) ⇒ Object

def self.checkFonemas(p) # un último chequeo de seguridad: for pronunciacion in p; for fonema in pronunciacion.split(" "); raise "fonema invalido" unless lista_de_fonemas.include? fonema; end; end; end
.isDiptongo(word, first, second) ⇒ Object
.isFinal(word, i) ⇒ Object
.isFricativa(word, i) ⇒ Object
.isTonica(word, i) ⇒ Object
.isVocal(word, i) ⇒ Object
.lastVocal(word, from) ⇒ Object
.lista_de_fonemas ⇒ Object
.normalize(t) ⇒ Object
.separar(word) ⇒ Object
.version ⇒ Object

Class Method Details

.anormalize(t) ⇒ `Object`

# File 'lib/fonemas.rb', line 468

# Recursively walks +t+ and appends every non-Array leaf, in order, to the
# @output accumulator. @output must already hold an array when this is
# called (normalize assigns it before calling).
#
# @param t [Object] a leaf value, or an Array (possibly nested) of leaves
# @return [Object] +t+ when it is an Array, otherwise @output; results are
#   meant to be read from @output
def self.anormalize(t)
  # Anything whose class is not exactly Array is a leaf.
  return @output << t unless t.class.name == 'Array'

  t.each { |elemento| anormalize(elemento) }
end

.clean(text) ⇒ `Object`

# File 'lib/fonemas.rb', line 12

# Normalises raw input text: commas become spaces, whitespace runs collapse
# to a single space, surrounding whitespace is trimmed and the result is
# lowercased through downcase.
#
# @param text [String] raw text
# @return [Object] the value returned by downcase for the cleaned text
def self.clean(text)
  sin_comas = text.tr(',', ' ')
  compacto = sin_comas.gsub(/\s+/, ' ').chomp.strip
  downcase(compacto)
end

.downcase(text) ⇒ `Object`



20
21
22

# File 'lib/fonemas.rb', line 20

# Lowercases +text+ by delegating to UnicodeUtils.downcase.
#
# @param text [Object] value handed to UnicodeUtils.downcase (presumably a
#   String — confirm against the UnicodeUtils documentation)
# @return [Object] whatever UnicodeUtils.downcase returns
def self.downcase(text)
  UnicodeUtils.downcase(text)
end

.entreVocales(word, i) ⇒ `Object`

# File 'lib/fonemas.rb', line 122

# Tells whether position +i+ of +word+ sits between two vowels.
# The first and last positions never qualify.
#
# @param word [String, Array<String>] the word or its tokens
# @param i [Integer] position to test
# @return [Boolean] true when both neighbours satisfy isVocal
def self.entreVocales(word, i)
  return false if i == 0 || i == word.size - 1

  isVocal(word, i - 1) && isVocal(word, i + 1)
end

.entreVocalyConsonante(word, i) ⇒ `Object`

# File 'lib/fonemas.rb', line 130

# Tells whether exactly one of the two neighbours of position +i+ is a vowel
# according to isVocal (vowel on one side, non-vowel on the other).
#
# NOTE(review): for i == 0 the left neighbour index is -1, which Ruby indexes
# from the end of +word+, so the last element is used as the "previous" one.
# Past the end, word[i + 1] is nil and counts as a non-vowel.
#
# @param word [String, Array<String>] the word or its tokens
# @param i [Integer] position to test
# @return [Boolean]
def self.entreVocalyConsonante(word, i)
  vocal_antes = isVocal(word, i - 1)
  vocal_despues = isVocal(word, i + 1)
  vocal_antes != vocal_despues
end

.fonemaLetra(letra) ⇒ `Object`

# File 'lib/fonemas.rb', line 171

# Returns the pronunciation(s) of the *name* of a single letter, as used when
# a word consists of just that letter.
#
# The table is rebuilt on every call so each caller receives fresh arrays and
# strings that are safe to mutate.
#
# @param letra [String] a single lowercase letter
# @return [Array<String>] one or more space-separated phoneme strings
# @raise [RuntimeError] when the letter is not in the table
def self.fonemaLetra(letra)
  nombres = {
    'a' => ['aa'], 'á' => ['aa'],
    'b' => ['b ee'],
    'c' => ['s ee'],
    'd' => ['d ee'],
    'e' => ['ee'], 'é' => ['ee'],
    'f' => ['ee f ee'],
    'g' => ['g ee'],
    'h' => ['aa ch e'],
    'i' => ['ii'], 'í' => ['ii'],
    'j' => ['j oo t a'],
    'k' => ['k aa'],
    'l' => ['ee l e'],
    'm' => ['ee m e'],
    'n' => ['ee n e'],
    'ñ' => ['ee nh e'],
    'o' => ['oo'], 'ó' => ['oo'],
    'p' => ['p ee'],
    'q' => ['k uu'],
    'r' => ['ee rr ee', 'ee r ee'],
    's' => ['ee s e'],
    't' => ['t ee'],
    'u' => ['uu'], 'ú' => ['uu'],
    'v' => ['b ee', 'uu b e'],
    'w' => ['d o b l e b ee', 'd o b l e uu b e'],
    'x' => ['ee k i s'],
    'y' => ['ll ee'],
    'z' => ['s ee t a']
  }

  nombres.fetch(letra) { raise "error, no conozco pronunciación de #{letra}" }
end

.fonemas(word) ⇒ `Object`

# File 'lib/fonemas.rb', line 206

# Converts +word+ into its possible phoneme transcriptions.
#
# Apostrophes are removed first. A one-character word is delegated to
# fonemaLetra. A word containing '_' is split on '_' and every non-empty part
# is transcribed on its own; the results are joined with spaces into a
# single-element array. Otherwise the word is tokenised with separar and each
# token is mapped either to one phoneme string or to an array of alternatives
# (an empty string alternative means the token may be silent, see
# generateFonemas). generateFonemas expands the alternatives and normalize
# flattens the result.
#
# @param word [String] the word to transcribe
# @return [Array<String>] space-separated phoneme strings
# @raise [RuntimeError] when a token has no known pronunciation
def self.fonemas(word)
  word = word.gsub(/'/,'')
  if word.size == 1
    return fonemaLetra(word)
  end
  if word.include?('_')
    output = []
    for a in word.split('_')
      if a.size > 0
        output << Fonemas.fonemas(a)
      end
    end
    # NOTE(review): each entry of output is itself an array of alternatives and
    # Array#join flattens nested arrays, so every alternative of every part
    # ends up in this single string — confirm this is intended.
    return [output.join(" ")]
  end
  # From here on word is an array of tokens (digraphs ch/ll/rr are one token).
  word = separar(word)
  fonema = []
  for i in 0..(word.length-1)
    letra = word[i]
    # NOTE(review): several branches read word[i-1]; for i == 0 that is
    # word[-1], i.e. the last token.
    case letra
      when 'a','á','ä','ã' then
        if isTonica(word,i)
          fonema << 'aa'
        else
          fonema << 'a'
        end
      when 'b','v' then
        if isVocal(word,i-1) and (word[i+1] == 'b' or word[i+1] == 'v')
          fonema << ['bb','']
        elsif i == 0 and isVocal(word,i+1)
          if word[i+1] == 'u' and isDiptongo(word,i+1,i+2)
            fonema << ['bb','b','g']
          else
            fonema << ['bb','b']
          end
        elsif word[i+1] == 'u' and isDiptongo(word,i+1,i+2)
          if entreVocales(word,i)
            fonema << ['b','g','']
          else
            fonema << ['bb','g']
          end
        elsif isFricativa(word,i-1)
          fonema << 'b'
        elsif isFinal(word,i)
          fonema << 'b'
        elsif entreVocales(word,i)
          fonema << ['b','']
        else
          fonema << 'bb'
        end
      when 'c' then
        if word[i+1] == 'e' or word[i+1] == 'i'
          fonema << 's'
        else
          fonema << 'k'
        end
      when 'ch' then
        # Both branches currently emit the same phoneme; the commented-out
        # lines keep the alternative variants that were tried.
        if entreVocales(word,i)
          #fonema << ['ch','sh','tch','j']
          fonema << 'ch'
        else
          fonema << 'ch'
          #fonema << ['ch','sh','tch']
        end
      when 'd' then
        if i == 0 and isVocal(word,i+1)
          fonema << ['dd','d']
        elsif entreVocales(word,i) or i == word.size-1
          fonema << ['d','']
        elsif entreVocalyConsonante(word,i)
          fonema << ['dd','d']
        else
          fonema << 'd'
        end
      when 'e','é','ë' then
        if isTonica(word,i)
          fonema << 'ee'
        else
          fonema << 'e'
        end
      when 'f' then
        fonema << 'f'
      when 'g' then
        if word[i+1] == 'u' and i == 0 and isTonica(word,i+2)
          # nothing: the following 'u' branch emits 'gu' for this case

        elsif word[i+1] == 'e' or word[i+1] == 'i'
          fonema << 'j'
        else
          if !entreVocales(word,i) and word[i-1] != 'n'
            fonema << 'gg'
          else
            fonema << 'g'
          end
        end
      when 'h' then
        if word[i+1] == 'u' and isDiptongo(word,i+1,i+2)
          fonema << ['','g']
        elsif i > 0 and word[i-1] == 'o' and word[i+1] == 'a'
          fonema << 'j'
        end
        # otherwise nothing is emitted for 'h'
      when 'i','í' then
        if isTonica(word,i)
          fonema << 'ii'
        else
          fonema << 'i'
        end
      when 'j' then
        if i == 0 and word[i+1] == 'o' and (word[i+2] == 'ã' or word[i+2] == 'h')
          fonema << 'll'
        else
          fonema << 'j'
        end
      when 'k' then
        fonema << 'k'
      when 'l' then
        fonema << 'l'
      when 'll' then
        #fonema << ['ll','lli','i']
        fonema << ['ll','i']
      when 'm' then
        fonema << 'm'
      when 'n' then
        fonema << 'n'
      when 'ñ'  then
        fonema << 'nh'
      when 'o','ó' then
        if isTonica(word,i)
          fonema << 'oo'
        else
          fonema << 'o'
        end
      when 'p' then
        fonema << 'p'
      when 'q' then
        fonema << 'k'
      when 'r' then
        if i == 0
          fonema << 'rr'
        else
          fonema << 'r'
        end
      when 'rr' then
        fonema << 'rr'
      when 's' then
        if word[i-1] == 'r' or word[i-1] == 'd' or i == word.size-1
          #fonema << ['s','','h']
          fonema << ['s','']
        elsif entreVocalyConsonante(word,i)
          #fonema << ['s','h']
          fonema << 's'
        elsif word[i-1] == 'b' and word[i+1] == 't'
          #fonema << ['s','h']
          fonema << 's'
        elsif word[i-1] == 'b'
          fonema << ['s','']
        else
          fonema << 's'
        end
      when 't' then
        fonema << 't'
      when 'ü' then
        fonema << 'u'
      when 'u','ú' then
        if word[i-1] == 'q'
          # nothing: 'u' right after 'q' emits no phoneme
        elsif word[i-1] == 'g' and i == 1 and isTonica(word,i+1)
            # counterpart of the 'g' branch that emitted nothing
            fonema << ['gu']
        elsif isTonica(word,i)
            fonema << 'uu'
        else
            fonema << 'u'
        end
      when 'w' then
        if i == 0
          fonema << ['b','bb']
        elsif word[i-1] == 'o'
          fonema << 'u'
        elsif word[i+1] == 'i'
          fonema << 'u'
        else #if entreVocales(word,i)
          fonema << 'gu'
#          else
#            fonema << 'Gu'
        end
      when 'x' then
        #fonema << ['ks','k','h']
        fonema << ['ks','k']
      when 'y' then
        if i == word.size - 1
          fonema << 'i'
        else
          #fonema << ['ll','lli','i']
          fonema << 'll'
        end
      when 'z' then
        if i == word.size - 1
          #fonema << ['s','h','']
          fonema << ['s','']
        else
          fonema << 's'
        end

      else
        raise "error, no conozco pronunciación de #{letra} en #{word}"
    end



  end
  #puts "pre: #{fonema}"
  # Expand every combination of alternatives, then flatten to a list of strings.
  t =  normalize(generateFonemas(fonema))
  #puts "out: #{t}"

  # The validity check against lista_de_fonemas is currently disabled.
  #self.checkFonemas(t)

  return t
end

.generateFonemas(fonema, i = 0, current = []) ⇒ `Object`

# Final safety check: verifies that every space-separated phoneme of every
# pronunciation in +p+ belongs to lista_de_fonemas.
#
# @param p [Array<String>] pronunciations, each a space-separated phoneme string
# @return [Object] +p+ itself when every phoneme is valid
# @raise [RuntimeError] "fonema invalido" on the first unknown phoneme
def self.checkFonemas(p)
  p.each do |pronunciacion|
    pronunciacion.split(" ").each do |fonema|
      next if lista_de_fonemas.include?(fonema)

      raise "fonema invalido"
    end
  end
end

# File 'lib/fonemas.rb', line 435

# Expands a list of phoneme slots into every possible pronunciation.
#
# Each slot of +fonema+ is either a single phoneme string or an Array of
# alternatives. An empty string contributes nothing to the pronunciation.
# A plain slot yields whatever the rest of the list yields; an Array slot
# yields an Array with one entry per alternative, so the result is a String
# when there are no Array slots and a (possibly nested) Array otherwise.
#
# @param fonema [Array<String, Array<String>>] phoneme slots
# @param i [Integer] index of the slot being expanded (recursion state)
# @param current [Array<String>] phonemes chosen so far (recursion state)
# @return [String, Array] space-joined pronunciation(s)
def self.generateFonemas(fonema, i = 0, current = [])
  return current.join(' ') if i == fonema.length

  paso = fonema[i]
  # Extends the chosen prefix with one alternative (empty ones add nothing)
  # and continues with the next slot.
  avanzar = lambda do |alternativa|
    prefijo = alternativa == '' ? current : current + [alternativa]
    generateFonemas(fonema, i + 1, prefijo)
  end

  if paso.class.name == 'Array'
    paso.map { |alternativa| avanzar.call(alternativa) }
  else
    avanzar.call(paso)
  end
end

.isDiptongo(word, first, second) ⇒ `Object`

# File 'lib/fonemas.rb', line 140

# Tells whether the elements of +word+ at positions +first+ and +second+
# combine as open+closed, closed+open or closed+closed, where the open set is
# a/e/o and the closed set is i/u (unaccented only).
#
# @param word [String, Array<String>] the word or its tokens
# @param first [Integer] index of the first element
# @param second [Integer] index of the second element
# @return [Boolean]
def self.isDiptongo(word, first, second)
  abiertas = %w(a e o)
  cerradas = %w(i u)
  primera = word[first]
  segunda = word[second]

  if cerradas.include?(primera)
    # A closed first vowel pairs with any listed vowel.
    abiertas.include?(segunda) || cerradas.include?(segunda)
  else
    # Otherwise it must be an open vowel followed by a closed one.
    abiertas.include?(primera) && cerradas.include?(segunda)
  end
end

.isFinal(word, i) ⇒ `Object`



113
114
115

# File 'lib/fonemas.rb', line 113

# Tells whether +i+ is the index of the last element of +word+.
#
# The previous comparison (word.size == i-1) only held for i == size + 1,
# which is never a valid index, so the predicate was false for every
# position inside the word.
#
# @param word [String, Array<String>] the word or its tokens
# @param i [Integer] position to test
# @return [Boolean] true only for the last valid index
def self.isFinal(word, i)
  i == word.size - 1
end

.isFricativa(word, i) ⇒ `Object`

# File 'lib/fonemas.rb', line 117

# Tells whether the element of +word+ at position +i+ belongs to the set of
# letters this module treats as "fricativas".
#
# @param word [String, Array<String>] the word or its tokens
# @param i [Integer] position to test
# @return [Boolean]
def self.isFricativa(word, i)
  # Same membership as the original list, with repeated entries written once.
  %w(f s c z j ll y g b w v m x d).include?(word[i])
end

.isTonica(word, i) ⇒ `Object`

# File 'lib/fonemas.rb', line 36

# Decides whether the token at index +i+ of +word+ is treated as stressed.
#
# A token carrying a written accent (or one of ã ä ë) is always stressed.
# Otherwise the word is hyphenated with Text::Hyphen and the decision depends
# on the number of pieces, on whether the word ends in n, s or a vowel, and on
# where +i+ falls relative to the hyphenation points.
#
# @param word [Array<String>] tokens of the word (it is joined with #join, so
#   an Array is expected)
# @param i [Integer] index of the token to test
# @return [Boolean]
def self.isTonica(word,i)
  # TODO: words that have stress but no written accent are not yet considered
  return true if word.size == 1
  tildes = %w(á é í ó ú ã ä ë)
  w = word.join
  if tildes.include? word[i]
    return true
  else
    es = Text::Hyphen.new(:language => "es", :left => 0, :right => 1)
    # p: presumably the hyphenation point positions within w — confirm against
    # the Text::Hyphen documentation.
    p = es.hyphenate(w)
    #puts es.visualize(w)
    # hh: pieces of the visualized hyphenation; hh.size is used below as the
    # syllable count.
    hh = es.visualize(w).split("-")

    if hh.size == 1 and w.size > 4 and w.include? 'h' and w[0] != 'h'
      # the "johan" case: not hyphenated, longer than 4 characters, contains an
      # 'h' that is not the first letter; stressed only before that 'h'
      p = w.index('h')
      if i < p
        return true
      else
        return false
      end
    end

    #puts hh.size
    if w =~ /[áéíóúãäë]/
      # the accent already exists on another syllable
      return false
    else
      #puts es.visualize(w)
      if hh.size == 1
        # monosyllables: stressed when the token equals the vowel found by
        # lastVocal
        if lastVocal(w,w.size-1) == word[i]
          return true
        else
          return false
        end
      elsif hh.size == 2
          # oxytone words carry a written accent when ending in n, s or a vowel
          #puts "#{word[i]} #{i}<#{p[0]} - #{lastVocal(w,p[0])}"
          if w =~ /[nsaeiou]$/
            # ends in n, s or a vowel and has no written accent,
            # therefore it is paroxytone (stress on the first piece)
          #  puts "#{lastVocal(w,p[0])} == #{word[i]} #{word[i].class.name}"
            if i < p[0] and lastVocal(w,p[0]) == word[i]
              return true
            else
              return false
            end
          else
            if i < p[0]
              return false
            else
              # NOTE(review): this call passes w.size while the monosyllable
              # branch passes w.size-1 — confirm which one is intended.
              return lastVocal(w,w.size) == word[i]
            end
          end
      elsif hh.size >= 3
        #puts hh.join("-")
        if i > p[p.size-1]
          # after the last hyphenation point: stressed only when the word does
          # not end in n, s or a vowel
          if w =~ /[nsaeiou]$/
            return false
          else
            return true
          end
        elsif i > p[p.size-2] and i <= p[p.size-1] and w =~ /[nsaeiou]$/
          # between the last two hyphenation points, word ends in n, s or vowel
          return true
        else
          return false
        end
      end
    end



    return false
  end
end

.isVocal(word, i) ⇒ `Object`

# File 'lib/fonemas.rb', line 135

# Tells whether the element of +word+ at position +i+ is one of the vowels
# a e i o u or their acute-accented forms.
#
# @param word [String, Array<String>] the word or its tokens
# @param i [Integer] position to test (out-of-range positions yield false)
# @return [Boolean]
def self.isVocal(word, i)
  %w(a e i o u á é í ó ú).include?(word[i])
end

.lastVocal(word, from) ⇒ `Object`

# File 'lib/fonemas.rb', line 24

# Searches backwards from position +from+ - 1 down to position 0 and returns
# the first element containing an unaccented vowel (a, e, i, o, u).
#
# @param word [String, Array<String>] the word or its tokens
# @param from [Integer] exclusive upper index where the backwards scan starts
# @return [String, false] the element found, or false when there is none
def self.lastVocal(word, from)
  (1..from).each do |atras|
    letra = word[from - atras]
    return letra if letra =~ /[aeiou]/
  end
  false
end

.lista_de_fonemas ⇒ `Object`

# File 'lib/fonemas.rb', line 481

# Returns the list of phoneme symbols: the 'SIL' marker, the plain and doubled
# vowel symbols, and the remaining symbols, with repeated entries removed
# (first occurrence wins).
#
# @return [Array<String>] a new array on every call
def self.lista_de_fonemas
  silencio = ['SIL']
  vocales = %w{a e i o u aa ee ii oo uu}
  resto = %w{bb b d e f g i j k l m n o p rr r s t u ks k gu ch dd gg ll nh}
  (silencio + vocales + resto).uniq
end

.normalize(t) ⇒ `Object`

# File 'lib/fonemas.rb', line 462

# Flattens +t+ into a flat list of its non-Array leaves.
#
# The list is accumulated in the module-level @output variable, which is
# reset on every call and filled by anormalize.
#
# @param t [Object] a leaf value or a (possibly nested) Array of leaves
# @return [Array] the leaves of +t+ in order (the @output array itself)
def self.normalize(t)
  @output = []
  anormalize(t)
  @output
end

.separar(word) ⇒ `Object`

# File 'lib/fonemas.rb', line 149

# Lowercases +word+ (through downcase) and splits it into tokens, keeping the
# digraphs "ch", "ll" and "rr" together as single tokens.
#
# @param word [String] the word to split
# @return [Array<String>] tokens in order
def self.separar(word)
  texto = downcase(word)
  digrafos = %w[ch ll rr]
  letras = []
  pos = 0
  while pos < texto.length
    par = texto[pos, 2]
    if digrafos.include?(par)
      # Consume both characters of the digraph as one token.
      letras << par
      pos += 2
    else
      letras << texto[pos]
      pos += 1
    end
  end
  letras
end

.version ⇒ `Object`



8
9
10

# File 'lib/fonemas.rb', line 8

# Returns the value of the VERSION constant.
#
# @return [Object] VERSION
def self.version
  VERSION
end

Module: Fonemas

Constant Summary collapse

Class Method Summary collapse

Class Method Details

.anormalize(t) ⇒ Object

.clean(text) ⇒ Object

.downcase(text) ⇒ Object

.entreVocales(word, i) ⇒ Object

.entreVocalyConsonante(word, i) ⇒ Object

.fonemaLetra(letra) ⇒ Object

.fonemas(word) ⇒ Object

.generateFonemas(fonema, i = 0, current = []) ⇒ Object

.isDiptongo(word, first, second) ⇒ Object

.isFinal(word, i) ⇒ Object

.isFricativa(word, i) ⇒ Object

.isTonica(word, i) ⇒ Object

.isVocal(word, i) ⇒ Object

.lastVocal(word, from) ⇒ Object

.lista_de_fonemas ⇒ Object

.normalize(t) ⇒ Object

.separar(word) ⇒ Object

.version ⇒ Object

.anormalize(t) ⇒ `Object`

.clean(text) ⇒ `Object`

.downcase(text) ⇒ `Object`

.entreVocales(word, i) ⇒ `Object`

.entreVocalyConsonante(word, i) ⇒ `Object`

.fonemaLetra(letra) ⇒ `Object`

.fonemas(word) ⇒ `Object`

.generateFonemas(fonema, i = 0, current = []) ⇒ `Object`

.isDiptongo(word, first, second) ⇒ `Object`

.isFinal(word, i) ⇒ `Object`

.isFricativa(word, i) ⇒ `Object`

.isTonica(word, i) ⇒ `Object`

.isVocal(word, i) ⇒ `Object`

.lastVocal(word, from) ⇒ `Object`

.lista_de_fonemas ⇒ `Object`

.normalize(t) ⇒ `Object`

.separar(word) ⇒ `Object`

.version ⇒ `Object`