Class: Rhopalic::Analysis

Inherits:
Object
  • Object
show all
Defined in:
lib/rhopalic/analysis.rb

Instance Method Summary collapse

Constructor Details

#initialize(dictionary = nil) ⇒ Analysis

Returns a new instance of Analysis.



10
11
12
# File 'lib/rhopalic/analysis.rb', line 10

# Creates an analyzer, optionally backed by a syllable dictionary.
#
# @param dictionary [#syllable_count, nil] object that #analyze_phrase asks
#   for syllable counts via +syllable_count(word)+; when nil, no dictionary
#   lookup is attempted
def initialize(dictionary = nil)
  @dictionary = dictionary
end

Instance Method Details

#analyze_phrase(phrase) ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/rhopalic/analysis.rb', line 14

# Splits a phrase into words and tracks, word by word, whether the sequence
# is still letter-rhopalic (as judged by word_sequence_rhopalic?) and/or
# syllable-rhopalic (as judged by syllable_sequence_rhopalic?).
#
# Words are runs of alphabetic characters and digits. A word that follows an
# apostrophe directly after the previous word is merged into that previous
# word when the pair is listed in CONTRACTIONS or the trailing part is "s".
# Syllable counts come from the dictionary given to the constructor when it
# knows the word, otherwise from Lingua::EN::Syllable.
#
# @param phrase [String] the text to analyze
# @return [Phrase, nil] a Phrase built from the original text, the two
#   rhopalic flags, the words, each word's starting offset in +phrase+, the
#   syllable counts and the per-word "found in dictionary" flags; nil as soon
#   as the phrase is neither letter- nor syllable-rhopalic, when a word mixes
#   digits with letters, or when a number does not spell out as a single word
def analyze_phrase(phrase)
  words = []
  indices = []
  syllable_counts = []
  in_dictionary = []
  is_letter_rhopalic = true
  is_syllable_rhopalic = true

  # Explanation of this regex:
  # - [:alpha:] matches alphabetic characters throughout the whole unicode set
  # - we use word boundaries (\b) to delineate words
  # - however, word boundaries don't match before and after underscore, so we
  #   explicitly treat that as a word boundary
  # - using positive lookahead (?=...) because otherwise we'd miss words where
  #   they are only separated by an underscore
  phrase.scan(/(\b|_)([[:alpha:]\d]+)(?=\b|_)/) do
    match = Regexp.last_match
    word = match[2]
    # Use the offset of the word itself (group 2). The whole match may start
    # one character earlier, on a leading underscore consumed by group 1,
    # which would skew both the reported index and the contraction check.
    index = match.begin(2)
    is_number = false

    # Bail out on words that contain numbers, unless we can pronounce the number
    # as a whole word.
    word.match(/\d+/) do |num_match|
      return nil if num_match[0].size != word.size
      is_number = true
    end

    # Checking whether the previous and this word form a known contraction
    # or possessive: the character right after the previous word must be an
    # apostrophe and this word must start immediately after it.
    if !indices.empty? && (phrase[indices.last + words.last.length] == "'") &&
        (index == indices.last + words.last.length + 1)
      contraction = words.last + "'" + word
      if (syllable_count = CONTRACTIONS[contraction.downcase]) || word.downcase == "s"
        words[-1] = contraction
        # Only known contractions override the syllable count; a bare
        # possessive keeps the count of the word it is attached to.
        if syllable_count
          syllable_counts[-1] = syllable_count
          in_dictionary[-1] = true
        end

        is_letter_rhopalic = false unless word_sequence_rhopalic?(words)
        is_syllable_rhopalic = false unless syllable_sequence_rhopalic?(syllable_counts)
        return nil unless is_letter_rhopalic || is_syllable_rhopalic
        next
      end
    end

    # If the word is a series of digits, count syllables based on spelling the
    # number out, but ignore it if the number doesn't translate to a single word.
    syllable_counting_word = word
    if is_number
      number_as_words = word.to_i.to_words
      # \A and \z anchor the whole string; ^ and $ would only anchor a line.
      return nil unless number_as_words.match(/\A\w+\z/)
      syllable_counting_word = number_as_words
    end

    # Prefer the dictionary's count; fall back to the heuristic counter when
    # there is no dictionary or it does not know the word.
    if @dictionary
      syllable_count = @dictionary.syllable_count(syllable_counting_word)
      in_dictionary.push(true) unless syllable_count.nil?
    end
    unless syllable_count
      syllable_count = Lingua::EN::Syllable.syllables(syllable_counting_word)
      in_dictionary.push(false)
    end

    words.push(word)
    indices.push(index)
    syllable_counts.push(syllable_count)

    is_letter_rhopalic = false unless word_sequence_rhopalic?(words)
    is_syllable_rhopalic = false unless syllable_sequence_rhopalic?(syllable_counts)
    return nil unless is_letter_rhopalic || is_syllable_rhopalic
  end

  Phrase.new(phrase, is_letter_rhopalic, is_syllable_rhopalic, words, indices,
      syllable_counts, in_dictionary)
end