Class: Odyssey::Engine

Inherits:
Object
  • Object
show all
Defined in:
lib/odyssey/engine.rb

Constant Summary collapse

LETTER_REGEX =

regex

/[A-z]/
WORD_REGEX =
/[^\W][A-z\-']*/
SENTENCE_REGEX =
/[^\.!?\s][^\.!?]*(?:[\.!?](?!['"]?\s|$)[^\.!?]*)*[\.!?]?['"]?(?=\s|$)/
PROBLEM_WORDS =

Words that cause the syllable analyzer to fail, mapped as word => syllable count.

{
  'ion'    => 2
}

Instance Method Summary collapse

Constructor Details

#initialize(formula_name) ⇒ Engine

Returns a new instance of Engine.



26
27
28
29
# File 'lib/odyssey/engine.rb', line 26

# Sets up a fresh engine: first clears all cached analysis state through
# +reset+, then chooses the scoring formula through +update_formula+.
#
# @param formula_name constant name forwarded unchanged to +update_formula+
def initialize(formula_name)
  reset
  update_formula(formula_name)
end

Instance Method Details

#analyze_syllables(_word) ⇒ Object



148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/odyssey/engine.rb', line 148

# Estimates the number of syllables in a single word.
#
# The word is stripped of everything except ASCII letters and lower-cased,
# then either looked up in PROBLEM_WORDS (explicit overrides) or run through
# a vowel-group heuristic.
#
# @param _word [String] raw token; it is not mutated
# @return [Integer] the PROBLEM_WORDS override if one exists, 1 for any
#   cleaned word of three letters or fewer, otherwise the heuristic count
def analyze_syllables(_word)
  # Keep ASCII letters only. The range must be spelled A-Za-z: "A-z" also
  # spans the punctuation between 'Z' and 'a' ([ \ ] ^ _ `), which would
  # survive the strip and defeat the suffix rules below.
  # Lower-case before the lookup so capitalised forms such as "Ion" still
  # hit their PROBLEM_WORDS entry (the keys are lower-case).
  word = _word.gsub(/[^A-Za-z]/, '').downcase

  return PROBLEM_WORDS[word] if PROBLEM_WORDS.has_key?(word)

  #this is an approximation, but it is fairly close
  return 1 if word.length <= 3

  # drop a trailing "es"/"e" that follows a consonant other than l, or a trailing "ed"
  word.sub!(/(?:[^laeiouy]es|ed|[^laeiouy]e)$/, '')
  # a leading "y" is not counted as a vowel
  word.sub!(/^y/, '')
  # every run of one or two vowels counts as one syllable
  word.scan(/[aeiouy]{1,2}/).size
end

#average_syllables_per_word(text) ⇒ Object



130
131
132
# File 'lib/odyssey/engine.rb', line 130

# Mean number of syllables per word, taken from the counts cached in @stats.
# The +text+ argument is accepted but never consulted.
#
# @return [Float] syllable_count / word_count; float division, so a zero
#   word count yields NaN or Infinity rather than raising
def average_syllables_per_word(text)
  syllables = @stats['syllable_count'].to_f
  words     = @stats['word_count'].to_f
  syllables / words
end

#average_syllables_per_word_per_sentence(text) ⇒ Object



134
135
136
137
138
139
140
# File 'lib/odyssey/engine.rb', line 134

# Syllables-per-word ratio for each sentence, read from @stats_by_sentence.
# One entry is produced per element of the 'string_length' list; the
# +text+ argument is accepted but never consulted.
#
# @return [Array<Float>] syllable_count[i] / word_count[i] for every index i
def average_syllables_per_word_per_sentence(text)
  syllables = @stats_by_sentence['syllable_count']
  words     = @stats_by_sentence['word_count']

  @stats_by_sentence['string_length'].each_index.map do |idx|
    syllables[idx].to_f / words[idx].to_f
  end
end

#average_words_per_sentence(text) ⇒ Object



126
127
128
# File 'lib/odyssey/engine.rb', line 126

# Mean number of words per sentence, taken from the counts cached in @stats.
# The +text+ argument is accepted but never consulted.
#
# @return [Float] word_count / sentence_count; float division, so a zero
#   sentence count yields NaN or Infinity rather than raising
def average_words_per_sentence(text)
  words     = @stats['word_count'].to_f
  sentences = @stats['sentence_count'].to_f
  words / sentences
end

#get_stats(with_score = true) ⇒ Object



167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# File 'lib/odyssey/engine.rb', line 167

# Returns a fresh hash of the statistics gathered by the last analysis.
#
# @param with_score [Boolean] when truthy, the formula's name, the formula
#   object, the overall score and the per-sentence score list are added
# @return [Hash{String => Object}] the seven base statistics copied from
#   @stats, plus 'name', 'formula', 'score' and 'score_by_sentence' on request
def get_stats(with_score = true)
  stat_keys = %w[
    string_length letter_count syllable_count word_count sentence_count
    average_words_per_sentence average_syllables_per_word
  ]

  all_stats = {}
  stat_keys.each { |key| all_stats[key] = @stats[key] }
  return all_stats unless with_score

  all_stats['name']    = @formula.name
  all_stats['formula'] = @formula
  all_stats['score']   = @score
  all_stats['score_by_sentence'] = merge_scores_with_sentences()
  all_stats
end

#letter_count(text) ⇒ Object



84
85
86
87
# File 'lib/odyssey/engine.rb', line 84

# Counts the characters of +text+ that match LETTER_REGEX.
#
# @param text [String]
# @return [Integer] number of matches
def letter_count(text)
  text.scan(LETTER_REGEX).length
end

#merge_scores_with_sentences ⇒ Object



186
187
188
189
190
191
192
193
194
195
196
# File 'lib/odyssey/engine.rb', line 186

# Pairs every entry of @score_by_sentence with the sentence at the same
# index in @sentences.
#
# @return [Array<Struct>] one struct per score, exposing +score+ and +sentence+
def merge_scores_with_sentences
  pair = Struct.new(:score, :sentence)

  @score_by_sentence.each_with_index.map do |sentence_score, idx|
    pair.new(sentence_score, @sentences[idx])
  end
end

#reset ⇒ Object



198
199
200
201
202
203
204
205
206
# File 'lib/odyssey/engine.rb', line 198

# Returns the engine to its blank state: no formula, a zero score, empty
# score/syllable lists, empty stats, and no cached words or sentences.
# @data and @stats_by_sentence are not touched here.
#
# @return [Array] the new, empty @syllables list (value of the last assignment)
def reset
  @formula = @words = @sentences = nil
  @score = 0
  @stats = {}
  # two distinct arrays - these lists must never alias each other
  @score_by_sentence = []
  @syllables = []
end

#sanitize(text) ⇒ Object

For now this just removes HTML tags, but it could do more in the future.



144
145
146
# File 'lib/odyssey/engine.rb', line 144

# Strips anything that looks like an HTML tag from +text+: a '<', an
# optional '/', one or more non-'>' characters, then '>'.
#
# @param text [String] raw input; it is not mutated
# @return [String] a copy of +text+ with every such span removed
def sanitize(text)
  text.gsub(/<\/?[^>]+>/, '')
end

#score(_text, analyze = true) ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/odyssey/engine.rb', line 38

# Scores +_text+ with the current formula.
#
# When +analyze+ is truthy the text is sanitized and every statistic is
# recomputed: whole-text figures go into @stats, per-sentence figures into
# @stats_by_sentence, and the raw material (words, sentences, syllables)
# into @data. When +analyze+ is falsy the statistics left by the previous
# analysis are reused and only the formula is re-run.
#
# @param _text [String] the text to score (unused when +analyze+ is falsy)
# @param analyze [Boolean] recompute statistics before scoring
# @return [Object] whatever the formula's +score+ method returns (also stored in @score)
def score(_text, analyze = true)
  if analyze
    #sanitize the text
    text = sanitize(_text)

    # syllable_count appends to @syllables; start from an empty list so a
    # second call does not pile new counts on top of the previous text's.
    @syllables = []

    #first get all the statistics
    # (order matters: word_count fills @words, which syllable_count reads,
    # and sentence_count fills @sentences)
    @stats = {
      'string_length' => string_length(text),
      'letter_count' => letter_count(text),
      'word_count' => word_count(text),
      'syllable_count' => syllable_count(text),
      'sentence_count' => sentence_count(text),
    }

    @stats['average_words_per_sentence'] = average_words_per_sentence(text)
    @stats['average_syllables_per_word'] = average_syllables_per_word(text)

    #prepare the parameter to the score method
    @data = {
      'raw' => text,
      'words' => @words,
      'sentences' => @sentences,
      'syllables' => @syllables
    }
    @data['words_per_sentence'] = words_by_sentence()
    # NOTE(review): each whole sentence is handed to the per-word analyzer
    # here; left unchanged, confirm that this is what formulas expect.
    @data['syllables_per_sentence'] = syllables_by_sentence(@sentences)

    # Per-sentence word and syllable figures are derived from the already
    # tokenised sentences. Going through word_count/syllable_count here
    # would overwrite @words with each sentence in turn, give every
    # sentence the syllable total of the last one, and append stray
    # entries to @syllables.
    sentence_words = @data['words_per_sentence']
    @stats_by_sentence = {
      'string_length' => @sentences.map { |a| string_length(a) },
      'letter_count' => @sentences.map { |a| letter_count(a) },
      'word_count' => sentence_words.map { |ws| ws.size },
      'syllable_count' => sentence_words.map { |ws|
        ws.inject(0) { |sum, w| sum + analyze_syllables(w) }
      },
      'sentence_count' => Array.new(@sentences.length, 1),
    }
    @stats_by_sentence['average_syllables_per_word'] = average_syllables_per_word_per_sentence(text)
  end

  #now run all that through the formula
  @score_by_sentence = @formula.score_by_sentence(@data, @stats_by_sentence)
  @score = @formula.score(@data, @stats)
end

#sentence_count(text) ⇒ Object



121
122
123
124
# File 'lib/odyssey/engine.rb', line 121

# Splits +text+ into sentences with SENTENCE_REGEX, caches the list in
# @sentences and reports how many were found.
#
# @param text [String]
# @return [Integer] number of sentences matched
def sentence_count(text)
  (@sentences = text.scan(SENTENCE_REGEX)).length
end

#string_length(text) ⇒ Object



80
81
82
# File 'lib/odyssey/engine.rb', line 80

# Reports the length of +text+ by delegating to its own #length.
#
# @param text [#length] the value to measure
# @return [Object] whatever text.length returns
def string_length(text)
  text.length
end

#syllable_count(text) ⇒ Object



89
90
91
92
93
94
95
96
97
# File 'lib/odyssey/engine.rb', line 89

# Counts the syllables in +text+.
#
# The text is tokenised with WORD_REGEX and every word is passed to
# analyze_syllables. The words are taken from the argument itself rather
# than from whatever @words currently holds, so the result is correct even
# when @words was last filled from a different string.
#
# Side effect: each word's syllable count is appended to @syllables.
#
# @param text [String]
# @return [Integer] total syllables across all words of +text+
def syllable_count(text)
  @syllables ||= []

  text.scan(WORD_REGEX).inject(0) do |total, word|
    num = analyze_syllables(word)
    @syllables << num
    total + num
  end
end

#syllables_by_sentence(words) ⇒ Object



99
100
101
102
103
104
105
106
# File 'lib/odyssey/engine.rb', line 99

# Runs analyze_syllables over every element of +words+.
#
# @param words [#each] the strings to analyse
# @return [Array] one analyze_syllables result per element, in order
def syllables_by_sentence(words)
  counts = []
  words.each { |entry| counts << analyze_syllables(entry) }
  counts
end

#update_formula(formula_name) ⇒ Object



31
32
33
34
35
36
# File 'lib/odyssey/engine.rb', line 31

def update_formula(formula_name)
  klass = Module.const_get formula_name
  @formula = klass.new
rescue
  @formula = Formula.new
end

#word_count(text) ⇒ Object



108
109
110
111
# File 'lib/odyssey/engine.rb', line 108

# Splits +text+ into words with WORD_REGEX, caches the list in @words and
# reports how many were found.
#
# @param text [String]
# @return [Integer] number of words matched
def word_count(text)
  (@words = text.scan(WORD_REGEX)).length
end

#words_by_sentence ⇒ Object



113
114
115
116
117
118
119
# File 'lib/odyssey/engine.rb', line 113

# Tokenises every cached sentence in @sentences with WORD_REGEX.
#
# @return [Array<Array<String>>] one word list per sentence, in sentence order
def words_by_sentence
  @sentences.map { |sentence| sentence.scan(WORD_REGEX) }
end