Class: TextStat
- Inherits: Object
- Defined in: lib/textstat.rb, lib/textstat/version.rb
Constant Summary
- GEM_PATH = File.dirname(File.dirname(__FILE__))
- VERSION = "0.1.9"
Class Method Summary
- .automated_readability_index(text) ⇒ Object
- .avg_letter_per_word(text) ⇒ Object
- .avg_sentence_length(text) ⇒ Object
- .avg_sentence_per_word(text) ⇒ Object
- .avg_syllables_per_word(text, language = 'en_us') ⇒ Object
- .char_count(text, ignore_spaces = true) ⇒ Object
- .coleman_liau_index(text) ⇒ Object
- .dale_chall_readability_score(text, language = 'en_us') ⇒ Object
- .dictionary_path ⇒ Object
- .dictionary_path=(path) ⇒ Object
- .difficult_words(text, language = 'en_us', return_words = false) ⇒ Object
- .flesch_kincaid_grade(text, language = 'en_us') ⇒ Object
- .flesch_reading_ease(text, language = 'en_us') ⇒ Object
- .forcast(text, language = 'en_us') ⇒ Object
- .gunning_fog(text, language = 'en_us') ⇒ Object
- .lexicon_count(text, remove_punctuation = true) ⇒ Object
- .linsear_write_formula(text, language = 'en_us') ⇒ Object
- .lix(text) ⇒ Object
- .polysyllab_count(text, language = 'en_us') ⇒ Object
- .powers_sumner_kearl(text, language = 'en_us') ⇒ Object
- .sentence_count(text) ⇒ Object
- .smog_index(text, language = 'en_us') ⇒ Object
- .spache(text, language = 'en_us') ⇒ Object
- .syllable_count(text, language = 'en_us') ⇒ Object
- .text_standard(text, float_output = nil) ⇒ Object
Class Method Details
.automated_readability_index(text) ⇒ Object
# File 'lib/textstat.rb', line 113

def self.automated_readability_index(text)
  chars = char_count(text)
  words = lexicon_count(text)
  sentences = sentence_count(text)
  begin
    a = chars.to_f / words
    b = words.to_f / sentences
    readability = 4.71 * a + 0.5 * b - 21.43
    readability.round(1)
  rescue ZeroDivisionError
    0.0
  end
end
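A minimal usage sketch (the sample string is an illustrative assumption, not taken from the gem's own docs):

require 'textstat'

sample = 'Playing games has always been thought to be important to ' \
         'the development of well-balanced and creative children.'
TextStat.automated_readability_index(sample) # Float grade level, rounded to 1 decimal place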
.avg_letter_per_word(text) ⇒ Object
# File 'lib/textstat.rb', line 53

def self.avg_letter_per_word(text)
  letters_per_word = char_count(text).to_f / lexicon_count(text)
  letters_per_word.round(2)
rescue ZeroDivisionError
  0.0
end
.avg_sentence_length(text) ⇒ Object
# File 'lib/textstat.rb', line 35

def self.avg_sentence_length(text)
  asl = lexicon_count(text).to_f / sentence_count(text)
  asl.round(1)
rescue ZeroDivisionError
  0.0
end
.avg_sentence_per_word(text) ⇒ Object
# File 'lib/textstat.rb', line 60

def self.avg_sentence_per_word(text)
  sentence_per_word = sentence_count(text).to_f / lexicon_count(text)
  sentence_per_word.round(2)
rescue ZeroDivisionError
  0.0
end
.avg_syllables_per_word(text, language = 'en_us') ⇒ Object
# File 'lib/textstat.rb', line 42

def self.avg_syllables_per_word(text, language = 'en_us')
  syllable = syllable_count(text, language)
  words = lexicon_count(text)
  begin
    syllables_per_word = syllable.to_f / words
    syllables_per_word.round(1)
  rescue ZeroDivisionError
    0.0
  end
end
.char_count(text, ignore_spaces = true) ⇒ Object
# File 'lib/textstat.rb', line 6

def self.char_count(text, ignore_spaces = true)
  text = text.delete(' ') if ignore_spaces
  text.length
end
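The ignore_spaces flag controls whether spaces count toward the total. A small sketch (the sample string is assumed for illustration; the counts follow directly from the source above):

require 'textstat'

TextStat.char_count('who am I?')        # => 7 (spaces removed before counting)
TextStat.char_count('who am I?', false) # => 9 (every character, spaces included)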
.coleman_liau_index(text) ⇒ Object
# File 'lib/textstat.rb', line 106

def self.coleman_liau_index(text)
  letters = (avg_letter_per_word(text) * 100).round(2)
  sentences = (avg_sentence_per_word(text) * 100).round(2)
  coleman = 0.0588 * letters - 0.296 * sentences - 15.8
  coleman.round(2)
end
.dale_chall_readability_score(text, language = 'en_us') ⇒ Object
# File 'lib/textstat.rb', line 169

def self.dale_chall_readability_score(text, language = 'en_us')
  word_count = lexicon_count(text)
  count = word_count - difficult_words(text, language)

  begin
    per = 100.0 * count / word_count
  rescue ZeroDivisionError
    return 0.0
  end

  difficult_words = 100 - per
  score = 0.1579 * difficult_words + 0.0496 * avg_sentence_length(text)
  score += 3.6365 if difficult_words > 5
  score.round(2)
end
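The score depends on the easy-word list for the given language, read from "<language>.txt" under dictionary_path (see below). A hedged usage sketch (sample text assumed):

require 'textstat'

text = 'The cat sat on the mat. Then the cat fell asleep in the warm sun.'
TextStat.dale_chall_readability_score(text) # default 'en_us'; another language needs a matching dictionary file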
.dictionary_path ⇒ Object
# File 'lib/textstat.rb', line 310

def self.dictionary_path
  @dictionary_path ||= File.join(TextStat::GEM_PATH, 'lib', 'dictionaries')
end
.dictionary_path=(path) ⇒ Object
# File 'lib/textstat.rb', line 306

def self.dictionary_path=(path)
  @dictionary_path = path
end
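The reader/writer pair lets you point the gem at your own easy-word dictionaries. A sketch (the directory below is hypothetical):

require 'textstat'

TextStat.dictionary_path                              # default: <gem root>/lib/dictionaries
TextStat.dictionary_path = '/opt/my_app/dictionaries' # hypothetical directory of "<language>.txt" word lists
TextStat.dictionary_path                              # => "/opt/my_app/dictionaries"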
.difficult_words(text, language = 'en_us', return_words = false) ⇒ Object
# File 'lib/textstat.rb', line 148

def self.difficult_words(text, language = 'en_us', return_words = false)
  require 'set'
  easy_words = Set.new
  File.read(File.join(dictionary_path, "#{language}.txt")).each_line do |line|
    easy_words << line.chop
  end

  text_list = text.downcase.gsub(/[^0-9a-z ]/i, '').split(' ')
  diff_words_set = Set.new
  text_list.each do |value|
    next if easy_words.include? value

    diff_words_set.add(value) if syllable_count(value, language) > 1
  end

  if return_words
    diff_words_set
  else
    diff_words_set.length
  end
end
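Sketch of the return_words switch (sample text assumed). A word counts as difficult only when it is absent from the easy-word list and has more than one syllable:

require 'textstat'

text = 'Regardless of their undeniable complexity, dinosaurs fascinate children.'
TextStat.difficult_words(text)                # => Integer count of difficult words
TextStat.difficult_words(text, 'en_us', true) # => Set of the difficult words themselves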
.flesch_kincaid_grade(text, language = 'en_us') ⇒ Object
# File 'lib/textstat.rb', line 74

def self.flesch_kincaid_grade(text, language = 'en_us')
  sentence_length = avg_sentence_length(text)
  syllables_per_word = avg_syllables_per_word(text, language)
  flesch = 0.39 * sentence_length + 11.8 * syllables_per_word - 15.59
  flesch.round(1)
end
.flesch_reading_ease(text, language = 'en_us') ⇒ Object
# File 'lib/textstat.rb', line 67

def self.flesch_reading_ease(text, language = 'en_us')
  sentence_length = avg_sentence_length(text)
  syllables_per_word = avg_syllables_per_word(text, language)
  flesch = 206.835 - 1.015 * sentence_length - 84.6 * syllables_per_word
  flesch.round(2)
end
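Both Flesch variants above share the same two inputs (average sentence length and syllables per word). A hedged sketch (sample text assumed):

require 'textstat'

text = 'The quick brown fox jumps over the lazy dog. It then takes a nap.'
TextStat.flesch_reading_ease(text)  # higher score means easier text
TextStat.flesch_kincaid_grade(text) # approximate U.S. grade level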
.forcast(text, language = 'en_us') ⇒ Object
# File 'lib/textstat.rb', line 207

def self.forcast(text, language = 'en_us')
  words = text.split(' ')[0..149]
  words_with_one_syllabe = words.count do |word|
    syllable_count(word, language) == 1
  end

  forcast = 20 - (words_with_one_syllabe / 10)
  forcast
end
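Only the first 150 words are sampled, and the division is integer division, so the result is an Integer. Sketch (the file path is hypothetical):

require 'textstat'

text = File.read('sample.txt') # hypothetical file with at least 150 words
TextStat.forcast(text)         # Integer; based on one-syllable words among the first 150 words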
.gunning_fog(text, language = 'en_us') ⇒ Object
# File 'lib/textstat.rb', line 186

def self.gunning_fog(text, language = 'en_us')
  per_diff_words = 100.0 * difficult_words(text, language) / lexicon_count(text) + 5
  grade = 0.4 * (avg_sentence_length(text) + per_diff_words)
  grade.round(2)
rescue ZeroDivisionError
  0.0
end
.lexicon_count(text, remove_punctuation = true) ⇒ Object
# File 'lib/textstat.rb', line 11

def self.lexicon_count(text, remove_punctuation = true)
  text = text.gsub(/[^a-zA-Z\s]/, '').squeeze(' ') if remove_punctuation
  count = text.split(' ').count
  count
end
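The remove_punctuation flag strips non-letters before splitting, so digits and symbols are not counted as words. Sketch (sample string assumed; the counts follow from the source above):

require 'textstat'

TextStat.lexicon_count('3 cats & 2 dogs')        # => 2 ("cats", "dogs")
TextStat.lexicon_count('3 cats & 2 dogs', false) # => 5 (raw whitespace-separated tokens)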
.linsear_write_formula(text, language = 'en_us') ⇒ Object
# File 'lib/textstat.rb', line 128

def self.linsear_write_formula(text, language = 'en_us')
  easy_word = 0
  difficult_word = 0
  text_list = text.split(' ')[0..100]

  text_list.each do |word|
    if syllable_count(word, language) < 3
      easy_word += 1
    else
      difficult_word += 1
    end
  end

  text = text_list.join(' ')

  number = (easy_word * 1 + difficult_word * 3).to_f / sentence_count(text)
  number -= 2 if number <= 20
  number / 2
end
.lix(text) ⇒ Object
# File 'lib/textstat.rb', line 195

def self.lix(text)
  words = text.split(' ')

  words_length = words.length
  long_words = words.count { |word| word.length > 6 }

  per_long_words = 100.0 * long_words / words_length
  asl = avg_sentence_length(text)
  lix = asl + per_long_words
  lix.round(2)
end
.polysyllab_count(text, language = 'en_us') ⇒ Object
# File 'lib/textstat.rb', line 81

def self.polysyllab_count(text, language = 'en_us')
  count = 0
  text.split(' ').each do |word|
    w = syllable_count(word, language)
    count += 1 if w >= 3
  end
  count
end
.powers_sumner_kearl(text, language = 'en_us') ⇒ Object
# File 'lib/textstat.rb', line 216

def self.powers_sumner_kearl(text, language = 'en_us')
  grade = 0.0778 * avg_sentence_length(text) + 0.0455 * syllable_count(text, language) - 2.2029
  grade.round(2)
end
.sentence_count(text) ⇒ Object
# File 'lib/textstat.rb', line 31

def self.sentence_count(text)
  text.scan(/[\.\?!][\'\\)\]]*[ |\n][A-Z]/).map(&:strip).count + 1
end
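The count is a regex heuristic: every sentence-ending mark followed by a capitalised word adds one, plus one for the final sentence. Sketch (sample string assumed):

require 'textstat'

TextStat.sentence_count('It rained. We stayed in. The end') # => 3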
.smog_index(text, language = 'en_us') ⇒ Object
# File 'lib/textstat.rb', line 90

def self.smog_index(text, language = 'en_us')
  sentences = sentence_count(text)

  if sentences >= 3
    begin
      polysyllab = polysyllab_count(text, language)
      smog = 1.043 * Math.sqrt(30.0 * polysyllab / sentences) + 3.1291
      smog.round(1)
    rescue ZeroDivisionError
      0.0
    end
  else
    0.0
  end
end
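SMOG needs at least three sentences; shorter input short-circuits to 0.0. Sketch (sample strings assumed):

require 'textstat'

TextStat.smog_index('Too short. Only two sentences.') # => 0.0 (fewer than 3 sentences)

text = 'It was seven. The birds sang loudly. Nobody noticed the storm coming.'
TextStat.smog_index(text)                             # Float grade, since 3+ sentences are present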
.spache(text, language = 'en_us') ⇒ Object
# File 'lib/textstat.rb', line 221

def self.spache(text, language = 'en_us')
  words = text.split(' ').count
  unfamiliar_words = difficult_words(text, language) / words
  grade = (0.141 * avg_sentence_length(text)) + (0.086 * unfamiliar_words) + 0.839
  grade.round(2)
end
.syllable_count(text, language = 'en_us') ⇒ Object
# File 'lib/textstat.rb', line 17

def self.syllable_count(text, language = 'en_us')
  return 0 if text.empty?

  text = text.downcase
  text.gsub(/[^a-zA-Z\s]/, '').squeeze(' ')
  dictionary = Text::Hyphen.new(language: language, left: 0, right: 0)
  count = 0
  text.split(' ').each do |word|
    word_hyphenated = dictionary.visualise(word)
    count += word_hyphenated.count('-') + 1
  end
  count
end
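Syllables are estimated from Text::Hyphen hyphenation points, so the language argument must name a dictionary shipped by the text-hyphen gem ('en_us' is the default; other codes below are assumptions to verify against your text-hyphen version):

require 'textstat'

TextStat.syllable_count('dinosaur')        # hyphenation-based estimate using US English rules
TextStat.syllable_count('colour', 'en_uk') # assumes the en_uk Text::Hyphen dictionary is available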
.text_standard(text, float_output = nil) ⇒ Object
# File 'lib/textstat.rb', line 228

def self.text_standard(text, float_output = nil)
  grade = []

  # Appending Flesch-Kincaid Grade
  lower = flesch_kincaid_grade(text).round
  upper = flesch_kincaid_grade(text).ceil
  grade.append(lower.to_i)
  grade.append(upper.to_i)

  # Appending Flesch Reading Ease
  score = flesch_reading_ease(text)
  if score < 100 && score >= 90
    grade.append(5)
  elsif score < 90 && score >= 80
    grade.append(6)
  elsif score < 80 && score >= 70
    grade.append(7)
  elsif score < 70 && score >= 60
    grade.append(8)
    grade.append(9)
  elsif score < 60 && score >= 50
    grade.append(10)
  elsif score < 50 && score >= 40
    grade.append(11)
  elsif score < 40 && score >= 30
    grade.append(12)
  else
    grade.append(13)
  end

  # Appending SMOG Index
  lower = smog_index(text).round
  upper = smog_index(text).ceil
  grade.append(lower.to_i)
  grade.append(upper.to_i)

  # Appending Coleman-Liau Index
  lower = coleman_liau_index(text).round
  upper = coleman_liau_index(text).ceil
  grade.append(lower.to_i)
  grade.append(upper.to_i)

  # Appending Automated Readability Index
  lower = automated_readability_index(text).round
  upper = automated_readability_index(text).ceil
  grade.append(lower.to_i)
  grade.append(upper.to_i)

  # Appending Dale-Chall Readability Score
  lower = dale_chall_readability_score(text).round
  upper = dale_chall_readability_score(text).ceil
  grade.append(lower.to_i)
  grade.append(upper.to_i)

  # Appending Linsear Write Formula
  lower = linsear_write_formula(text).round
  upper = linsear_write_formula(text).ceil
  grade.append(lower.to_i)
  grade.append(upper.to_i)

  # Appending Gunning Fog Index
  lower = gunning_fog(text).round
  upper = gunning_fog(text).ceil
  grade.append(lower.to_i)
  grade.append(upper.to_i)

  # Finding the Readability Consensus based upon all the above tests
  require 'counter'
  d = Counter.new(grade)
  final_grade = d.most_common(1)
  score = final_grade[0][0]

  if float_output
    score.to_f
  else
    "#{score.to_i - 1}th and #{score.to_i}th grade"
  end
end
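Sketch of the two output modes (sample text assumed; the quoted string only illustrates the format, not a computed result):

require 'textstat'

text = 'The cat sat on the mat. Then the cat fell asleep. The sun was warm.'
TextStat.text_standard(text)       # => String like "8th and 9th grade"
TextStat.text_standard(text, true) # => the consensus grade as a Float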