Class: Text
- Inherits:
-
Object
- Object
- Text
- Defined in:
- lib/textation/text.rb
Instance Method Summary
- #analyze(text) ⇒ Object
- #check_input(text) ⇒ Object
- #occurence_of_word(text, word) ⇒ Object
- #occurences_of_words(text) ⇒ Object
- #percentage_of_word(text, word) ⇒ Object
- #percentage_of_words(text) ⇒ Object
- #syllables_per_line(text) ⇒ Object
- #top_words(text, num) ⇒ Object
- #top_words_all(text, num = 3) ⇒ Object
- #top_words_no_stop_words(text, num = 3) ⇒ Object
- #useful_words(text) ⇒ Object
Instance Method Details
#analyze(text) ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/textation/text.rb', line 7

# Collects a set of statistics about +text+ into a single Hash.
#
# The input is first passed through check_input. Counts are Integers,
# averages and percentages are Floats rounded to two decimals, and list-like
# entries (words, per-line / per-paragraph figures) are ", "-joined Strings.
#
# Ratios whose denominator is zero (e.g. for empty input) are reported as 0.0
# instead of NaN / Infinity.
#
# @param text [String] input handed to check_input
# @return [Hash{Symbol=>Integer, Float, String}] the statistics, keyed by name
def analyze(text)
  text = check_input(text)

  # Zero-safe "numerator / denominator * scale", rounded to two decimals.
  ratio = lambda do |numerator, denominator, scale = 1|
    denominator.zero? ? 0.0 : (numerator.to_f / denominator * scale).round(2)
  end

  paragraphs = text.split(/\n\n/)
  useful = useful_words(text)
  unique = text.downcase.split(/\W+/).reject(&:empty?).uniq

  result = {}
  result[:character_count] = text.length
  result[:character_count_excluding_spaces] = text.gsub(/\s/, '').length
  result[:letter_count] = text.gsub(/[^[:alpha:]]/, '').length
  result[:line_count] = text.split(/\n/).length
  result[:word_count] = text.split(/\W+/).reject(&:empty?).length
  # NOTE(review): inside the bracket expression `{1}` and `\.` are literal
  # members, so the first alternative appears to split only on dots preceded
  # by a non-letter character - confirm that this is the intended rule.
  result[:sentence_count] = text.split(/[^[:alpha:]{1}\.]\.{1,3}\s?\r?\n?|\?+|!+|\?!+|!\?+/).length
  result[:paragraph_count] = paragraphs.length
  result[:lines_per_paragraph] = paragraphs.map { |paragraph| paragraph.split(/\n/).length }.join(', ')
  result[:syllables_per_line] = syllables_per_line(text).join(', ')
  result[:average_words_per_sentence] = ratio.call(result[:word_count], result[:sentence_count])
  result[:average_sentences_per_paragraph] = ratio.call(result[:sentence_count], result[:paragraph_count])
  result[:useful_words] = useful.join(', ')
  result[:percentage_of_useful_words] = ratio.call(useful.length, result[:word_count], 100)
  result[:occurences_of_words] = occurences_of_words(text).map { |word, count| "#{word}: #{count}" }.join(', ')
  result[:percentage_of_words] = percentage_of_words(text).map { |word, share| "#{word}: #{share}" }.join(', ')
  result[:unique_words] = unique.join(', ')
  result[:percentage_of_unique_words] = ratio.call(unique.length, result[:word_count], 100)
  result
end
#check_input(text) ⇒ Object
94 95 96 |
# File 'lib/textation/text.rb', line 94

# Resolves the analyser input: a string that ends in ".txt" is treated as a
# file path and replaced by that file's contents; anything else is returned
# unchanged.
#
# The extension test is anchored to the very end of the string (\z) and the
# dot is escaped, so multi-line text containing a line that happens to end in
# "txt" is no longer mistaken for a path. File.read closes the file handle
# once the contents are loaded.
#
# @param text [String] literal text or a path ending in ".txt"
# @return [String] the text to analyse
# @raise [SystemCallError] if a ".txt" path cannot be read (e.g. Errno::ENOENT)
def check_input(text)
  return text unless text.match?(/\.txt\z/)

  File.read(text)
end
#occurence_of_word(text, word) ⇒ Object
68 69 70 |
# File 'lib/textation/text.rb', line 68

# Looks up a single word in the table produced by occurences_of_words.
#
# The word is lowercased before the lookup; the result is whatever that
# table holds under the lowercased key.
#
# @param text [String] input forwarded to occurences_of_words
# @param word [String] word to look up (case-insensitive)
# @return [Object] the entry stored for the word in the occurrence table
def occurence_of_word(text, word)
  counts = occurences_of_words(text)
  counts[word.downcase]
end
#occurences_of_words(text) ⇒ Object
57 58 59 60 61 62 63 64 65 66 |
# File 'lib/textation/text.rb', line 57

# Counts how often each word occurs in the input.
#
# The input is resolved through check_input, lowercased and split on runs of
# non-word characters; empty tokens are dropped. The resulting Hash is
# ordered by descending count. Words with equal counts keep the order of
# their first appearance - sort_by alone is not stable, so the position is
# added to the sort key as an explicit tie-breaker.
#
# @param text [String] input handed to check_input
# @return [Hash{String=>Integer}] word => number of occurrences
def occurences_of_words(text)
  words = check_input(text).downcase.split(/\W+/).reject(&:empty?)
  counts = words.group_by(&:itself).transform_values(&:count)

  counts.each_with_index
        .sort_by { |(_word, count), position| [-count, position] }
        .map(&:first)
        .to_h
end
#percentage_of_word(text, word) ⇒ Object
78 79 80 |
# File 'lib/textation/text.rb', line 78

# Looks up a single word in the table produced by percentage_of_words.
#
# The word is lowercased before the lookup; the result is whatever that
# table holds under the lowercased key.
#
# @param text [String] input forwarded to percentage_of_words
# @param word [String] word to look up (case-insensitive)
# @return [Object] the entry stored for the word in the percentage table
def percentage_of_word(text, word)
  shares = percentage_of_words(text)
  shares[word.downcase]
end
#percentage_of_words(text) ⇒ Object
72 73 74 75 76 |
# File 'lib/textation/text.rb', line 72

# Expresses each word's occurrence count as a percentage of all words.
#
# The total is the sum of the counts returned by occurences_of_words, so the
# numerators and the denominator are based on the same token list (an empty
# leading token produced by splitting text that starts with punctuation or
# whitespace no longer inflates the denominator). Percentages are rounded to
# two decimals.
#
# @param text [String] input forwarded to occurences_of_words
# @return [Hash] word => percentage (Float), in the order of the occurrence table
def percentage_of_words(text)
  counts = occurences_of_words(text)
  total = counts.values.sum

  counts.transform_values { |count| (count.to_f / total * 100).round(2) }
end
#syllables_per_line(text) ⇒ Object
82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/textation/text.rb', line 82

# Estimates the number of syllables on each line of +text+.
#
# The text is lowercased and split on "\n"; each line is split on runs of
# non-word characters. A word's estimate is its number of vowel groups
# (maximal runs of a, e, i, o, u, y). When a word has more than one group, a
# trailing "e", "es" or "ed" is removed before counting again.
#
# @param text [String] the text to analyse (used as given)
# @return [Array<Integer>] one syllable estimate per line
def syllables_per_line(text)
  # Number of maximal vowel runs in a word.
  vowel_groups = ->(word) { word.scan(/[aeiouy]+/).length }

  text.downcase.split(/\n/).map do |line|
    line.split(/\W+/).sum do |word|
      groups = vowel_groups.call(word)
      groups > 1 ? vowel_groups.call(word.gsub(/e$|es$|ed$/, '')) : groups
    end
  end
end
#top_words(text, num) ⇒ Object
49 50 51 52 53 54 55 |
# File 'lib/textation/text.rb', line 49

# Returns the +num+ most frequent entries of a word list as a ", "-joined
# String, most frequent first.
#
# Entries with equal counts are ranked by first appearance - sort_by alone is
# not stable, so the position is added to the sort key as an explicit
# tie-breaker.
#
# @param text [Array<String>] list of words (already tokenised)
# @param num [Integer] how many entries to return
# @return [String] the top entries joined with ", "
def top_words(text, num)
  text.group_by(&:itself)
      .transform_values(&:count)
      .each_with_index
      .sort_by { |(_word, count), position| [-count, position] }
      .first(num)
      .map { |(word, _count), _position| word }
      .join(', ')
end
#top_words_all(text, num = 3) ⇒ Object
44 45 46 47 |
# File 'lib/textation/text.rb', line 44

# Returns the most frequent words of the input, stop words included.
#
# The input is resolved through check_input, lowercased and split on runs of
# non-word characters. Empty tokens (produced when the text starts with
# punctuation or whitespace) are discarded so that "" can never be reported
# as a word. Ranking and formatting are delegated to top_words.
#
# @param text [String] input handed to check_input
# @param num [Integer] how many words to return (default 3)
# @return [Object] whatever top_words returns for the token list
def top_words_all(text, num = 3)
  words = check_input(text).downcase.split(/\W+/).reject(&:empty?)
  top_words(words, num)
end
#top_words_no_stop_words(text, num = 3) ⇒ Object
39 40 41 42 |
# File 'lib/textation/text.rb', line 39

# Returns the most frequent words of the input, ignoring stop words.
#
# The words are tokenised here (resolved through check_input, lowercased,
# split on runs of non-word characters) and filtered against STOP_WORDS
# WITHOUT de-duplicating them: ranking a list that has already been made
# unique would give every word a count of 1 and lose the frequencies.
# Ranking and formatting are delegated to top_words.
#
# @param text [String] input handed to check_input
# @param num [Integer] how many words to return (default 3)
# @return [Object] whatever top_words returns for the filtered token list
def top_words_no_stop_words(text, num = 3)
  tokens = check_input(text).downcase.split(/\W+/)
  words = tokens.reject { |token| token.empty? || STOP_WORDS.include?(token) }
  top_words(words, num)
end
#useful_words(text) ⇒ Object
30 31 32 33 34 35 36 37 |
# File 'lib/textation/text.rb', line 30

# Lists the distinct words of the input that are not in STOP_WORDS.
#
# The input is resolved through check_input, lowercased and split on runs of
# non-word characters; empty tokens and tokens found in STOP_WORDS are
# dropped, and each remaining word is kept once.
#
# @param text [String] input handed to check_input
# @return [Array<String>] distinct non-stop words
def useful_words(text)
  tokens = check_input(text).downcase.split(/\W+/)
  kept = tokens.reject { |token| STOP_WORDS.include?(token) || token.empty? }
  kept.uniq
end