Module: TextAnalysis

Defined in:
lib/text_analysis.rb,
lib/text_analysis/version.rb

Defined Under Namespace

Classes: Result

Constant Summary collapse

ROOT =
File.expand_path("../..", __FILE__)
VERSION =
"0.2.1"

Class Method Summary collapse

Class Method Details

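.analyze_text(text_input) ⇒ Result

Analyzes text_input and returns a Result holding character counts, the word count, the stop words found, and word-frequency lists.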



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/text_analysis.rb', line 6

# Computes basic statistics for a piece of text.
#
# The stop-word list is read from "#{ROOT}/data/stop_words/en.txt" (one word
# per line) on every call. Words are whitespace-delimited tokens; punctuation
# is not stripped, so "hat," and "hat" are counted as different words.
# Stop-word matching and frequency counting compare the downcased token.
#
# @param text_input [String] the text to analyze
# @return [Result] a result populated with:
#   - total_characters: length of the input, whitespace included
#   - total_characters_without_whitespaces: length with all whitespace removed
#   - total_words: number of whitespace-delimited tokens
#   - stop_words_found: unique tokens (original casing) that are stop words
#   - most_common_words: array of { :word, :occurences } hashes for every
#     downcased token, highest count first
#   - most_common_non_stop_words: same, with stop words excluded
def self.analyze_text(text_input)
  result = Result.new

  # Keep the list as an Array so membership is an exact, whole-word match.
  # (Calling #inspect here would turn it into a String, and String#include?
  # is a substring test: "e" would then match inside "the".)
  stop_words = File.readlines("#{ROOT}/data/stop_words/en.txt").map(&:chomp)

  # Splitting on a regexp yields an empty first token when the text starts
  # with whitespace; drop it so it is neither counted nor reported as a word.
  input_words = text_input.split(/\s+/).reject(&:empty?)

  stop_word = lambda { |word| stop_words.include?(word.downcase) }

  # Case-insensitive frequency table, most frequent first.
  # The :occurences key spelling is part of the public result format.
  frequencies = lambda do |words|
    words.
      group_by { |word| word.downcase }.
      map { |word, group| { :word => word, :occurences => group.size } }.
      sort_by { |entry| entry[:occurences] }.
      reverse
  end

  result.total_characters = text_input.length
  result.total_characters_without_whitespaces = text_input.gsub(/\s+/, "").length
  result.total_words = input_words.size
  result.stop_words_found = input_words.select(&stop_word).uniq
  result.most_common_words = frequencies.call(input_words)
  result.most_common_non_stop_words = frequencies.call(input_words.reject(&stop_word))

  result
end