Class: NLP::Analyzer

Inherits:
Object
  • Object
show all
Defined in:
lib/analyzer.rb

Direct Known Subclasses

LIWCAnalyzer, RIDAnalyzer

Constant Summary collapse

CACHE_DIR =
'~/'

Instance Method Summary collapse

Constructor Details

#initialize(category_file, restore = true) ⇒ Analyzer

Returns a new instance of Analyzer.



20
21
22
23
24
25
26
27
28
29
30
# File 'lib/analyzer.rb', line 20

# Builds the analyzer's dictionary, either from previously stored state or
# from a category file.
#
# The state location is Analyzer::CACHE_DIR expanded to an absolute path.
#
# @param category_file [Object] passed to Dictionary#load_categories; only
#   used when +restore+ is falsy
# @param restore [Boolean] when truthy, the dictionary is obtained from
#   Dictionary.restore and +category_file+ is ignored; otherwise a new
#   dictionary is loaded from +category_file+ and then stored
def initialize(category_file, restore = true)
  cache_path = File.expand_path(Analyzer::CACHE_DIR)

  # Fast path: reuse the stored dictionary and skip category loading.
  return @dictionary = Dictionary.restore(cache_path) if restore

  @dictionary = Dictionary.new
  @dictionary.load_categories(category_file, :rid => true)
  @dictionary.store(cache_path)
end

Instance Method Details

#analyze(scanner) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/analyzer.rb', line 33

# Walks the tokens supplied by +scanner+, looks each lemma up in the
# dictionary and tallies the categories that were found.
#
# For every (word, category) match a trace line is printed to standard
# output, the category score and +:word_count+ are incremented, and the word
# is appended to +:words+; a word found in several categories is therefore
# counted once per category.
#
# @param scanner [#current, #next] token source; +current+ must return an
#   object responding to +lemat+, or a falsy value when the input is
#   exhausted, and +next(:word)+ must advance to the following word
# @return [Hash] with the keys
#   - +:word_count+    number of (word, category) matches
#   - +:word_total+    number of tokens visited
#   - +:scores+        category => match count (missing keys read as 0)
#   - +:words+         matched words, one entry per match
#   - +:sorted_scores+ +[category, count]+ pairs, highest count first
#   - +:classes+       share of matches whose category answers true to
#     +primary?+, +secondary?+ and +emotions?+ respectively; every share is
#     0.0 when nothing matched (previously NaN)
def analyze(scanner)
  results = {
    :word_count => 0,
    :word_total => 0,
    :scores => Hash.new { 0 },
    :words => []
  }

  while (token = scanner.current)
    word = token.lemat

    # The lookup key keeps only word characters and hyphens.
    # NOTE(review): \w is ASCII-only in Ruby 1.9+, so non-ASCII letters are
    # stripped from the key as well - confirm this matches how the dictionary
    # entries are stored.
    categories = @dictionary.find(word.gsub(/[^\w-]/, ""))
    unless categories.nil?
      categories.each do |category|
        puts "Znalazłem słowo #{word} : #{category}"
        results[:scores][category] += 1
        results[:word_count] += 1
        results[:words].push word
      end
    end

    results[:word_total] += 1
    scanner.next(:word)
  end

  results[:sorted_scores] = results[:scores].to_a.sort_by { |result| -result[1] }

  matches = results[:word_count]
  class_predicates = {
    :primary => :primary?,
    :secondary => :secondary?,
    :emotions => :emotions?
  }

  results[:classes] = {}
  class_predicates.each do |class_name, predicate|
    class_sum = results[:sorted_scores].inject(0) do |sum, (category, count)|
      category.public_send(predicate) ? sum + count : sum
    end
    # Guard the division: with no matches Float(0) / 0 would yield NaN.
    results[:classes][class_name] = matches.zero? ? 0.0 : Float(class_sum) / matches
  end

  results
end