Class: Classifier

Inherits:
Object
  • Object
show all
Defined in:
lib/engine/classifier.rb

Constant Summary collapse

# Numeric tuning constants for the classifier. The code that reads them is
# not shown in this listing.
# NOTE(review): presumably consumed by calculate_probability and
# calculate_sentiment — confirm against those methods before changing a value.
UNKNOWN_WORD_STRENGTH =
1.0
UNKNOWN_WORD_PROBABILITY =
0.5
TOLERANCE =
0.05
# Common English words that #classify skips when scoring a sentence.
#
# The entries must be separated by whitespace: %w splits on whitespace only,
# so a comma-separated list collapses each source line into a single string
# such as "a,able,about,...", and STOP_WORDS.include?(token) never matches an
# individual word. The array is frozen because it is shared, read-only data.
STOP_WORDS =
%w[
    a able about across after all almost also am among an and
    any are as at be because been but by can cannot could dear
    did do does either else ever every for from get got had has
    have he her hers him his how however i if in into is it its
    just least let like likely may me might most must my neither
    no nor not of off often on only or other our own rather
    really said say says she should since so some than that the
    their them then there these they this tis to too totally twas
    us wants was we were what when where which while who whom why
    will with would yet you your
].freeze

Instance Method Summary collapse

Constructor Details

#initialize(positive_corpus, negative_corpus) ⇒ Classifier

Returns a new instance of Classifier.



23
24
25
26
# File 'lib/engine/classifier.rb', line 23

# Stores the two corpora that #classify consults for token and entry counts.
#
# @param positive_corpus [#token_count, #entry_count] corpus of positive examples
# @param negative_corpus [#token_count, #entry_count] corpus of negative examples
def initialize(positive_corpus, negative_corpus)
  @positive_corpus = positive_corpus
  @negative_corpus = negative_corpus
end

Instance Method Details

#classify(sentence) ⇒ ClassificationResult



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/engine/classifier.rb', line 28

# Scores every token of +sentence+ that is not listed in STOP_WORDS against
# the positive and negative corpora and collects the outcome.
#
# For each kept token the method looks up its count in both corpora, derives
# a probability via calculate_probability, hands that probability to
# record_probability, and appends a TokenProbability to the result. After the
# loop, the overall probability comes from combine_probabilities and the
# overall sentiment from calculate_sentiment.
#
# @param sentence the text to classify; it is passed to Document.new and
#   ClassificationResult.new unchanged
# @return [ClassificationResult] result with token_probabilities,
#   overall_probability and sentiment filled in
def classify(sentence)
  outcome = ClassificationResult.new(sentence)

  Document.new(sentence).each_token do |word|
    next if STOP_WORDS.include?(word)

    in_positive = @positive_corpus.token_count(word)
    in_negative = @negative_corpus.token_count(word)

    probability = calculate_probability(
      in_positive, @positive_corpus.entry_count,
      in_negative, @negative_corpus.entry_count
    )

    record_probability(probability)

    # Entry counts are re-read from the corpora here, exactly as many times
    # as in the original listing, so the sequence of calls on them is kept.
    token_entry = TokenProbability.new(
      word, probability, @positive_corpus.entry_count,
      in_positive, @negative_corpus.entry_count,
      in_negative, calculate_sentiment(probability)
    )
    outcome.token_probabilities.push(token_entry)
  end

  outcome.overall_probability = combine_probabilities
  outcome.sentiment = calculate_sentiment(outcome.overall_probability)

  outcome
end