Class: Judgee::Classifier
- Inherits:
-
Object
- Object
- Judgee::Classifier
- Defined in:
- lib/judgee/classifier.rb
Constant Summary collapse
- CATEGORIES_KEY =
Constants ###
"judgee:categories"
- CATEGORY_KEY =
"judgee:category"
- ALPHA =
1.0
Instance Attribute Summary collapse
-
#redis ⇒ Object
readonly
Returns the value of attribute redis.
Instance Method Summary collapse
- #classify(data) ⇒ Object
- #classify_fast(data) ⇒ Object
- #flush_category(category) ⇒ Object
- #flushdb(flush_db = false) ⇒ Object
-
#initialize(options = {}) ⇒ Classifier
constructor
A new instance of Classifier.
- #train(category, data) ⇒ Object
- #train_fast(category, data) ⇒ Object
- #untrain(category, data) ⇒ Object
- #untrain_fast(category, data) ⇒ Object
Constructor Details
#initialize(options = {}) ⇒ Classifier
Returns a new instance of Classifier.
16 17 18 |
# File 'lib/judgee/classifier.rb', line 16
#
# Creates a classifier and stores a new Redis client in @redis.
#
# NOTE(review): the extracted listing read `def initialize(={})` and
# `Redis.new()`, which is not valid Ruby. The `options` identifier has been
# restored from the documented signature `initialize(options = {})`;
# forwarding it to Redis.new is presumed -- confirm against the gem source.
#
# @param options [Hash] settings handed to Redis.new (presumed, see note)
# @return [Classifier] a new instance of Classifier
def initialize(options = {})
  @redis = Redis.new(options)
end
Instance Attribute Details
#redis ⇒ Object (readonly)
Returns the value of attribute redis.
14 15 16 |
# File 'lib/judgee/classifier.rb', line 14
#
# Read-only accessor for the object held in @redis.
#
# @return [Object] the value of attribute redis
def redis
  @redis
end
Instance Method Details
#classify(data) ⇒ Object
64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# File 'lib/judgee/classifier.rb', line 64
#
# Scores every category listed under CATEGORIES_KEY against the words of
# +data+ and returns the category with the lowest score.
#
# For each word the score contribution is
#   (word_count * log((count_in_category + ALPHA) / (count_in_all_categories + ALPHA * data.length))).abs
# so a smaller total means a closer match.
#
# The per-word denominator does not depend on the category being scored, so
# every counter is read from Redis exactly once (categories x words HGETs)
# and the denominators are computed once up front.
#
# @param data [#length] input handed to count_occurance; its length scales
#   the smoothing term
# @return [Symbol, nil] the best-matching category, or nil when no category
#   is stored or +data+ yields no words (previously this raised
#   NoMethodError on nil)
def classify(data)
  categories = redis.smembers(CATEGORIES_KEY)
  return nil if categories.empty?

  occurrences = count_occurance(data)
  return nil if occurrences.empty?

  words = occurrences.keys
  smoothing = ALPHA * data.length

  # counts[category][word] => stored integer count (0 when the field is missing)
  counts = categories.each_with_object({}) do |category, by_category|
    key = redis_category_key(category)
    by_category[category] = words.each_with_object({}) do |word, by_word|
      by_word[word] = redis.hget(key, word).to_i
    end
  end

  denominators = words.each_with_object({}) do |word, totals|
    total = categories.map { |name| counts[name][word] }.inject(0, :+)
    totals[word] = (total + smoothing).to_f
  end

  scores = categories.each_with_object(Hash.new(0)) do |category, result|
    occurrences.each do |word, word_count|
      numerator = (counts[category][word] + ALPHA).to_f
      result[category] += (word_count * Math.log(numerator / denominators[word])).abs
    end
  end

  scores.min_by(&:last).first.to_sym
end
#classify_fast(data) ⇒ Object
79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/judgee/classifier.rb', line 79
#
# Same scoring as #classify, but reads each category's counters with a
# single HMGET instead of one HGET per word.
#
# For each word the score contribution is
#   (word_count * log((count_in_category + ALPHA) / (count_in_all_categories + ALPHA * data.length))).abs
# and the category with the lowest total is returned.
#
# The denominator is identical for every category, so the counters are
# fetched once per category and the denominators are derived from that
# single snapshot (one HMGET per category).
#
# @param data [#length] input handed to count_occurance; its length scales
#   the smoothing term
# @return [Symbol, nil] the best-matching category, or nil when no category
#   is stored or +data+ yields no words (HMGET is never issued with an empty
#   field list)
def classify_fast(data)
  categories = redis.smembers(CATEGORIES_KEY)
  return nil if categories.empty?

  occurrences = count_occurance(data)
  return nil if occurrences.empty?

  words = occurrences.keys
  smoothing = ALPHA * data.length

  # counts[category][word] => stored count as Float (0.0 when the field is missing)
  counts = categories.each_with_object({}) do |category, by_category|
    stored = redis.hmget(redis_category_key(category), words)
    by_category[category] = words.zip(stored).each_with_object({}) do |(word, value), by_word|
      by_word[word] = value.to_f
    end
  end

  denominators = words.each_with_object({}) do |word, totals|
    totals[word] = categories.map { |name| counts[name][word] }.inject(0.0, :+) + smoothing
  end

  scores = categories.each_with_object(Hash.new(0)) do |category, result|
    terms = words.map do |word|
      numerator = counts[category][word] + ALPHA
      (occurrences[word] * Math.log(numerator / denominators[word])).abs
    end
    result[category] += terms.inject(0, :+)
  end

  scores.min_by(&:last).first.to_sym
end
#flush_category(category) ⇒ Object
101 102 103 104 |
# File 'lib/judgee/classifier.rb', line 101
#
# Removes one category: deletes the key returned by redis_category_key and
# then removes the category's name from the set stored at CATEGORIES_KEY.
#
# @param category [Object] category identifier passed to redis_category_key
#   and category_name
# @return [Object] whatever redis.srem returns
def flush_category(category)
  counters_key = redis_category_key(category)
  redis.del(counters_key)

  member = category_name(category)
  redis.srem(CATEGORIES_KEY, member)
end
#flushdb(flush_db = false) ⇒ Object
95 96 97 |
# File 'lib/judgee/classifier.rb', line 95
#
# Calls redis.flushdb, but only when explicitly confirmed.
#
# @param flush_db [Boolean] must be truthy for anything to happen
# @return [Object, nil] the result of redis.flushdb, or nil when +flush_db+
#   is falsy
def flushdb(flush_db = false)
  return unless flush_db

  redis.flushdb
end
#train(category, data) ⇒ Object
21 22 23 24 25 26 27 |
# File 'lib/judgee/classifier.rb', line 21
#
# Registers +category+ in the set at CATEGORIES_KEY and increments the
# category's counter for every word found in +data+ (one HINCRBY per word).
#
# @param category [Object] category identifier
# @param data [Object] input handed to count_occurance
# @return [String] always "OK"
def train(category, data)
  redis.sadd(CATEGORIES_KEY, category_name(category))

  tallies = count_occurance(data)
  tallies.each do |word, times|
    redis.hincrby(redis_category_key(category), word, times)
  end

  "OK"
end
#train_fast(category, data) ⇒ Object
29 30 31 32 33 34 35 36 |
# File 'lib/judgee/classifier.rb', line 29
#
# Bulk variant of #train: registers +category+, reads the current counters
# for all words of +data+ with one HMGET, adds the new occurrences and
# writes the sums back with one HMSET.
#
# When +data+ yields no words the category is still registered (as #train
# does) and the method returns without touching the counters. Previously
# this path called HMGET with no fields and passed nil to HMSET, because
# `[].flatten!` returns nil when nothing was flattened.
#
# @param category [Object] category identifier
# @param data [Object] input handed to count_occurance
# @return [String] always "OK"
def train_fast(category, data)
  redis.sadd(CATEGORIES_KEY, category_name(category))

  occurrences = count_occurance(data)
  return "OK" if occurrences.empty?

  key = redis_category_key(category)
  words = occurrences.keys
  stored = redis.hmget(key, words)

  # Flat [word, count, word, count, ...] list, the shape HMSET is given.
  updated = words.zip(stored).flat_map do |word, stored_count|
    [word, occurrences[word].to_i + stored_count.to_i]
  end

  redis.hmset(key, updated)
  "OK"
end
#untrain(category, data) ⇒ Object
40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/judgee/classifier.rb', line 40
#
# Reverses #train for +data+: for every word, subtracts its occurrence count
# from the stored counter. A counter that would drop to zero or below is
# deleted; otherwise the reduced value is written back.
#
# @param category [Object] category identifier
# @param data [Object] input handed to count_occurance
# @return [String] always "OK"
def untrain(category, data)
  count_occurance(data).each do |word, word_count|
    stored = redis.hget(redis_category_key(category), word).to_i
    remaining = stored - word_count

    if remaining <= 0
      redis.hdel(redis_category_key(category), word)
    else
      redis.hset(redis_category_key(category), word, remaining)
    end
  end

  "OK"
end
#untrain_fast(category, data) ⇒ Object
52 53 54 55 56 57 58 59 60 |
# File 'lib/judgee/classifier.rb', line 52
#
# Bulk variant of #untrain: reads the stored counters for all words of
# +data+ with one HMGET, subtracts the occurrences, writes the counters that
# stay positive with one HMSET and removes the rest with one HDEL.
#
# Changes from the earlier listing:
# * empty +data+ returns immediately instead of issuing HMGET with no fields
#   and handing nil to HMSET (`[].flatten!` returns nil);
# * zero and negative counters are no longer written before being deleted,
#   and HMSET is skipped when nothing remains positive. The stored result
#   after the call is the same.
#
# @param category [Object] category identifier
# @param data [Object] input handed to count_occurance
# @return [String] always "OK"
def untrain_fast(category, data)
  occurrences = count_occurance(data)
  return "OK" if occurrences.empty?

  key = redis_category_key(category)
  words = occurrences.keys

  remaining = words.zip(redis.hmget(key, words)).map do |word, stored_count|
    [word, stored_count.to_i - occurrences[word].to_i]
  end
  kept, exhausted = remaining.partition { |_word, count| count > 0 }

  redis.hmset(key, kept.flatten) unless kept.empty?
  redis.hdel(key, exhausted.map(&:first)) unless exhausted.empty?
  "OK"
end