Class: Inferx::Categories

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/inferx/categories.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(redis, options = {}) ⇒ Categories

Returns a new instance of Categories.

Parameters:

  • redis (Redis)

    an instance of Redis

  • options (Hash) (defaults to: {})

Options Hash (options):

  • :complementary (Boolean)

    use complementary Bayes classifier

  • :namespace (String)

    namespace prefix for the keys used on Redis

  • :manual (Boolean)

    whether saving is manual (no automatic save), defaults to false



15
16
17
18
19
20
21
22
23
24
# File 'lib/inferx/categories.rb', line 15

# Set up a category collection backed by the given Redis connection.
#
# The Redis key is "inferx:categories", or "inferx:<namespace>:categories"
# when a namespace option is supplied.
#
# @param redis [Redis] an instance of Redis
# @param options [Hash]
# @option options [Boolean] :complementary use complementary Bayes classifier
# @option options [String] :namespace namespace of keys to be used on Redis
# @option options [Boolean] :manual whether manual save, defaults to false
def initialize(redis, options = {})
  @redis = redis

  @category_class =
    if options[:complementary]
      Category::Complementary
    else
      Category
    end

  namespace = options[:namespace]
  segments = namespace ? ['inferx', namespace, 'categories'] : ['inferx', 'categories']
  @key = segments.join(':')

  # Coerce any truthy/falsy option value into a strict boolean.
  @manual = options[:manual] ? true : false

  # No filter and nothing excepted to begin with.
  @filter = nil
  @except = Set.new
end

Instance Attribute Details

#key ⇒ String (readonly)

Get key for access to categories on Redis.

Returns:

  • (String)

    the key



30
31
32
# File 'lib/inferx/categories.rb', line 30

# Key used to access the categories on Redis.
#
# @return [String] the key
def key
  @key
end

Instance Method Details

#add(*category_names) ⇒ Object

Add categories.

Parameters:

  • category_names (Array<String>)

    category names



85
86
87
88
89
90
# File 'lib/inferx/categories.rb', line 85

# Add categories.
#
# Each name is registered in the Redis hash at the categories key with an
# initial value of 0 via HSETNX, so a field that already exists is left
# untouched. A SAVE is issued in the same pipeline unless saving is manual.
#
# @param category_names [Array<String>] category names
# @return [Object] whatever the Redis client's +pipelined+ call returns
def add(*category_names)
  @redis.pipelined do
    category_names.each do |name|
      @redis.hsetnx(@key, name, 0)
    end

    @redis.save unless manual?
  end
end

#all ⇒ Array<String>

Get all category names.

Returns:

  • (Array<String>)

    category names



66
67
68
# File 'lib/inferx/categories.rb', line 66

# Get all category names.
#
# The names come from +all_in_visible+ and are converted to a plain Array.
#
# @return [Array<String>] category names
def all
  visible_names = all_in_visible
  visible_names.to_a
end

#each {|category| ... } ⇒ Object

Apply process for each category.

Yields:

  • called for every category

Yield Parameters:



115
116
117
118
119
120
121
122
# File 'lib/inferx/categories.rb', line 115

# Apply process for each category.
#
# Reads every field of the categories hash from Redis and yields a category
# object (built by +make_category+) for each name that +all_in_visible+
# reports; other names are skipped.
#
# When called without a block an Enumerator is returned instead of failing
# with LocalJumpError at the first +yield+.
#
# @yield called for every category
# @yieldparam category [Object] the value built by +make_category+
#   (presumably an Inferx::Category - confirm against make_category)
# @return [Enumerator] when no block is given
# @return [Object] otherwise, the result of iterating the HGETALL reply
def each
  return enum_for(:each) unless block_given?

  visible_category_names = all_in_visible

  @redis.hgetall(@key).each do |category_name, size|
    next unless visible_category_names.include?(category_name)
    # Sizes are converted with to_i before building the category.
    yield make_category(category_name, size.to_i)
  end
end

#eject(words) ⇒ Hash<String, Integer>

Eject the words from the training data of the categories.

Parameters:

  • words (Array<String>)

    an array of words

Returns:

  • (Hash<String, Integer>)

    decrease for each category



153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/inferx/categories.rb', line 153

# Eject the words from the training data of the categories.
#
# For every category the score of each word is decremented by its number of
# occurrences, and members whose score dropped to 0 or below are removed.
# The per-category decrease starts at +words.size+ and is reduced by any
# amount a score went below zero, then subtracted from the category's size
# in the categories hash. A SAVE is issued unless saving is manual.
#
# @param words [Array<String>] an array of words
# @return [Hash<String, Integer>] decrease for each category
def eject(words)
  category_names = all
  return {} if category_names.empty?
  return associate(category_names, 0) if words.empty?

  total = words.size
  counts = collect(words)

  # Each category produces one ZINCRBY reply per distinct word followed by a
  # single ZREMRANGEBYSCORE reply.
  word_total = counts.size
  stride = word_total + 1

  associate(category_names, total) do |fluctuation|
    replies = @redis.pipelined do
      category_names.each do |category_name|
        category_key = make_category_key(category_name)

        counts.each do |word, count|
          @redis.zincrby(category_key, -count, word)
        end

        @redis.zremrangebyscore(category_key, '-inf', 0)
      end
    end

    category_names.each_with_index do |category_name, index|
      new_scores = replies[index * stride, word_total]

      # A negative score is added to (and so reduces) the recorded decrease.
      fluctuation[category_name] = new_scores.inject(fluctuation[category_name]) do |amount, raw_score|
        overshoot = raw_score.to_i
        overshoot < 0 ? amount + overshoot : amount
      end
    end

    @redis.pipelined do
      fluctuation.each do |category_name, amount|
        @redis.hincrby(@key, category_name, -amount)
      end

      @redis.save unless manual?
    end
  end
end

#except(*category_names) ⇒ Inferx::Categories

Filter by excepting categories.

Parameters:

  • category_names (Array<String>)

    category names

Returns:



55
56
57
58
59
60
61
# File 'lib/inferx/categories.rb', line 55

# Filter by excepting categories.
#
# The names are converted to strings and merged into the excepted set from
# within the block handed to +filtered+.
#
# @param category_names [Array<String>] category names
# @return [Inferx::Categories] the value returned by +filtered+
def except(*category_names)
  excluded_names = category_names.map { |name| name.to_s }

  filtered { @except.merge(excluded_names) }
end

#exists?(category_name) ⇒ Boolean

Determine if the category is defined.

Parameters:

  • category_name (String)

    the category name

Returns:

  • (Boolean)

    whether the category is defined



107
108
109
# File 'lib/inferx/categories.rb', line 107

# Determine if the category is defined.
#
# The name is converted to a string and looked up among the names reported
# by +all_in_visible+.
#
# @param category_name [String] the category name
# @return [Boolean] whether the category is defined
def exists?(category_name)
  visible_names = all_in_visible
  visible_names.include?(category_name.to_s)
end

#filter(*category_names) ⇒ Inferx::Categories

Filter categories.

Parameters:

  • category_names (Array<String>)

    category names

Returns:



43
44
45
46
47
48
49
# File 'lib/inferx/categories.rb', line 43

# Filter categories.
#
# The names are converted to strings. Within the block handed to +filtered+
# an existing filter is intersected with them; otherwise they become the
# new filter set.
#
# @param category_names [Array<String>] category names
# @return [Inferx::Categories] the value returned by +filtered+
def filter(*category_names)
  wanted_names = category_names.map { |name| name.to_s }

  filtered do
    @filter =
      if @filter
        @filter & wanted_names
      else
        Set.new(wanted_names)
      end
  end
end

#get(category_name) ⇒ Inferx::Category Also known as: []

Get the category with the given name.

Parameters:

  • category_name (String)

    the category name

Returns:

Raises:

  • (ArgumentError)


74
75
76
77
78
79
# File 'lib/inferx/categories.rb', line 74

# Get the category with the given name.
#
# The category's size is read from the categories hash on Redis first; the
# name must also be among the names reported by +all_in_visible+.
#
# @param category_name [String] the category name
# @return [Inferx::Category] the category built by +make_category+
# @raise [ArgumentError] if the name has no entry on Redis, or is not among
#   the currently visible (filtered) categories
def get(category_name)
  size = @redis.hget(@key, category_name)

  unless size
    raise ArgumentError, "#{category_name.inspect} is missing"
  end

  unless all_in_visible.include?(category_name.to_s)
    raise ArgumentError, "#{category_name.inspect} does not exist in filtered categories"
  end

  make_category(category_name, size.to_i)
end

#inject(words) ⇒ Hash<String, Integer>

Inject the words to the training data of the categories.

Parameters:

  • words (Array<String>)

    an array of words

Returns:

  • (Hash<String, Integer>)

    increase for each category



128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/inferx/categories.rb', line 128

# Inject the words to the training data of the categories.
#
# For every category the score of each word is incremented by its number of
# occurrences and the category's size in the categories hash grows by
# +words.size+. A SAVE is issued in the same pipeline unless saving is manual.
#
# @param words [Array<String>] an array of words
# @return [Hash<String, Integer>] increase for each category
def inject(words)
  category_names = all
  return {} if category_names.empty?
  return associate(category_names, 0) if words.empty?

  total = words.size
  counts = collect(words)

  associate(category_names, total) do
    @redis.pipelined do
      category_names.each do |category_name|
        category_key = make_category_key(category_name)

        counts.each do |word, count|
          @redis.zincrby(category_key, count, word)
        end

        @redis.hincrby(@key, category_name, total)
      end

      @redis.save unless manual?
    end
  end
end

#manual? ⇒ Boolean

Determine if manual save.

Returns:

  • (Boolean)

    whether manual save



35
36
37
# File 'lib/inferx/categories.rb', line 35

# Determine if manual save.
#
# When this is false, the mutating methods issue a Redis SAVE themselves.
#
# @return [Boolean] whether manual save
def manual?
  @manual
end

#remove(*category_names) ⇒ Object

Remove categories.

Parameters:

  • category_names (Array<String>)

    category names



95
96
97
98
99
100
101
# File 'lib/inferx/categories.rb', line 95

# Remove categories.
#
# Each name is deleted from the categories hash and the per-category keys
# (as built by +make_category_key+) are deleted with a single DEL. A SAVE is
# issued in the same pipeline unless saving is manual.
#
# Calling this with no names is a no-op apart from the optional SAVE: the
# DEL is skipped, because Redis rejects a DEL command that carries no keys.
#
# @param category_names [Array<String>] category names
# @return [Object] whatever the Redis client's +pipelined+ call returns
def remove(*category_names)
  category_keys = category_names.map { |category_name| make_category_key(category_name) }

  @redis.pipelined do
    category_names.each { |category_name| @redis.hdel(@key, category_name) }
    # DEL requires at least one key.
    @redis.del(*category_keys) unless category_keys.empty?
    @redis.save unless manual?
  end
end