Class: CorrectHorseBatteryStaple::Corpus::Redis

Inherits:
Base show all
Includes:
Backend::Redis
Defined in:
lib/correct_horse_battery_staple/corpus/redis.rb

Direct Known Subclasses

Redis2

Constant Summary collapse

MAX_ITERATIONS =
1000

Instance Attribute Summary collapse

Attributes inherited from Base

#frequency_mean, #frequency_stddev, #original_size, #probability_mean, #probability_stddev, #weighted_size

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Backend::Redis

included

Methods inherited from Base

#candidates, #compose_filters, #count, #count_by_options, #count_candidates, #entropy_per_word, #entropy_per_word_by_filter, #filter, #filter_for_options, #frequencies, #inspect, #load_stats_from_hash, #precache, #recalculate, #reset, #result, #stats, #words

Methods included from Memoize

included

Methods included from CorrectHorseBatteryStaple::Common

#array_sample, #logger, #random_in_range, #random_number, #set_sample

Methods inherited from CorrectHorseBatteryStaple::Corpus

format_for

Constructor Details

#initialize(dest) ⇒ Redis

Returns a new instance of Redis.



14
15
16
17
18
19
20
21
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 14

# Builds a corpus bound to the destination +dest+.
#
# After the superclass constructor has run, the destination is stored, the
# options are reset to an empty hash, the destination is parsed and the
# stored statistics are loaded.
#
# @param dest the destination handed to the superclass constructor and to
#   +parse_uri+ (presumably a Redis URI -- confirm against Backend::Redis)
def initialize(dest)
  super(dest)

  self.dest    = dest
  self.options = {}

  # Parsing must happen before the stats are loaded; the order is kept
  # exactly as the original constructor had it.
  parse_uri(dest)
  load_stats
end

Instance Attribute Details

#destObject

Returns the value of attribute dest.



11
12
13
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 11

# Reader for the +dest+ attribute.
#
# @return the value last assigned to @dest (the constructor assigns its
#   +dest+ argument), or nil if it was never set
def dest
  @dest
end

#optionsObject

Returns the value of attribute options.



12
13
14
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 12

# Reader for the +options+ attribute.
#
# @return the value last assigned to @options (the constructor assigns an
#   empty hash), or nil if it was never set
def options
  @options
end

Class Method Details

.read(file) ⇒ Object



23
24
25
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 23

# Factory entry point: instantiates the receiving class for +file+.
#
# @param file passed unchanged to the constructor as the destination
# @return a new instance of the class this method is called on
def self.read(file)
  new(file)
end

Instance Method Details

#closeObject



155
156
157
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 155

# Closes the corpus by delegating to the inherited +close+ implementation;
# nothing is added on top of it here.
#
# @return whatever the inherited +close+ returns
def close
  super
end

#corpus_length_rangeObject

TODO: make this use actual data from stored stats



130
131
132
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 130

# Range of word lengths the corpus is treated as containing.
#
# pick_drange compares a requested :word_length range against this value to
# decide whether the length filter can be dropped.
#
# TODO: make this use actual data from stored stats
#
# @return [Range] the fixed inclusive range 3..18
def corpus_length_range
  3..18
end

#count_allObject



33
34
35
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 33

# Number of members in the words sorted set, memoized in @count_all.
#
# The database is only asked (via +zcard+ on @words_key) while no truthy
# value has been cached yet.
#
# @return the cached or freshly fetched cardinality
def count_all
  return @count_all if @count_all

  @count_all = db.zcard(@words_key)
end

#discontiguous_range_map(key, outer_range, inner_range, divisor = 100) ⇒ Object



118
119
120
121
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 118

# Builds a Backend::Redis::DRange over +key+ for the given pair of ranges.
#
# Note that the raw @db instance variable (not the +db+ reader) is handed
# to the DRange, exactly as before.
#
# @param key          the sorted-set key the DRange operates on
# @param outer_range  first range argument forwarded to DRange (may be nil)
# @param inner_range  second range argument forwarded to DRange (may be nil)
# @param divisor      forwarded to DRange; defaults to 100
# @return [CorrectHorseBatteryStaple::Backend::Redis::DRange]
def discontiguous_range_map(key, outer_range, inner_range, divisor=100)
  drange_class = CorrectHorseBatteryStaple::Backend::Redis::DRange
  drange_class.new(@db, key, outer_range, inner_range, divisor)
end

#each(&block) ⇒ Object

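Iterates over the corpus entries, passing each one to the given block. This is one of the core building blocks that Enumerable relies on.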



29
30
31
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 29

# Iterates over +entries+, handing every entry to the given block.
#
# @yield each entry in turn
# @return whatever +entries.each+ returns for the given (or missing) block
def each(&block)
  entries.each(&block)
end

#entriesObject

Returns the corpus entries (the underlying table). This is the first of our own collection operations.



45
46
47
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 45

# Returns the corpus entries, which is whatever +table+ returns.
# +each+ and +sorted_entries+ are built on top of this method.
def entries
  table
end

#get_word_ids_in_zset(key, range) ⇒ Object



145
146
147
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 145

# Looks up the members of the sorted set +key+ whose score lies between the
# two endpoints of +range+.
#
# Only +range.begin+ and +range.end+ are used; whether the range excludes
# its end is not taken into account.
#
# @param key   the sorted-set key to query
# @param range [Range] score bounds
# @return the result of +db.zrangebyscore+
def get_word_ids_in_zset(key, range)
  lowest, highest = range.begin, range.end
  db.zrangebyscore(key, lowest, highest)
end

#get_words_for_ids(ids) ⇒ Object



150
151
152
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 150

def get_words_for_ids(ids)
  ids.map {|id| CorrectHorseBatteryStaple::Word.new(:word => get_word_by_id(id)) }
end

#intersection(*sets) ⇒ Object



141
142
143
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 141

# Intersects all given collections by folding them together with +&+.
#
# @param sets any number of objects responding to +&+ (e.g. Arrays)
# @return the common elements; the single argument itself when only one is
#   given; nil when called without arguments
def intersection(*sets)
  sets.inject(:&)
end

#pick(count, options = {}) ⇒ Object

Raises:

  • (NotImplementedError)


54
55
56
57
58
59
60
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 54

# Picks +count+ words by dispatching to one of the pick_<strategy> methods.
#
# The strategy name comes from options[:strategy], then from the
# 'pick_strategy' environment variable, and finally defaults to "drange".
# The :strategy entry is stripped from a *copy* of +options+, so the hash
# the caller passed in is left untouched (and may even be frozen).
#
# @param count   number of words to pick; forwarded to the strategy method
# @param options [Hash] pick options; everything except :strategy is
#   forwarded to the strategy method
# @return whatever the selected pick_<strategy> method returns
# @raise [NotImplementedError] if options[:filter] is set
def pick(count, options = {})
  # incompat check
  raise NotImplementedError, "Redis does not support :filter option" if options[:filter]

  # Work on a copy: deleting from the caller's hash would silently change
  # the result of any later call that reuses the same options.
  forwarded = options.dup
  strategy  = forwarded.delete(:strategy) || ENV['pick_strategy'] || "drange"
  send("pick_#{strategy}", count, forwarded)
end

#pick_drange(count, options = {}) ⇒ Object



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 87

# Picks +count+ words using a discontiguous range map over @lenprod_key.
#
# A :percentile range that covers 0..100, or a :word_length range that is
# missing or covers the whole corpus_length_range, is treated as "no
# restriction". With no restriction left, words are picked at random from
# the whole corpus; otherwise +count+ positions are drawn from the range map.
#
# @param count   number of words to pick
# @param options [Hash] may contain :percentile and :word_length ranges
# @return the result of +get_words_for_ids+ for the picked ids
def pick_drange(count, options = {})
  percentiles = options[:percentile]
  lengths     = options[:word_length]

  # Drop filters that cannot exclude anything.
  percentiles = nil if percentiles && range_cover?(percentiles, 0..100)

  whole_corpus = corpus_length_range
  lengths = nil unless lengths && !range_cover?(lengths, whole_corpus)

  # Unrestricted pick: no need to build a range map at all.
  return get_words_for_ids(pick_random_words(count)) unless percentiles || lengths

  space     = discontiguous_range_map(@lenprod_key, lengths, percentiles)
  available = space.count
  picked    = count.times.map { space.pick_nth(random_number(available)) }
  get_words_for_ids(picked)
end

#pick_random_words(count) ⇒ Object



134
135
136
137
138
139
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 134

# Draws +count+ members from the words sorted set at random positions.
#
# Each draw asks +random_number+ for a value bounded by +size+, shifts it
# down by one and fetches the single member at that rank via +zrange+.
#
# NOTE(review): if random_number(size) can return 0, the rank becomes -1,
# which Redis ZRANGE treats as "last member" -- confirm the range
# random_number produces.
#
# @param count number of members to draw
# @return [Array] the drawn members, one per draw (duplicates possible)
def pick_random_words(count)
  count.times.map do
    rank = random_number(size) - 1
    db.zrange(@words_key, rank, rank).first
  end
end

#pick_standard(count, options = {}) ⇒ Object

Optimized pick implementation. Note that the optimized pick implementations do NOT support the :filter option.



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 65

# Picks +count+ words by fetching the matching id sets and sampling from
# their intersection.
#
# A :percentile range running exactly from 0 to 100 is ignored. Without any
# remaining filter the words are picked at random from the whole corpus.
#
# NOTE(review): the :word_length lookup reads @lenprod_key, the same key
# pick_drange uses for combined length/percentile ranges -- confirm this is
# the intended sorted set for a plain length query.
#
# @param count   number of words to pick
# @param options [Hash] may contain :percentile and :word_length ranges
# @return the result of +get_words_for_ids+ for the sampled ids
def pick_standard(count, options = {})
  percentiles = options[:percentile]
  lengths     = options[:word_length]

  percentiles = nil if percentiles && percentiles.begin == 0 && percentiles.end == 100

  # Nothing to filter on: fall back to an unrestricted random pick.
  return get_words_for_ids(pick_random_words(count)) unless percentiles || lengths

  # Collect the (key, range) pairs to query, percentile first.
  lookups = []
  lookups << [@percentile_key, percentiles] if percentiles
  lookups << [@lenprod_key, lengths]        if lengths
  id_sets = lookups.map { |key, range| get_word_ids_in_zset(key, range) }

  pool = id_sets.length == 1 ? id_sets[0] : intersection(*id_sets)
  get_words_for_ids(array_sample(pool, count))
end

#range_cover?(outer, inner) ⇒ Boolean

XXX - does not handle exclusive endpoints

Returns:

  • (Boolean)


125
126
127
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 125

# Tells whether +outer+ spans all of +inner+.
#
# The start of +inner+ must always lie within +outer+. For the end there
# are two cases:
#
# * +inner+ includes its end: that end value must lie within +outer+
#   (so an end-exclusive +outer+ correctly rejects an equal end).
# * +inner+ excludes its end: the end value itself is never reached, so it
#   only has to be no greater than the end of +outer+. This makes e.g.
#   0...100 cover 0...100, which the plain endpoint check rejected.
#
# Ranges are compared as continuous intervals: 0..100 does not cover
# 0...101 even though both contain the same integers.
#
# @param outer [Range] the range expected to be the wider one
# @param inner [Range] the range expected to fit inside +outer+
# @return [Boolean] true when +outer+ covers +inner+
def range_cover?(outer, inner)
  return false unless outer.cover?(inner.begin)

  inner_end = inner.end
  outer_end = outer.end

  # Plain endpoint check whenever inner's end is actually part of inner,
  # or when either end is open (nil) and cannot be compared directly.
  return outer.cover?(inner_end) unless inner.exclude_end? && inner_end && outer_end

  inner_end <= outer_end
end

#sizeObject



37
38
39
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 37

# Size of the corpus.
#
# Prefers the :size entry of +stats+; when that entry is missing (nil or
# false) the count from +count_all+ is used instead.
#
# @return the recorded size, or the counted size as fallback
def size
  recorded = stats[:size]
  recorded || count_all
end

#sorted_entriesObject



49
50
51
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 49

# Returns the result of calling +sort+ on +entries+.
#
# NOTE(review): presumably the entries are mutually comparable via <=> --
# confirm against the entry type stored in +table+.
def sorted_entries
  entries.sort
end

#zcount(key, min, max) ⇒ Object



113
114
115
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 113

# Thin wrapper that forwards to +db.zcount+ with the same arguments.
#
# @param key the sorted-set key
# @param min lower bound, passed through unchanged
# @param max upper bound, passed through unchanged
# @return whatever +db.zcount+ returns (presumably the number of members
#   whose score lies between min and max, per Redis ZCOUNT -- confirm)
def zcount(key, min, max)
  db.zcount(key, min, max)
end