Class: CorrectHorseBatteryStaple::Corpus::Redis
Direct Known Subclasses
Redis2
Constant Summary
collapse
- MAX_ITERATIONS =
1000
Instance Attribute Summary collapse
Attributes inherited from Base
#frequency_mean, #frequency_stddev, #original_size, #probability_mean, #probability_stddev, #weighted_size
Class Method Summary
collapse
Instance Method Summary
collapse
-
#close ⇒ Object
-
#corpus_length_range ⇒ Object
TODO: make this use actual data from stored stats.
-
#count_all ⇒ Object
-
#discontiguous_range_map(key, outer_range, inner_range, divisor = 100) ⇒ Object
-
#each(&block) ⇒ Object
some core Enumerable building blocks.
-
#entries ⇒ Object
our own collection operations.
-
#get_word_ids_in_zset(key, range) ⇒ Object
-
#get_words_for_ids(ids) ⇒ Object
-
#initialize(dest) ⇒ Redis
constructor
-
#intersection(*sets) ⇒ Object
-
#pick(count, options = {}) ⇒ Object
-
#pick_drange(count, options = {}) ⇒ Object
-
#pick_random_words(count) ⇒ Object
-
#pick_standard(count, options = {}) ⇒ Object
optimized pick implementations - they do NOT support :filter, though.
-
#range_cover?(outer, inner) ⇒ Boolean
XXX - does not handle exclusive endpoints.
-
#size ⇒ Object
-
#sorted_entries ⇒ Object
-
#zcount(key, min, max) ⇒ Object
included
Methods inherited from Base
#candidates, #compose_filters, #count, #count_by_options, #count_candidates, #entropy_per_word, #entropy_per_word_by_filter, #filter, #filter_for_options, #frequencies, #inspect, #load_stats_from_hash, #precache, #recalculate, #reset, #result, #stats, #words
Methods included from Memoize
included
#array_sample, #logger, #random_in_range, #random_number, #set_sample
format_for
Constructor Details
#initialize(dest) ⇒ Redis
Returns a new instance of Redis.
14
15
16
17
18
19
20
21
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 14
# Constructs a corpus object from +dest+.
#
# Steps, in order: invoke the parent initializer (the bare +super+ forwards
# +dest+), store +dest+ in the +dest+ attribute, reset +options+ to an empty
# Hash, pass +dest+ to +parse_uri+, then call +load_stats+.
#
# NOTE(review): +parse_uri+ and +load_stats+ are defined outside this view;
# presumably +dest+ is a Redis connection URI -- confirm against +parse_uri+.
#
# @param dest [Object] destination descriptor handed to +parse_uri+
def initialize(dest)
super
self.dest = dest
self.options = {}
parse_uri(dest)
load_stats
end
|
Instance Attribute Details
#dest ⇒ Object
Returns the value of attribute dest.
11
12
13
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 11
# Reader for the +dest+ attribute.
#
# @return [Object] the current value of the @dest instance variable
def dest
@dest
end
|
#options ⇒ Object
Returns the value of attribute options.
12
13
14
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 12
# Reader for the +options+ attribute.
#
# @return [Object] the current value of the @options instance variable
def options
@options
end
|
Class Method Details
.read(file) ⇒ Object
23
24
25
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 23
# Alternate constructor: builds an instance by passing +file+ straight to
# +new+.
#
# @param file [Object] forwarded unchanged to the constructor
# @return [Object] whatever +new+ returns
def self.read(file)
  new(file)
end
|
Instance Method Details
#close ⇒ Object
155
156
157
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 155
# Closes the corpus by deferring entirely to the parent implementation;
# nothing Redis-specific is done here.
#
# @return [Object] whatever the parent +close+ returns
def close
super
end
|
#corpus_length_range ⇒ Object
TODO: make this use actual data from stored stats
130
131
132
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 130
# Range of word lengths assumed to exist in the corpus.
#
# The value is hard-coded to 3..18.
# TODO: make this use actual data from stored stats.
#
# @return [Range] always 3..18
def corpus_length_range
3..18
end
|
#count_all ⇒ Object
33
34
35
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 33
# Cardinality (ZCARD) of the sorted set stored under @words_key.
#
# The first truthy result is cached in @count_all, so later calls do not hit
# the database again.
#
# @return [Object] the value returned by +db.zcard+
def count_all
  return @count_all if @count_all

  @count_all = db.zcard(@words_key)
end
|
#discontiguous_range_map(key, outer_range, inner_range, divisor = 100) ⇒ Object
118
119
120
121
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 118
# Builds a DRange helper over the sorted set stored at +key+.
#
# The connection is obtained through the +db+ accessor, as in the other
# methods of this class, instead of reading the @db instance variable
# directly; this keeps the method working even if @db has not been assigned
# yet when it is called.
#
# NOTE(review): DRange is defined outside this view; the meaning of
# +outer_range+, +inner_range+ and +divisor+ is whatever DRange gives them.
#
# @param key [Object] key of the sorted set to map
# @param outer_range [Object] passed through to DRange
# @param inner_range [Object] passed through to DRange
# @param divisor [Integer] passed through to DRange (defaults to 100)
# @return [CorrectHorseBatteryStaple::Backend::Redis::DRange]
def discontiguous_range_map(key, outer_range, inner_range, divisor = 100)
  CorrectHorseBatteryStaple::Backend::Redis::DRange.new(db, key, outer_range,
                                                        inner_range, divisor)
end
|
#each(&block) ⇒ Object
some core Enumerable building blocks
29
30
31
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 29
# Core Enumerable building block: iterates over +entries+, handing each
# element to the given block.
#
# @return [Object] whatever +entries.each+ returns
def each(&block)
  collection = entries
  collection.each(&block)
end
|
#entries ⇒ Object
our own collection operations
45
46
47
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 45
# Collection accessor: returns the result of +table+.
#
# NOTE(review): +table+ is defined outside this view; its return type is not
# visible here.
def entries
table
end
|
#get_word_ids_in_zset(key, range) ⇒ Object
145
146
147
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 145
# Fetches the members of the sorted set at +key+ whose score lies in +range+
# (ZRANGEBYSCORE).
#
# A range that excludes its end (+a...b+) is sent with Redis's exclusive
# bound syntax ("(b"), so members scoring exactly +b+ are left out. An
# inclusive range is sent with its end value unchanged.
#
# @param key [Object] key of the sorted set
# @param range [Range] score interval to query
# @return [Object] whatever +db.zrangebyscore+ returns
def get_word_ids_in_zset(key, range)
  open_ended = range.respond_to?(:exclude_end?) && range.exclude_end?
  upper = open_ended ? "(#{range.end}" : range.end
  db.zrangebyscore(key, range.begin, upper)
end
|
#get_words_for_ids(ids) ⇒ Object
150
151
152
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 150
# Turns a list of word ids into Word objects.
#
# Each id is resolved with +get_word_by_id+ and the result is wrapped in a
# CorrectHorseBatteryStaple::Word via its :word attribute.
#
# @param ids [Enumerable] ids to resolve
# @return [Array<CorrectHorseBatteryStaple::Word>] one Word per id, in order
def get_words_for_ids(ids)
  ids.map do |word_id|
    text = get_word_by_id(word_id)
    CorrectHorseBatteryStaple::Word.new(:word => text)
  end
end
|
#intersection(*sets) ⇒ Object
141
142
143
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 141
# Folds all given collections together with +&+, left to right.
#
# @param sets [Array] collections to intersect
# @return [Object, nil] the combined intersection; the single argument itself
#   when only one is given; nil when called with no arguments
def intersection(*sets)
  sets.inject(:&)
end
|
#pick(count, options = {}) ⇒ Object
54
55
56
57
58
59
60
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 54
# Picks +count+ words using one of the +pick_<strategy>+ implementations.
#
# The strategy name comes from options[:strategy], then the +pick_strategy+
# environment variable, then defaults to "drange". It is interpolated into a
# method name and dispatched with +send+, so any +pick_*+ method on this
# object can be selected.
#
# The caller's +options+ hash is left untouched: the :strategy key is removed
# from a shallow copy, and that copy is what the strategy method receives.
#
# @param count [Integer] number of words to pick
# @param options [Hash] pick options; :filter is not supported
# @return [Object] whatever the selected strategy method returns
# @raise [NotImplementedError] if options[:filter] is set
def pick(count, options = {})
  raise NotImplementedError, "Redis does not support :filter option" if options[:filter]

  remaining = options.dup
  strategy = remaining.delete(:strategy) || ENV['pick_strategy'] || "drange"
  send("pick_#{strategy}", count, remaining)
end
|
#pick_drange(count, options = {}) ⇒ Object
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 87
# Picks +count+ words through a discontiguous range map over @lenprod_key.
#
# options[:percentile] and options[:word_length] are first normalized: a
# percentile range that covers 0..100, or a length range that is missing or
# covers +corpus_length_range+, is treated as "no restriction" (nil). With no
# restriction left, words are drawn by +pick_random_words+. Otherwise a range
# map is built and +count+ positions are drawn from it with +random_number+.
#
# NOTE(review): when only one restriction survives, the other is passed to
# +discontiguous_range_map+ as nil -- confirm that DRange accepts nil.
#
# @param count [Integer] number of words to pick
# @param options [Hash] may contain :percentile and :word_length ranges
# @return [Object] the result of +get_words_for_ids+
def pick_drange(count, options = {})
  percentiles = options[:percentile]
  lengths     = options[:word_length]

  percentiles = nil if percentiles && range_cover?(percentiles, 0..100)

  whole_corpus = corpus_length_range
  lengths = nil if !lengths || range_cover?(lengths, whole_corpus)

  # Nothing left to restrict by: plain random selection is enough.
  return get_words_for_ids(pick_random_words(count)) unless percentiles || lengths

  space = discontiguous_range_map(@lenprod_key, lengths, percentiles)
  total = space.count
  chosen = count.times.map { space.pick_nth(random_number(total)) }
  get_words_for_ids(chosen)
end
|
#pick_random_words(count) ⇒ Object
134
135
136
137
138
139
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 134
# Draws +count+ members from the sorted set at @words_key, one ZRANGE call
# per draw.
#
# Each draw computes a rank as random_number(size) - 1 and fetches the single
# member at that rank.
#
# NOTE(review): if +random_number+ can return 0, the rank becomes -1, which
# Redis ZRANGE interprets as the last member -- confirm the intended range of
# +random_number+.
#
# @param count [Integer] number of draws
# @return [Array] the first element of each ZRANGE reply, in draw order
def pick_random_words(count)
  count.times.map do
    rank = random_number(size) - 1
    db.zrange(@words_key, rank, rank).first
  end
end
|
#pick_standard(count, options = {}) ⇒ Object
optimized pick implementations - they do NOT support :filter, though
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 65
# Optimized pick that does NOT support :filter.
#
# A percentile range whose endpoints are exactly 0 and 100 is ignored. With
# neither a percentile nor a word-length restriction, words come from
# +pick_random_words+. Otherwise candidate ids are read from the sorted sets
# at @percentile_key and/or @lenprod_key, intersected when both are used, and
# sampled with +array_sample+.
#
# NOTE(review): the word-length range is used directly as a score interval on
# @lenprod_key -- confirm that the scores stored there are plain word lengths.
#
# @param count [Integer] number of words to pick
# @param options [Hash] may contain :percentile and :word_length ranges
# @return [Object] the result of +get_words_for_ids+
def pick_standard(count, options = {})
  percentiles = options[:percentile]
  lengths     = options[:word_length]

  percentiles = nil if percentiles && percentiles.begin == 0 && percentiles.end == 100

  return get_words_for_ids(pick_random_words(count)) unless percentiles || lengths

  id_sets = []
  id_sets.push(get_word_ids_in_zset(@percentile_key, percentiles)) if percentiles
  id_sets.push(get_word_ids_in_zset(@lenprod_key, lengths)) if lengths

  pool = id_sets.length == 1 ? id_sets[0] : intersection(*id_sets)
  get_words_for_ids(array_sample(pool, count))
end
|
#range_cover?(outer, inner) ⇒ Boolean
XXX - does not handle exclusive endpoints
125
126
127
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 125
# Tells whether +outer+ fully contains +inner+.
#
# +inner.begin+ must lie inside +outer+. +inner.end+ must either lie inside
# +outer+ or, when +inner+ excludes its end, coincide with +outer.end+ (an
# excluded end point does not itself need to be covered, so 0...100 covers
# 0...100).
#
# @param outer [Range] the candidate enclosing range
# @param inner [Range] the range that should be contained
# @return [Boolean] true when every point of +inner+ lies within +outer+
def range_cover?(outer, inner)
  return false unless outer.cover?(inner.begin)
  return true if outer.cover?(inner.end)

  # Reached only when outer does not include inner.end itself.
  inner.exclude_end? && inner.end == outer.end
end
|
#size ⇒ Object
37
38
39
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 37
# Corpus size: the :size entry of +stats+ when it is truthy, otherwise the
# result of +count_all+.
#
# @return [Object] stats[:size] or the +count_all+ fallback
def size
  recorded = stats[:size]
  return recorded if recorded

  count_all
end
|
#sorted_entries ⇒ Object
49
50
51
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 49
# Returns +entries+ sorted with the elements' default ordering (+sort+ with
# no block).
def sorted_entries
entries.sort
end
|
#zcount(key, min, max) ⇒ Object
113
114
115
|
# File 'lib/correct_horse_battery_staple/corpus/redis.rb', line 113
# Thin pass-through to +db.zcount+.
#
# @param key [Object] forwarded as the first argument
# @param min [Object] forwarded as the second argument
# @param max [Object] forwarded as the third argument
# @return [Object] whatever +db.zcount+ returns
def zcount(key, min, max)
db.zcount(key, min, max)
end
|