Class: CorrectHorseBatteryStaple::Corpus::Base
- Inherits:
-
CorrectHorseBatteryStaple::Corpus
- Object
- CorrectHorseBatteryStaple::Corpus
- CorrectHorseBatteryStaple::Corpus::Base
- Extended by:
- Forwardable
- Includes:
- CorrectHorseBatteryStaple::Common, Memoize, Enumerable
- Defined in:
- lib/correct_horse_battery_staple/corpus/base.rb
Direct Known Subclasses
Isam, IsamKD, Redis, Serialized, Sqlite
Instance Attribute Summary collapse
-
#frequency_mean ⇒ Object
Returns the value of attribute frequency_mean.
-
#frequency_stddev ⇒ Object
Returns the value of attribute frequency_stddev.
-
#original_size ⇒ Object
Returns the value of attribute original_size.
-
#probability_mean ⇒ Object
Returns the value of attribute probability_mean.
-
#probability_stddev ⇒ Object
Returns the value of attribute probability_stddev.
-
#weighted_size ⇒ Object
Returns the value of attribute weighted_size.
Class Method Summary collapse
Instance Method Summary collapse
-
#candidates(options = {}) ⇒ Object
return all the candidates for a given set of options.
-
#compose_filters(filters) ⇒ Object
create a single composed function of all the filters.
-
#count(*args, &block) ⇒ Object
(also: #length)
Other methods you should implement if possible (see the method details).
- #count_by_options(options = {}) ⇒ Object
- #count_candidates(options = {}) ⇒ Object
-
#each(&block) ⇒ Object
you MUST override this method for Enumerable to use.
- #entropy_per_word(options = {}) ⇒ Object
- #entropy_per_word_by_filter(&filter) ⇒ Object
-
#filter(&block) ⇒ Object
filtering.
-
#filter_for_options(options = {}) ⇒ Object
Return a single lambda that will return true/false given a Word object.
- #frequencies ⇒ Object
-
#initialize(*args) ⇒ Base
constructor
A new instance of Base.
- #inspect ⇒ Object
-
#load_stats_from_hash(hash) ⇒ Object
statistics.
-
#pick(count, options = {}) ⇒ Object
this is the core password picker method.
-
#precache(max = 0) ⇒ Object
no-op for serialized forms.
- #recalculate ⇒ Object
- #reset ⇒ Object
- #result ⇒ Object
- #sorted_entries ⇒ Object
- #stats ⇒ Object
- #words ⇒ Object
Methods included from Memoize
Methods included from CorrectHorseBatteryStaple::Common
#array_sample, #logger, #random_in_range, #random_number, #set_sample
Methods inherited from CorrectHorseBatteryStaple::Corpus
Constructor Details
#initialize(*args) ⇒ Base
Returns a new instance of Base.
17 18 19 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 17
#
# Builds a new corpus object. The constructor arguments are accepted but not
# used by this implementation. When the object responds to
# +initialize_backend_variables+, that hook is invoked.
#
# @param args [Array] accepted for subclasses; not read here
def initialize(*args)
  return unless respond_to?(:initialize_backend_variables)

  initialize_backend_variables
end
Instance Attribute Details
#frequency_mean ⇒ Object
Returns the value of attribute frequency_mean.
8 9 10 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 8
#
# Reader for the +@frequency_mean+ instance variable.
#
# @return [Object] the stored value, or nil when nothing has been assigned
def frequency_mean
  @frequency_mean
end
#frequency_stddev ⇒ Object
Returns the value of attribute frequency_stddev.
8 9 10 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 8
#
# Reader for the +@frequency_stddev+ instance variable.
#
# @return [Object] the stored value, or nil when nothing has been assigned
def frequency_stddev
  @frequency_stddev
end
#original_size ⇒ Object
Returns the value of attribute original_size.
10 11 12 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 10
#
# Reader for the +@original_size+ instance variable.
#
# @return [Object] the stored value, or nil when nothing has been assigned
def original_size
  @original_size
end
#probability_mean ⇒ Object
Returns the value of attribute probability_mean.
9 10 11 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 9
#
# Reader for the +@probability_mean+ instance variable.
#
# @return [Object] the stored value, or nil when nothing has been assigned
def probability_mean
  @probability_mean
end
#probability_stddev ⇒ Object
Returns the value of attribute probability_stddev.
9 10 11 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 9
#
# Reader for the +@probability_stddev+ instance variable.
#
# @return [Object] the stored value, or nil when nothing has been assigned
def probability_stddev
  @probability_stddev
end
#weighted_size ⇒ Object
Returns the value of attribute weighted_size.
11 12 13 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 11
#
# Reader for the +@weighted_size+ instance variable.
#
# @return [Object] the stored value, or nil when nothing has been assigned
def weighted_size
  @weighted_size
end
Class Method Details
.read(dest) ⇒ Object
21 22 23 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 21
#
# Convenience constructor: passes +dest+ straight to +new+ on the receiving
# class and returns the resulting instance.
#
# @param dest [Object] forwarded unchanged to the constructor
# @return [Object] the newly created instance
def self.read(dest)
  new(dest)
end
Instance Method Details
#candidates(options = {}) ⇒ Object
return all the candidates for a given set of options
65 66 67 68 69 70 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 65
#
# Return all the candidates for a given set of options.
#
# NOTE(review): the return shape is not uniform. With no options, or when the
# options produce no filter, this returns +size+; on the filtered path it
# returns the list of entries the filter accepts. Behaviour is kept as-is
# because callers may depend on it -- confirm which shape is intended.
#
# @param options [Hash, nil] selection options handed to +filter_for_options+
# @return [Object] +size+ when nothing is filtered, otherwise the matching
#   entries
def candidates(options = {})
  return size if !options || options.empty?

  filter = filter_for_options(options)
  return size unless filter

  entries.select { |entry| filter.call(entry) }
end
#compose_filters(filters) ⇒ Object
create a single composed function of all the filters
168 169 170 171 172 173 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 168
#
# Folds a list of callables into one predicate that is truthy only when every
# callable in the list is truthy for the given value. Evaluation is
# left-to-right and stops at the first falsy answer.
#
# @param filters [Array<#call>, nil] predicates to combine
# @return [#call, nil] nil when +filters+ is nil/false or empty; the lone
#   element itself when there is only one; otherwise a combining lambda
def compose_filters(filters)
  return nil unless filters
  return nil if filters.empty?

  filters.inject do |combined, following|
    lambda { |candidate| combined.call(candidate) && following.call(candidate) }
  end
end
#count(*args, &block) ⇒ Object Also known as: length
other methods you should implement if possible:
Enumerable
size
CHBS::Corpus
pick
words
frequencies
42 43 44 45 46 47 48 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 42
#
# Counts entries. A bare call (no arguments, no block) answers with +size+;
# any other form is handed to the inherited +count+ with the same arguments
# and block.
#
# @return [Object] +size+ for a bare call, otherwise whatever the inherited
#   +count+ returns
def count(*args, &block)
  return size if args.empty? && block.nil?

  super(*args, &block)
end
#count_by_options(options = {}) ⇒ Object
51 52 53 54 55 56 57 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 51
#
# Counts the entries selected by +options+. With nil or empty options this is
# a plain +count+; otherwise the predicate built by +filter_for_options+ is
# passed to +count+ as its block.
#
# The nil check matches the guard used by +candidates+ and
# +count_candidates+, so a nil argument no longer raises on +nil.empty?+.
#
# @param options [Hash, nil] selection options
# @return [Object] the value returned by +count+
def count_by_options(options = {})
  return count if !options || options.empty?

  count(&filter_for_options(options))
end
#count_candidates(options = {}) ⇒ Object
72 73 74 75 76 77 78 79 80 81 82 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 72
#
# Counts how many entries pass the filter derived from +options+, walking the
# corpus once with +each+. Falls back to +size+ when there are no options or
# when the options produce no filter.
#
# @param options [Hash, nil] selection options handed to +filter_for_options+
# @return [Object] +size+ when nothing is filtered, otherwise the number of
#   entries the filter accepts
def count_candidates(options = {})
  return size if !options || options.empty?

  filter = filter_for_options(options)
  return size unless filter

  # Named +matches+ so the local does not shadow the +count+ method.
  matches = 0
  each do |entry|
    matches += 1 if filter.call(entry)
  end
  matches
end
#each(&block) ⇒ Object
you MUST override this method for Enumerable to use
27 28 29 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 27
#
# Placeholder iterator: you MUST override this method for Enumerable to be
# usable. This version never yields; it always raises.
#
# @raise [NotImplementedError] unconditionally
def each(&block)
  raise(NotImplementedError)
end
#entropy_per_word(options = {}) ⇒ Object
148 149 150 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 148
#
# Base-2 logarithm of the number of entries selected by +options+.
#
# NOTE(review): the name of the counting call was missing from this listing.
# +count_by_options+ is restored here because the argument to Math.log must
# be numeric -- confirm against the real source file.
#
# @param options [Hash] selection options
# @return [Float] log2 of the matching-entry count
def entropy_per_word(options = {})
  Math.log(count_by_options(options)) / Math.log(2)
end
#entropy_per_word_by_filter(&filter) ⇒ Object
152 153 154 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 152
#
# Base-2 logarithm of a word count. When a block is given the count is
# +count+ evaluated with that block; without one it is +size+.
#
# @yield [entry] optional predicate forwarded to +count+
# @return [Float] log2 of the chosen count
def entropy_per_word_by_filter(&filter)
  word_total =
    if filter
      count(&filter)
    else
      size
    end
  Math.log(word_total) / Math.log(2)
end
#filter(&block) ⇒ Object
filtering
158 159 160 161 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 158
#
# Registers a filter block by appending it to +@filters+ (created on first
# use) and returns the receiver so calls can be chained.
#
# @yield the block to remember; it is stored, not called, here
# @return [self]
def filter(&block)
  @filters ||= []
  @filters.push(block)
  self
end
#filter_for_options(options = {}) ⇒ Object
Return a single lambda that will return true/false given a Word object
Respects the :word_length, :percentile, and :filter options. :word_length and :percentile should be Range objects; :filter can be a single Proc/lambda or an array of them.
279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 279
#
# Return a single lambda that will return true/false given a Word object.
#
# Respects the :word_length, :percentile, and :filter options. :word_length
# and :percentile are tested with +include?+ against +entry.word.length+ and
# +entry.percentile+ respectively; :filter can be a single Proc/lambda or an
# array of them.
#
# @param options [Hash, nil] selection options
# @return [#call, nil] the combined predicate, or nil when +options+ is
#   nil/empty or contains none of the recognised keys
def filter_for_options(options = {})
  return nil if !options || options.empty?

  # Array() returns the very same object when given an Array, so copy it:
  # otherwise the lambdas appended below would be pushed into the caller's
  # own :filter array on every call.
  filters = Array(options[:filter]).dup

  if options[:percentile]
    p_range = options[:percentile]
    filters << lambda { |entry| p_range.include? entry.percentile }
  end

  if options[:word_length]
    wl_range = options[:word_length]
    filters << lambda { |entry| wl_range.include? entry.word.length }
  end

  filters.empty? ? nil : compose_filters(filters)
end
#frequencies ⇒ Object
143 144 145 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 143
#
# Collects the +frequency+ of every entry, in +entries+ order, and wraps the
# list in a CorrectHorseBatteryStaple::StatisticalArray.
#
# @return [CorrectHorseBatteryStaple::StatisticalArray]
def frequencies
  entry_frequencies = entries.map(&:frequency)
  CorrectHorseBatteryStaple::StatisticalArray.new(entry_frequencies)
end
#inspect ⇒ Object
232 233 234 235 236 237 238 239 240 |
# Human-readable summary built from a heredoc: the class name, the value of
# +count+, and one "key: value" line for each pair returned by +stats+.
# NOTE(review): this listing is flattened onto a single line, so the heredoc's
# original line breaks and indentation cannot be recovered from here; consult
# the source file before editing the emitted text.
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 232 def inspect <<INSPECT Type: #{self.class.name} Entry count: #{count} Stats: #{stats.map {|k,v| " #{k}: #{v}\n" }.join("") } INSPECT end |
#load_stats_from_hash(hash) ⇒ Object
statistics
186 187 188 189 190 191 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 186
#
# Copies statistics out of a hash: for every key, the writer named "<key>="
# is called with the value, but only when the object responds to that writer.
# Keys without a matching writer are skipped silently.
#
# @param hash [Hash] attribute name => value
# @return [Hash] the hash that was passed in
def load_stats_from_hash(hash)
  hash.each do |name, value|
    writer = :"#{name}="
    next unless respond_to?(writer)

    send(writer, value)
  end
end
#pick(count, options = {}) ⇒ Object
This is the core password picker method. It is not especially efficient, but it is relatively generic: if a corpus supports Enumerable, it will work.
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 92
#
# This is the core password picker method. It is not especially efficient but
# it is relatively generic.
#
# Entries are drawn by repeatedly asking +random_number+ for an index inside
# the selected range of the sorted entries and keeping the entry when it
# passes the filters. Nothing in this method removes an entry once it has
# been picked.
#
# Options read here: :percentile (narrows the index range), :word_length
# (tested with +include?+ against +entry.word.length+), :filter (one callable
# or an array of them) and :max_iterations (draw limit, 1000 when absent).
#
# @param count [Integer] number of entries wanted
# @param options [Hash, nil] selection options; nil is treated as empty
# @return [Array] the picked entries
# @raise [ArgumentError] when the selected range holds fewer than +count+
#   entries
# @raise [RuntimeError] when +count+ entries were not found within
#   :max_iterations draws
def pick(count, options = {})
  options ||= {}
  array = CorrectHorseBatteryStaple::StatisticalArray.new(sorted_entries)

  # Array() returns the very same object when given an Array, so copy it:
  # otherwise the word-length lambda appended below would be pushed into the
  # caller's own :filter array.
  filters = Array(options[:filter]).dup

  range =
    if options[:percentile]
      array.index_range_for_percentile(options[:percentile])
    else
      0..array.size - 1
    end

  # Local named +span+ so it does not shadow the +range_size+ helper.
  span = range_size(range)
  if span < count
    raise ArgumentError, "Percentile range contains fewer words than requested count"
  end

  if options[:word_length]
    wl = options[:word_length]
    filters << lambda { |entry| wl.include? entry.word.length }
  end

  filter = filters.empty? ? nil : compose_filters(filters)
  max_iterations = options[:max_iterations] || 1000

  picked = []
  iterations = 0
  while picked.length < count && iterations < max_iterations
    offset = random_number(span)
    entry = array[offset + range.first]
    picked << entry if entry && (!filter || filter.call(entry))
    iterations += 1
  end

  raise "Cannot find #{count} words matching criteria" if picked.length < count

  picked
end
#precache(max = 0) ⇒ Object
no-op for serialized forms
140 141 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 140
#
# No-op for serialized forms: the body is empty, so +max+ is ignored.
#
# @param max [Object] unused here (defaults to 0)
# @return [nil]
def precache(max = 0); end
#recalculate ⇒ Object
193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 193
#
# Recomputes the corpus-wide statistics (weighted size, frequency and
# probability mean / standard deviation) and then rewrites the derived fields
# of every entry: rank, distance, probability, distance_probability and
# percentile.
#
# NOTE(review): +entry.percentile+ is computed as (index - 0.5) / size * 100
# with a zero-based index, so the first entry gets a negative percentile.
# Left unchanged -- confirm whether (index + 0.5) was intended.
# NOTE(review): +probability_mean+ / +probability_stddev+ are computed from
# values scaled by 100, while +entry.probability+ is the unscaled ratio;
# +distance_probability+ mixes the two. Left unchanged -- confirm.
#
# @return [self]
def recalculate
  size = self.size
  frequencies = self.frequencies

  # corpus-wide statistics
  # BigDecimal("0") rather than BigDecimal.new("0"): the .new constructor was
  # removed from the bigdecimal library in version 2.0.
  self.weighted_size = frequencies.reduce(BigDecimal("0"), :+)

  percentages = frequencies.map { |freq| (freq / weighted_size) * 100 }
  self.probability_mean, self.probability_stddev =
    CorrectHorseBatteryStaple::StatisticalArray.new(percentages).mean_and_standard_deviation
  self.frequency_mean, self.frequency_stddev = frequencies.mean_and_standard_deviation

  # per-entry derived values
  each_with_index do |entry, index|
    entry.rank                 = size - index
    entry.distance             = (entry.frequency - frequency_mean) / frequency_stddev
    entry.probability          = entry.frequency / weighted_size
    entry.distance_probability = (entry.probability - probability_mean) / probability_stddev
    entry.percentile           = (index - 0.5) / size * 100
  end

  self
end
#reset ⇒ Object
163 164 165 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 163
#
# Discards every registered filter by replacing +@filters+ with a new empty
# array.
#
# @return [Array] the new, empty filter list
def reset
  @filters = []
end
#result ⇒ Object
175 176 177 178 179 180 181 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 175
#
# Materialises the registered filters. With no filters the receiver itself is
# returned; otherwise a new instance of the same class is built from
# +execute_filters+ and given this corpus's +original_size+.
#
# +@filters+ is only created lazily by +filter+ (or by +reset+), so it can
# still be nil here; nil is treated the same as an empty list instead of
# raising NoMethodError on +nil.empty?+.
#
# @return [Object] self, or a new corpus holding the filtered entries
def result
  return self if @filters.nil? || @filters.empty?

  self.class.new(execute_filters).tap do |new_corpus|
    new_corpus.original_size = original_size
  end
end
#sorted_entries ⇒ Object
60 61 62 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 60
#
# Returns the result of calling +sort+ on +entries+, i.e. the entries ordered
# by their own comparison.
#
# @return [Array] the sorted entries
def sorted_entries
  unsorted = entries
  unsorted.sort
end
#stats ⇒ Object
225 226 227 228 229 230 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 225
#
# Snapshot of the corpus statistics as a hash. Keys, in insertion order:
# :frequency_mean, :frequency_stddev, :probability_mean, :probability_stddev,
# :size (taken from +count+), :original_size and :weighted_size (converted
# with +to_f+).
#
# @return [Hash{Symbol => Object}]
def stats
  summary = {}
  summary[:frequency_mean]     = frequency_mean
  summary[:frequency_stddev]   = frequency_stddev
  summary[:probability_mean]   = probability_mean
  summary[:probability_stddev] = probability_stddev
  summary[:size]               = count
  summary[:original_size]      = original_size
  summary[:weighted_size]      = weighted_size.to_f
  summary
end
#words ⇒ Object
134 135 136 |
# File 'lib/correct_horse_battery_staple/corpus/base.rb', line 134
#
# Returns the +word+ of every entry produced by +execute_filters+, in the
# same order.
#
# @return [Array] one word per filtered entry
def words
  execute_filters.map(&:word)
end