Class: CorrectHorseBatteryStaple::Corpus::Serialized
- Inherits:
-
Base
show all
- Defined in:
- lib/correct_horse_battery_staple/corpus/serialized.rb
Constant Summary
collapse
- CSVLIB =
CSV
Instance Attribute Summary collapse
Attributes inherited from Base
#frequency_mean, #frequency_stddev, #original_size, #probability_mean, #probability_stddev, #weighted_size
Class Method Summary
collapse
Instance Method Summary
collapse
Methods inherited from Base
#candidates, #compose_filters, #count, #count_by_options, #count_candidates, #entropy_per_word, #entropy_per_word_by_filter, #filter, #filter_for_options, #frequencies, #inspect, #load_stats_from_hash, #pick, #precache, #recalculate, #reset, #result, #stats, #words
Methods included from Memoize
included
#array_sample, #logger, #random_in_range, #random_number, #set_sample
format_for
Constructor Details
#initialize(table, stats = nil) ⇒ Serialized
Returns a new instance of Serialized.
15
16
17
18
19
20
21
22
23
24
25
26
|
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 15
# Builds a corpus around an in-memory collection of words.
#
# The bare +super+ forwards +table+ and +stats+ unchanged to the parent
# initializer, so neither local may be reassigned before that call.
#
# @param table [#sort] words to store; they are sorted and then cast to a
#   CorrectHorseBatteryStaple::StatisticalArray
# @param stats [Hash, nil] previously computed statistics; loaded through
#   #load_stats_from_hash only when present and non-empty
def initialize(table, stats = nil)
  super
  sorted_words = table.sort
  @table = CorrectHorseBatteryStaple::StatisticalArray.cast(sorted_words, true)
  @stats = stats
  @filters = []
  load_stats_from_hash(stats) if stats && !stats.empty?
  self.original_size = @table.size
end
|
Dynamic Method Handling
This class handles dynamic methods through the method_missing method
#method_missing(name, *args, &block) ⇒ Object
117
118
119
|
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 117
# Forwards any message this object does not understand to the word table.
#
# Dispatch goes through +__send__+, so the table's non-public methods are
# reachable as well.
# NOTE(review): no matching respond_to_missing? is visible in this file, so
# respond_to? will not report the delegated methods - confirm whether that
# is intended.
#
# @param name [Symbol] name of the missing method
# @param args [Array] positional arguments, passed through untouched
# @param block [Proc, nil] block, passed through untouched
# @return [Object] whatever the table returns for the call
def method_missing(name, *args, &block)
  delegate = @table
  delegate.__send__(name, *args, &block)
end
|
Instance Attribute Details
#table ⇒ Object
Returns the value of attribute table.
5
6
7
|
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 5
# Reader for the word table held by this corpus.
#
# @return [Object] the current value of the @table instance variable
#   (assigned in #initialize from the sorted input words)
def table
@table
end
|
Class Method Details
.read(filename, fformat = nil) ⇒ Object
63
64
65
66
67
68
69
|
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 63
# Loads a corpus from +filename+ by dispatching to the class method named
# +read_<format>+.
#
# @param filename [String] path of the file to load
# @param fformat [String, Symbol, nil] format name; when omitted, the
#   filename's extension (without the leading dot) is used
# @return [Object] whatever the selected +read_<format>+ method returns
# @raise [ArgumentError] if no format can be determined, or if no
#   +read_<format>+ method exists for the requested format
def self.read(filename, fformat=nil)
  # File.extname returns "" for extension-less names; ""[1..-1] is nil,
  # which the check below reports as an undeterminable format.
  fformat ||= File.extname(filename)[1..-1]
  raise ArgumentError, "Cannot determine file format for #{filename}" if !fformat || fformat.empty?

  reader = "read_#{fformat}"
  # Reject unknown formats up front with the same error class used above
  # instead of letting +send+ fail with a NoMethodError. Private readers are
  # included because +send+ can reach them too.
  unless respond_to?(reader, true)
    raise ArgumentError, "Unsupported file format #{fformat.inspect} for #{filename}"
  end

  send reader, filename
end
|
.read_json(file) ⇒ Object
53
54
55
56
57
|
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 53
# Builds a corpus from a JSON document.
#
# The document is expected to be an object whose "corpus" entry is an array
# of word hashes (each handed to CorrectHorseBatteryStaple::Word.new) and
# whose "stats" entry is passed to the constructor as-is.
#
# NOTE(review): Kernel#open is kept so existing path handling is unchanged,
# but it treats a leading "|" as a command to run - do not pass untrusted
# paths here.
#
# @param file [String] path of the JSON file
# @return [Object] the new instance built from the parsed words and stats
def self.read_json(file)
  # Block form guarantees the handle is closed, even when parsing raises.
  json = open(file) { |io| JSON.parse(io.read) }
  words = json["corpus"].map { |hash| CorrectHorseBatteryStaple::Word.new(hash) }
  new(words, json["stats"])
end
|
.read_marshal(file) ⇒ Object
59
60
61
|
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 59
# Restores an object previously serialized with Marshal.dump.
#
# NOTE(review): Marshal.load can instantiate arbitrary objects; only use it
# on files from a trusted source. Kernel#open is kept so existing path
# handling is unchanged, but it treats a leading "|" as a command to run.
#
# @param file [String] path of the marshal file
# @return [Object] the deserialized object
def self.read_marshal(file)
  # Marshal data is binary: read it in "rb" mode so no newline/EOF
  # translation is applied, and use the block form so the handle is closed.
  open(file, "rb") { |io| Marshal.load(io.read) }
end
|
Instance Method Details
#each(&block) ⇒ Object
some core Enumerable building blocks
30
31
32
|
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 30
# Iterates over the word table, handing each entry to the given block.
#
# @param block [Proc, nil] forwarded unchanged to the table's +each+
# @return [Object] whatever the table's +each+ returns
def each(&block)
  table.each(&block)
end
|
#entries ⇒ Object
38
39
40
|
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 38
# Returns the word table itself (the result of #table), not a copy.
#
# @return [Object] the object returned by #table
def entries
table
end
|
#size ⇒ Object
34
35
36
|
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 34
# Number of entries in the word table.
#
# @return [Object] the result of calling +length+ on #table
def size
table.length
end
|
#sorted_entries ⇒ Object
42
43
44
|
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 42
# Returns the word table as-is; no sorting happens here.
# The table is already sorted when #initialize stores it (+table.sort+).
#
# @return [Object] the object returned by #table
def sorted_entries
table
end
|
#write(io, fformat = nil) ⇒ Object
110
111
112
113
|
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 110
# Serializes the corpus to +io+ by dispatching to the instance method named
# +write_<format>+.
#
# @param io [IO] destination handed to the selected writer
# @param fformat [String, Symbol, nil] format name; required, because the
#   format is never inferred from +io+
# @return [Object] whatever the selected +write_<format>+ method returns
# @raise [ArgumentError] if +fformat+ is nil/empty, or if no
#   +write_<format>+ method exists for it
def write(io, fformat=nil)
  raise ArgumentError, "Cannot determine file format for output" if !fformat || fformat.empty?

  writer = "write_#{fformat}"
  # Reject unknown formats here with the same error class used above, rather
  # than letting +send+ fall through to method_missing and fail on the table
  # with a NoMethodError. Private writers are included because +send+ can
  # reach them too.
  raise ArgumentError, "Unsupported output format #{fformat.inspect}" unless respond_to?(writer, true)

  send writer, io
end
|
#write_csv(io) ⇒ Object
73
74
75
76
77
78
79
80
|
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 73
# Writes the word table to +io+ as CSV: one header line, then one line per
# word in table order.
#
# Columns: index, rank, word, frequency, percentile, distance, probability,
# distance_probability. The word is always double-quoted; every numeric
# field except rank falls back to 0 when the word reports nil for it.
#
# @param io [IO] destination; must respond to +puts+
# @return [Object] the result of iterating the table
def write_csv(io)
  io.puts "index,rank,word,frequency,percentile,distance,probability,distance_probability"
  @table.each_with_index do |w, index|
    # Double any embedded quote so the quoted word field stays valid CSV.
    quoted_word = w.word.to_s.gsub('"', '""')
    # The format already ends in "\n", so +puts+ does not add a second one.
    io.puts sprintf("%d,%d,\"%s\",%d,%.4f,%.6f,%.8f,%.8f\n",
                    index, w.rank, quoted_word, w.frequency || 0,
                    w.percentile || 0, w.distance || 0, w.probability || 0, w.distance_probability || 0)
  end
end
|
#write_isam(io) ⇒ Object
103
104
105
106
107
108
|
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 103
# Writes the sorted entries to +io+ as back-to-back fixed-width records with
# no separator: the word left-justified in 40 columns followed by the
# frequency right-justified in 10 columns (0 when the frequency is nil).
#
# Words longer than 40 characters are skipped, since they would not fit the
# 40-column word field.
#
# @param io [IO] destination; must respond to +print+
# @return [Object] the result of iterating the sorted entries
def write_isam(io)
  sorted_entries.each_with_index do |entry, _position|
    next if entry.word.length > 40

    io.print sprintf("%-40s%10d", entry.word, entry.frequency || 0)
  end
end
|
#write_json(io) ⇒ Object
86
87
88
89
90
91
92
93
94
95
96
97
|
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 86
# Streams the corpus to +io+ as a JSON object with a "stats" entry and a
# "corpus" array, emitting one word at a time instead of building the whole
# document in memory.
#
# Each word after the first is preceded by "," and a newline, so every word
# hash starts on its own line.
#
# @param io [IO] destination; must respond to +print+ and +puts+
# @return [Object] the result of the final +puts+
def write_json(io)
  io.print '{"stats": '
  io.print stats.to_json
  io.print ', "corpus": ['
  @table.each_with_index do |word, position|
    io.puts "," unless position.zero?
    io.print(word.to_hash.to_json)
  end
  io.puts "]\n}"
end
|
#write_json1(io) ⇒ Object
82
83
84
|
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 82
# Writes the corpus to +io+ as a single JSON document built in one go:
# a hash with "stats" and "corpus" (the raw table) converted via +to_json+.
#
# @param io [IO] destination; must respond to +write+
# @return [Object] whatever +io.write+ returns
def write_json1(io)
  document = { "stats" => stats, "corpus" => @table }
  io.write(document.to_json)
end
|
#write_marshal(io) ⇒ Object
99
100
101
|
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 99
# Writes this whole object to +io+ in Ruby's Marshal format.
#
# @param io [IO] destination; must respond to +write+
# @return [Object] whatever +io.write+ returns
def write_marshal(io)
  serialized = Marshal.dump(self)
  io.write(serialized)
end
|