Class: CorrectHorseBatteryStaple::Corpus::Serialized

Inherits:
Base show all
Defined in:
lib/correct_horse_battery_staple/corpus/serialized.rb

Constant Summary collapse

CSVLIB =
CSV

Instance Attribute Summary collapse

Attributes inherited from Base

#frequency_mean, #frequency_stddev, #original_size, #probability_mean, #probability_stddev, #weighted_size

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Base

#candidates, #compose_filters, #count, #count_by_options, #count_candidates, #entropy_per_word, #entropy_per_word_by_filter, #filter, #filter_for_options, #frequencies, #inspect, #load_stats_from_hash, #pick, #precache, #recalculate, #reset, #result, #stats, #words

Methods included from Memoize

included

Methods included from CorrectHorseBatteryStaple::Common

#array_sample, #logger, #random_in_range, #random_number, #set_sample

Methods inherited from CorrectHorseBatteryStaple::Corpus

format_for

Constructor Details

#initialize(table, stats = nil) ⇒ Serialized

Returns a new instance of Serialized.



15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 15

# Builds a corpus around an in-memory table of word entries.
#
# @param table [#sort] word entries; they are sorted and then cast with
#   CorrectHorseBatteryStaple::StatisticalArray.cast
# @param stats [#empty?, nil] optional precomputed statistics (presumably a
#   Hash, given that it is handed to #load_stats_from_hash); ignored for
#   loading when nil or empty
def initialize(table, stats = nil)
  # Bare `super` forwards both arguments to the parent initializer as-is.
  super
  @table   = CorrectHorseBatteryStaple::StatisticalArray.cast(table.sort, true)
  @stats   = stats
  @filters = []

  # Only load statistics when the caller actually supplied some.
  load_stats_from_hash(stats) if stats && !stats.empty?

  self.original_size = @table.size
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(name, *args, &block) ⇒ Object (protected)



117
118
119
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 117

# Forwards any message this object does not define itself to the underlying
# table, passing the arguments and the block through untouched.
#
# @param name [Symbol] name of the method that was not found
# @param args [Array] positional arguments of the original call
# @param block [Proc, nil] block given to the original call, if any
# @return [Object] whatever the table returns for that message
def method_missing(name, *args, &block)
  delegate = @table
  # __send__ is used so the dispatch still works if the table redefines #send.
  delegate.__send__(name, *args, &block)
end

Instance Attribute Details

#table ⇒ Object (readonly)

Returns the value of attribute table.



5
6
7
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 5

# Reader for the word table held in @table.
#
# @return [Object] the current value of @table
def table
  @table
end

Class Method Details

.read(filename, fformat = nil) ⇒ Object

Raises:

  • (ArgumentError)


63
64
65
66
67
68
69
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 63

# Reads a corpus file by dispatching to the matching +read_<format>+ class
# method.
#
# @param filename [String] path of the file to read
# @param fformat [String, nil] format name; when omitted it is derived from
#   the file extension (without the leading dot)
# @return [Object] whatever the selected +read_<format>+ method returns
# @raise [ArgumentError] if no format is given and none can be derived from
#   the file name, or if no +read_<format>+ method exists for the format
def self.read(filename, fformat = nil)
  # File.extname yields "" for extension-less names, and ""[1..-1] is nil,
  # so both "no extension" and "empty extension" are caught below.
  fformat ||= File.extname(filename)[1..-1]
  if !fformat || fformat.empty?
    raise ArgumentError, "Cannot determine file format for #{filename}"
  end

  reader = "read_#{fformat}"
  # Report an unknown format as the documented ArgumentError instead of
  # letting `send` fail with an opaque NoMethodError. Private readers are
  # allowed because `send` could reach them as well.
  unless respond_to?(reader, true)
    raise ArgumentError, "Unsupported file format #{fformat.inspect} for #{filename}"
  end

  send(reader, filename)
end

.read_csv(file) ⇒ Object

serialization reading



49
50
51
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 49

# Builds a corpus from a CSV file: every row returned by CSVLIB.table is
# converted to a hash and wrapped in a CorrectHorseBatteryStaple::Word.
#
# @param file [Object] file argument handed straight to CSVLIB.table
# @return [Object] a new instance created from the list of words
def self.read_csv(file)
  words = CSVLIB.table(file).map do |row|
    CorrectHorseBatteryStaple::Word.new(row.to_hash)
  end
  new(words)
end

.read_json(file) ⇒ Object



53
54
55
56
57
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 53

# Builds a corpus from a JSON file whose top-level object has a "corpus"
# array (one hash per word) and a "stats" entry.
#
# @param file [String] path of the JSON file
# @return [Object] a new instance created from the words and the stats
def self.read_json(file)
  # File.read opens and closes the file itself. The previous
  # `open(file).read` left the handle open until garbage collection, and
  # Kernel#open treats names starting with "|" as commands to run.
  json  = JSON.parse(File.read(file))
  words = json["corpus"].map { |hash| CorrectHorseBatteryStaple::Word.new(hash) }
  new(words, json["stats"])
end

.read_marshal(file) ⇒ Object



59
60
61
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 59

# Restores an object previously written with Marshal.dump.
#
# SECURITY NOTE(review): Marshal.load can instantiate arbitrary objects, so
# this must only be used on files from a trusted source.
#
# @param file [String] path of the marshalled file
# @return [Object] the object reconstructed by Marshal.load
def self.read_marshal(file)
  # File.binread reads the bytes in binary mode and closes the file. The
  # previous `open(file).read` leaked the handle, read in text mode, and
  # went through Kernel#open, which runs names starting with "|" as commands.
  Marshal.load(File.binread(file))
end

Instance Method Details

#each(&block) ⇒ Object

some core Enumerable building blocks



30
31
32
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 30

# Iterates over the table, handing each entry to the given block.
#
# @param block [Proc] block invoked by the table's own #each
# @return [Object] whatever the table's #each returns
def each(&block)
  table.each(&block)
end

#entries ⇒ Object



38
39
40
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 38

# Returns the table itself as the list of entries (no copy is made).
#
# @return [Object] the value of #table
def entries
  table
end

#size ⇒ Object



34
35
36
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 34

# Number of entries, as reported by the table's #length.
#
# @return [Object] the result of table.length
def size
  table.length
end

#sorted_entries ⇒ Object



42
43
44
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 42

# Returns the table as-is; the constructor already builds it from sorted
# input, so no further sorting happens here.
#
# @return [Object] the value of #table
def sorted_entries
  table
end

#write(io, fformat = nil) ⇒ Object

Raises:

  • (ArgumentError)


110
111
112
113
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 110

# Serializes the corpus to +io+ by dispatching to the matching
# +write_<format>+ method.
#
# @param io [Object] output target handed to the selected writer
# @param fformat [String, nil] format name; it is required because nothing
#   can be derived from an IO
# @return [Object] whatever the selected +write_<format>+ method returns
# @raise [ArgumentError] if the format is missing or empty, or if no
#   +write_<format>+ method exists for it
def write(io, fformat = nil)
  if !fformat || fformat.empty?
    raise ArgumentError, "Cannot determine file format for output"
  end

  writer = "write_#{fformat}"
  # Report an unknown format as the documented ArgumentError. Without this
  # check the call falls through method_missing to the table and fails there
  # with a NoMethodError that never mentions the format.
  unless respond_to?(writer, true)
    raise ArgumentError, "Unsupported output format #{fformat.inspect}"
  end

  send(writer, io)
end

#write_csv(io) ⇒ Object

writing



73
74
75
76
77
78
79
80
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 73

# Writes the table as CSV: a header line followed by one line per entry,
# with the entry's position in the table as the first column.
#
# Missing frequency, percentile, distance, probability and
# distance_probability values are written as 0.
#
# @param io [#puts] output target
# @return [Object] the result of iterating the table
def write_csv(io)
  io.puts "index,rank,word,frequency,percentile,distance,probability,distance_probability"
  @table.each_with_index do |w, index|
    # The word is emitted inside double quotes, so any embedded double quote
    # has to be doubled to keep the line valid CSV.
    quoted_word = w.word.to_s.gsub('"', '""')
    io.puts sprintf("%d,%d,\"%s\",%d,%.4f,%.6f,%.8f,%.8f\n",
      index, w.rank, quoted_word, w.frequency || 0,
      w.percentile || 0, w.distance || 0, w.probability || 0, w.distance_probability || 0)
  end
end

#write_isam(io) ⇒ Object



103
104
105
106
107
108
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 103

# Writes the sorted entries as back-to-back fixed-width records with no
# separator: the word left-justified in 40 columns, followed by the
# frequency (0 when missing) right-justified in 10 columns.
# Entries whose word is longer than 40 characters are skipped.
#
# @param io [#print] output target
# @return [Object] the result of iterating the sorted entries
def write_isam(io)
  sorted_entries.each_with_index do |w, _index|
    # A longer word would not fit the 40-column field.
    next if w.word.length > 40

    io.print format("%-40s%10d", w.word, w.frequency || 0)
  end
end

#write_json(io) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 86

# Streams the corpus as a JSON object with a "stats" member and a "corpus"
# array, writing one entry at a time so the whole document never has to be
# built in memory. Entries are separated by a comma and a line break.
#
# @param io [#print, #puts] output target
# @return [Object] the result of the final io.puts call
def write_json(io)
  io.print '{"stats": '
  io.print stats.to_json
  io.print ', "corpus": ['

  # Stays nil for the first entry so that no leading comma is written.
  separator = nil
  @table.each do |word|
    io.puts separator if separator
    io.print(word.to_hash.to_json)
    separator = ","
  end

  io.puts "]\n}"
end

#write_json1(io) ⇒ Object



82
83
84
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 82

# Writes the corpus as a single JSON document built in one go: an object
# with the stats under "stats" and the table under "corpus".
#
# @param io [#write] output target
# @return [Object] the result of io.write
def write_json1(io)
  document = { "stats" => stats, "corpus" => @table }
  io.write(document.to_json)
end

#write_marshal(io) ⇒ Object



99
100
101
# File 'lib/correct_horse_battery_staple/corpus/serialized.rb', line 99

# Writes this whole object to +io+ in Ruby's Marshal format.
#
# @param io [#write] output target
# @return [Object] the result of io.write
def write_marshal(io)
  payload = Marshal.dump(self)
  io.write(payload)
end