Class: CorrectHorseBatteryStaple::Writer::Isam
- Inherits:
-
File
show all
- Defined in:
- lib/correct_horse_battery_staple/writer/isam.rb
Instance Attribute Summary
Attributes inherited from File
#io
Attributes inherited from Base
#dest, #options
Instance Method Summary
collapse
Methods inherited from File
#close
Methods inherited from Base
#close
Methods included from Common
#array_sample, #logger, #random_in_range, #random_number, #set_sample
make_writer, write
Constructor Details
#initialize(dest, options = {}) ⇒ Isam
Returns a new instance of Isam.
3
4
5
|
# File 'lib/correct_horse_battery_staple/writer/isam.rb', line 3
# Builds an Isam writer.
#
# Adds no behaviour of its own: the bare +super+ forwards +dest+ and
# +options+ unchanged to the inherited initializer.
#
# @param dest the destination, passed through to the superclass
# @param options [Hash] writer options, passed through to the superclass
def initialize(dest, options={})
super
end
|
Instance Method Details
#binwrite(*args) ⇒ Object
41
42
43
44
|
# File 'lib/correct_horse_battery_staple/writer/isam.rb', line 41
# Writes +args+ to the underlying +io+, using its +binwrite+ method when it
# publicly responds to one and falling back to +write+ otherwise.
#
# @param args arguments forwarded verbatim to the chosen IO method
# @return [Object] whatever the chosen IO method returns
def binwrite(*args)
  selector =
    if io.respond_to?(:binwrite)
      :binwrite
    else
      :write
    end
  io.send(selector, *args)
end
|
#fix_stats(stats) ⇒ Object
7
8
9
10
11
12
13
14
|
# File 'lib/correct_horse_battery_staple/writer/isam.rb', line 7
# Replaces every NaN value in +stats+ with -1, in place.
#
# Values that do not respond to +nan?+ are left untouched.
#
# @param stats a key/value collection (iterated with +each+, updated with +[]=+)
# @return the same +stats+ object, after modification
def fix_stats(stats)
  stats.each do |key, value|
    next unless value.respond_to?(:nan?)
    next unless value.nan?

    stats[key] = -1
  end
  stats
end
|
#openmode ⇒ Object
46
47
48
|
# File 'lib/correct_horse_battery_staple/writer/isam.rb', line 46
# Mode string used when opening the destination file.
#
# @return [String] "wb:ASCII-8BIT" when +IO+ responds to +binwrite+,
#   plain "w" otherwise
def openmode
  return "w" unless IO.respond_to?(:binwrite)

  "wb:ASCII-8BIT"
end
|
#write_corpus(corpus) ⇒ Object
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
# File 'lib/correct_horse_battery_staple/writer/isam.rb', line 16
# Serializes +corpus+ to +io+ as a header block followed by one fixed-width
# record per entry.
#
# Header, space padded so its total size is a multiple of 512 bytes:
#   N  offset of the first record (equal to the header size)
#   N  byte length of the JSON prelude
#   A  JSON prelude ("wlen", "flen", "entrylen", "sort", "n", "stats")
# Record ("entrylen" bytes each):
#   C  byte length of the word (a single byte)
#   a  the word, NUL padded to wlen - 1 bytes
#   N  the entry's frequency
#
# Every length is measured in BYTES (+bytesize+), because +pack+ sizes the
# +A+/+a+ fields in bytes. Measuring in characters truncates non-ASCII
# words and records a prelude length shorter than the data written.
# For all-ASCII data the output is unchanged.
# NOTE(review): confirm the matching reader treats both lengths as bytes.
#
# Sets @word_length, @freq_length and @entry_length as a side effect.
#
# @param corpus an enumerable of entries responding to +word+ and
#   +frequency+; must also respond to +length+ and +stats+
# @return the +corpus+ that was passed in
def write_corpus(corpus)
  # Widest word in bytes, plus one byte for the length prefix.
  @word_length  = (corpus.map { |entry| entry.word.bytesize }.max || 0) + 1
  @freq_length  = 4
  @entry_length = @word_length + @freq_length

  stats = fix_stats(corpus.stats)
  prelude = {
    "wlen"     => @word_length,
    "flen"     => @freq_length,
    "entrylen" => @entry_length,
    "sort"     => "frequency",
    "n"        => corpus.length,
    "stats"    => stats
  }.to_json

  # The 8 accounts for the two 32-bit integers preceding the prelude.
  prelude_bytes = prelude.bytesize
  record_offset = [((prelude_bytes + 8.0) / 512).ceil, 1].max * 512
  io.write([record_offset, prelude_bytes, prelude].pack("NNA#{record_offset - 8}"))

  # The record layout is the same for every entry, so build it once.
  record_format = "Ca#{@word_length - 1}N"
  corpus.each do |entry|
    io.write([entry.word.bytesize, entry.word, entry.frequency].pack(record_format))
  end

  corpus
end
|