Class: CorrectHorseBatteryStaple::Writer::Isam

Inherits:
File show all
Defined in:
lib/correct_horse_battery_staple/writer/isam.rb

Instance Attribute Summary

Attributes inherited from File

#io

Attributes inherited from Base

#dest, #options

Instance Method Summary collapse

Methods inherited from File

#close

Methods inherited from Base

#close

Methods included from Common

#array_sample, #logger, #random_in_range, #random_number, #set_sample

Methods inherited from CorrectHorseBatteryStaple::Writer

make_writer, write

Constructor Details

#initialize(dest, options = {}) ⇒ Isam

Returns a new instance of Isam.



3
4
5
# File 'lib/correct_horse_battery_staple/writer/isam.rb', line 3

# Creates the writer by handing both arguments, unchanged, to the parent
# class initializer.
#
# @param dest the destination forwarded to the superclass
# @param options [Hash] options forwarded to the superclass
def initialize(dest, options = {})
  super(dest, options)
end

Instance Method Details

#binwrite(*args) ⇒ Object



41
42
43
44
# File 'lib/correct_horse_battery_staple/writer/isam.rb', line 41

# Writes +args+ to +io+, using +binwrite+ when +io+ responds to it and
# falling back to +write+ otherwise.
#
# @param args arguments forwarded verbatim to the chosen method
# @return [Object] whatever the underlying call returns
def binwrite(*args)
  if io.respond_to?(:binwrite)
    io.send(:binwrite, *args)
  else
    io.send(:write, *args)
  end
end

#fix_stats(stats) ⇒ Object



7
8
9
10
11
12
13
14
# File 'lib/correct_horse_battery_staple/writer/isam.rb', line 7

# Replaces every NaN value in +stats+ with -1, in place.
#
# Values that do not respond to +nan?+ are left untouched.
#
# @param stats [Hash] statistics keyed by name; modified in place
# @return [Hash] the same +stats+ object
def fix_stats(stats)
  stats.each do |name, value|
    next unless value.respond_to?(:nan?) && value.nan?

    stats[name] = -1
  end
  stats
end

#openmode ⇒ Object



46
47
48
# File 'lib/correct_horse_battery_staple/writer/isam.rb', line 46

# Mode string to open the destination with: binary write mode when +IO+
# responds to +binwrite+, plain write mode otherwise.
#
# @return [String] "wb:ASCII-8BIT" or "w"
def openmode
  return "w" unless IO.respond_to?(:binwrite)

  "wb:ASCII-8BIT"
end

#write_corpus(corpus) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/correct_horse_battery_staple/writer/isam.rb', line 16

# Writes +corpus+ to +io+ as a header followed by fixed-width records.
#
# Header: two 32-bit big-endian integers (byte offset of the first record,
# byte length of the JSON prelude), then the JSON prelude, space-padded so
# the header ends on a multiple of 512 bytes.
#
# Record: one length byte, the word NUL-padded to the widest word in the
# corpus, then the frequency as a 32-bit big-endian unsigned integer.
#
# All widths are measured in bytes (+bytesize+), because the +pack+
# directives +a+/+A+ count bytes; using character counts would truncate
# multibyte words and store a wrong length prefix.
#
# NOTE: the length prefix is a single byte, so a word longer than 255 bytes
# cannot be represented correctly.
#
# @param corpus enumerable of entries responding to +word+ and +frequency+;
#   must also respond to +length+ and +stats+
# @return the +corpus+ that was passed in
def write_corpus(corpus)
  widest_word = corpus.reduce(0) { |widest, entry| [widest, entry.word.bytesize].max }

  # includes prefix length byte
  @word_length = widest_word + 1
  @freq_length = 4
  @entry_length = @word_length + @freq_length

  prelude = {
    "wlen"     => @word_length,
    "flen"     => @freq_length,
    "entrylen" => @entry_length,
    "sort"     => "frequency",
    "n"        => corpus.length,
    "stats"    => fix_stats(corpus.stats)
  }.to_json

  # The 8 extra bytes are the two leading 32-bit integers; round the whole
  # header up to the next 512-byte boundary.
  record_offset = [((prelude.bytesize + 8.0) / 512).ceil, 1].max * 512
  io.write([record_offset, prelude.bytesize, prelude].pack("NNA#{record_offset - 8}"))

  record_format = "Ca#{@word_length - 1}N"
  corpus.each do |entry|
    io.write([entry.word.bytesize, entry.word, entry.frequency].pack(record_format))
  end

  corpus
end