Module: Freql::BinData

Defined in:
lib/freql/bindata.rb

Constant Summary

LANG_FILE_PATH =
"lib/freql/data/%s_%s.msgpack.gz"

Class Method Summary

Class Method Details

.pack(hash_data, size: nil) ⇒ Object



20
21
22
23
24
25
26
27
28
29
# File 'lib/freql/bindata.rb', line 20

# Groups the keys of +hash_data+ into bins indexed by their integer value.
#
# The result is an Array in which element +i+ holds every key whose value
# (coerced with +to_i+) equals +i+. Bins with no keys are empty arrays.
#
# @param hash_data [Hash] mapping of key => bin index (anything responding to +to_i+)
# @param size [Integer, nil] highest bin index to allocate; when nil it is
#   inferred from the largest value in +hash_data+
# @return [Array<Array>] +size + 1+ bins, or an empty Array when +size+ is
#   nil and +hash_data+ is empty
def pack(hash_data, size: nil)
  # Coerce with to_i before taking the max so the inferred size agrees with
  # the index used when filling the bins below.
  size ||= hash_data.each_value.map(&:to_i).max

  # An empty hash with no explicit size has no largest value to infer from;
  # return no bins rather than failing on nil + 1.
  return [] if size.nil?

  bins = Array.new(size + 1) { [] }
  hash_data.each { |key, val| bins[val.to_i] << key }
  bins
end

.read_and_unpack_lang(lang = :en, size: :small, &block) ⇒ Object



50
51
52
# File 'lib/freql/bindata.rb', line 50

# Reads the language data via +read_lang+, converts it with +unpack+, and
# hands the resulting hash to the given block.
#
# @param lang [Symbol] language identifier forwarded to +read_lang+
# @param size [Symbol] data-set size forwarded to +read_lang+
# @yieldparam [Hash] the unpacked key => bin index mapping
# @return [Object] whatever +read_lang+ returns
def read_and_unpack_lang(lang = :en, size: :small, &block)
  read_lang(lang, size: size) do |bin_data|
    block.call(unpack(bin_data))
  end
end

.read_lang(lang = :en, size: :small, &block) ⇒ Object



41
42
43
44
45
46
47
48
# File 'lib/freql/bindata.rb', line 41

# Opens the gzipped MessagePack file for the given language and size and
# passes its contents, minus the first item, to the given block.
#
# The file path is built by formatting LANG_FILE_PATH with +size+ and +lang+
# (in that order).
#
# @param lang [Symbol] language identifier used in the file name
# @param size [Symbol] data-set size used in the file name
# @yieldparam [Array] every unpacked item after the first
# @return [Object] the value returned by the block
def read_lang(lang = :en, size: :small, &block)
  path = LANG_FILE_PATH % [size, lang]

  Zlib::GzipReader.open(path) do |reader|
    payload = MessagePack.unpack(reader.read)

    # The first item in the language data contains version and format
    # information, which is deliberately ignored for now. Everything after
    # it is the word-frequency bin data.
    block.call(payload[1..])
  end
end

.unpack(bin_data) ⇒ Object



31
32
33
34
35
36
37
38
39
# File 'lib/freql/bindata.rb', line 31

# Flattens an Array of bins into a Hash from key to bin index.
#
# Each key found in +bin_data[i]+ is mapped to +i+. If a key appears in more
# than one bin, the highest index wins because later bins overwrite earlier
# ones.
#
# @param bin_data [Array<Array>] bins of keys, positioned by index
# @return [Hash] key => index of the bin that contained it
def unpack(bin_data)
  bin_data.each_with_index.with_object({}) do |(group, bin_index), lookup|
    group.each { |key| lookup[key] = bin_index }
  end
end