Module: Freql::BinData
- Defined in:
- lib/freql/bindata.rb
Constant Summary collapse
- LANG_FILE_PATH =
"lib/freql/data/%s_%s.msgpack.gz"
Class Method Summary collapse
- .pack(hash_data, size: nil) ⇒ Object
- .read_and_unpack_lang(lang = :en, size: :small, &block) ⇒ Object
- .read_lang(lang = :en, size: :small, &block) ⇒ Object
- .unpack(bin_data) ⇒ Object
Class Method Details
.pack(hash_data, size: nil) ⇒ Object
20 21 22 23 24 25 26 27 28 29 |
# File 'lib/freql/bindata.rb', line 20
#
# Groups the keys of +hash_data+ into an array of bins, where the bin at
# index +i+ holds every key whose value (converted with +to_i+) equals +i+.
#
# @param hash_data [Hash] maps keys to values; each value is converted with
#   +to_i+ to pick its bin
# @param size [Integer, nil] highest bin index to allocate; when nil it
#   defaults to the largest value in +hash_data+ (0 for an empty hash)
# @return [Array<Array>] +size + 1+ bins of keys
# @raise [ArgumentError] if a value does not fit in the allocated bins
def pack(hash_data, size: nil)
  # `max` is nil for an empty hash, so fall back to a single empty bin
  # instead of failing on `nil + 1`.
  size ||= hash_data.each_value.map(&:to_i).max || 0
  bin_data = Array.new(size + 1) { [] }

  hash_data.each do |key, val|
    # `fetch` keeps normal Array index semantics (negative indices still count
    # from the end) but lets an out-of-range bin fail with a clear message.
    bin = bin_data.fetch(val.to_i) do
      raise ArgumentError,
            "value #{val.inspect} for key #{key.inspect} does not fit in #{bin_data.size} bins"
    end
    bin << key
  end

  bin_data
end
.read_and_unpack_lang(lang = :en, size: :small, &block) ⇒ Object
50 51 52 |
# File 'lib/freql/bindata.rb', line 50
#
# Reads the data for a language via +read_lang+ and converts it to a hash
# with +unpack+.
#
# @param lang [Symbol, String] language identifier passed on to +read_lang+
# @param size [Symbol, String] data-set size passed on to +read_lang+
# @yield [hash_data] the unpacked hash, when a block is given
# @return [Object] the block's result, or the unpacked hash itself when no
#   block is given
def read_and_unpack_lang(lang = :en, size: :small, &block)
  read_lang(lang, size: size) do |data|
    hash_data = unpack(data)
    # Without a block there is nothing to call; hand the hash back instead of
    # failing with NoMethodError on nil.
    block ? block.call(hash_data) : hash_data
  end
end
.read_lang(lang = :en, size: :small, &block) ⇒ Object
41 42 43 44 45 46 47 48 |
# File 'lib/freql/bindata.rb', line 41
#
# Opens the gzipped MessagePack file selected by +size+ and +lang+ and
# returns its contents minus the first item.
#
# NOTE(review): LANG_FILE_PATH is a relative path, so the file is resolved
# against the process's current working directory.
#
# @param lang [Symbol, String] language identifier substituted into
#   LANG_FILE_PATH
# @param size [Symbol, String] data-set size substituted into LANG_FILE_PATH
# @yield [bin_data] the decoded data without its first item, when a block is
#   given
# @return [Object] the block's result, or the decoded data itself when no
#   block is given
def read_lang(lang = :en, size: :small, &block)
  Zlib::GzipReader.open(format(LANG_FILE_PATH, size, lang)) do |gz|
    # The first item in the language data contains version and format
    # information, which is ignored for now. The rest of the data is the
    # word-frequency bin data.
    bin_data = MessagePack.unpack(gz.read)[1..]
    # Without a block there is nothing to call; return the data instead of
    # failing with NoMethodError on nil.
    block ? block.call(bin_data) : bin_data
  end
end
.unpack(bin_data) ⇒ Object
31 32 33 34 35 36 37 38 39 |
# File 'lib/freql/bindata.rb', line 31
#
# Inverse of the bin layout: turns an array of bins into a hash that maps
# every key to the index of the bin it was found in.
#
# @param bin_data [Array<Array>] bins of keys; the position of a bin in the
#   array becomes the value of each key inside it
# @return [Hash] key => bin index; if a key appears in several bins, the
#   last (highest-index) bin wins
def unpack(bin_data)
  hash_data = {}
  bin_data.each_with_index do |group, val|
    group.each { |key| hash_data[key] = val }
  end
  hash_data
end