Class: LZWrb

Inherits:

Object

Object
LZWrb

show all

Defined in: lib/lzwrb.rb

Overview

Holds an LZW encoder/decoder with a specific configuration. These objects are not thread safe, so for concurrent use different ones should be created.

Constant Summary collapse

DEC = Alphabet containing the digits 0 to 9

('0'..'9').to_a # the digit characters "0".."9"; Integer#chr would yield the control bytes 0x00..0x09 instead

HEX_UPPER = Alphabet containing the hex digits 0 to F in uppercase

(0...16).to_a.map{ |n| n.to_s(16).upcase }

HEX_LOWER = Alphabet containing the hex digits 0 to f in lower case

(0...16).to_a.map{ |n| n.to_s(16).downcase }

LATIN_UPPER = Alphabet containing the 26 letters of the latin alphabet in upper case

('A'..'Z').to_a

LATIN_LOWER = Alphabet containing the 26 letters of the latin alphabet in lower case

('a'..'z').to_a

ALPHA_UPPER = Alphabet containing the alphanumeric characters in upper case (A-Z, 0-9)

LATIN_UPPER + DEC

ALPHA_LOWER = Alphabet containing the alphanumeric characters in lower case (a-z, 0-9)

LATIN_LOWER + DEC

ALPHA = Alphabet containing the alphanumeric characters (A-Z, a-z, 0-9)

LATIN_UPPER + LATIN_LOWER + DEC

PRINTABLE = Alphabet containing all printable ASCII characters (ASCII 32 - 126)

(32...127).to_a.map(&:chr)

ASCII = Alphabet containing all ASCII characters

(0...128).to_a.map(&:chr)

BINARY = Alphabet containing all possible byte values, suitable for any input data

(0...256).to_a.map(&:chr)

PRESET_GIF = Preset to satisfy the GIF specification

{
  min_bits: 8,
  max_bits: 12,
  lsb:      true,
  clear:    true,
  stop:     true,
  deferred: true
}

PRESET_FAST = Preset optimized for speed

{
  min_bits: 16,
  max_bits: 16,
  lsb:      true,
  clear:    false,
  stop:     false
}

PRESET_BEST = Preset optimized for compression

{
  min_bits: 8,
  max_bits: 16,
  lsb:      true,
  clear:    false,
  stop:     false
}

VERBOSITY = Verbosity of the encoder/decoder

{
  silent:  0, # Don't print anything to the console
  minimal: 1, # Print only errors
  quiet:   2, # Print errors and warnings
  normal:  3, # Print errors, warnings and regular encoding information
  debug:   4  # Print everything, including debug details about the encoding process
}

DEFAULT_MIN_BITS = Default value for minimum code bits (may be changed depending on alphabet)

DEFAULT_MAX_BITS = Default value for maximum code bits before rebuilding code table (may be changed depending on alphabet)

DEFAULT_LSB = Use Least Significant Bit packing order

true

DEFAULT_CLEAR = Use explicit Clear codes to indicate the initialization of the code table

false

DEFAULT_STOP = Use explicit Stop codes to indicate the end of the data

false

DEFAULT_DEFERRED = Enable the use of deferred Clear codes (the decoder won't rebuild the table, even when full, unless an explicit Clear code is received)

false

CHARS = Dictionary of binary chars for fast access

256.times.map(&:chr)

Instance Method Summary collapse

#decode(data) ⇒ String
Decode the provided data.
#encode(data) ⇒ String
Encode the provided data.
#initialize(preset: nil, bits: nil, min_bits: nil, max_bits: nil, binary: nil, alphabet: BINARY, safe: false, lsb: nil, clear: nil, stop: nil, deferred: nil, verbosity: :normal) ⇒ LZWrb constructor
Creates a new encoder/decoder object with the given settings.

Constructor Details

#initialize(preset: nil, bits: nil, min_bits: nil, max_bits: nil, binary: nil, alphabet: BINARY, safe: false, lsb: nil, clear: nil, stop: nil, deferred: nil, verbosity: :normal) ⇒ `LZWrb`

Creates a new encoder/decoder object with the given settings.

Parameters:

alphabet (Array<String>) (defaults to: BINARY) —
Set of characters that compose the messages to encode.
binary (Boolean) (defaults to: nil) —
Use binary encoding or textual encoding.
bits (Integer) (defaults to: nil) —
Code bit size for constant length encoding (supersedes min/max bit size).
clear (Boolean) (defaults to: nil) —
Use clear codes every time the table gets reinitialized.
deferred (Boolean) (defaults to: nil) —
Support deferred clear codes when decoding (i.e., don't refresh code table unless an explicit clear code is received, even when it's full).
lsb (Boolean) (defaults to: nil) —
Use least or most significant bit packing (currently useless, only LSB supported).
max_bits (Integer) (defaults to: nil) —
Maximum code bit size for variable length encoding (superseded by bits).
min_bits (Integer) (defaults to: nil) —
Minimum code bit size for variable length encoding (superseded by bits).
preset (Hash) (defaults to: nil) —
Predefined configurations for a few settings (such as bit count or usage of clear/stop codes)
safe (Boolean) (defaults to: false) —
Perform a first pass before encoding to verify that the alphabet covers all the data.
stop (Boolean) (defaults to: nil) —
Use stop codes to denote the end of the encoded data.
verbosity (Symbol) (defaults to: :normal) —
Verbosity level of the encoder (see VERBOSITY).

# File 'lib/lzwrb.rb', line 110

# Creates a new encoder/decoder object with the given settings.
#
# Bit sizes and the clear/stop/deferred/lsb flags are resolved through
# find_arg (defined elsewhere in this file) from the explicit keyword, the
# matching preset entry and the DEFAULT_* constant, in that argument order.
# NOTE(review): assumes find_arg returns its first non-nil argument - confirm.
#
# Invalid alphabets or code sizes are reported through err and then the
# process is terminated with exit.
#
# @param preset [Hash, nil] Predefined settings (see the PRESET_* constants).
# @param bits [Integer, nil] Code size for constant length encoding.
# @param min_bits [Integer, nil] Minimum code size for variable length encoding.
# @param max_bits [Integer, nil] Maximum code size for variable length encoding.
# @param binary [Boolean, nil] Binary mode unless explicitly set to false.
# @param alphabet [Array<String>] Characters that compose the messages.
# @param safe [Boolean] Verify the data against the alphabet before encoding.
# @param lsb [Boolean, nil] Least significant bit packing order.
# @param clear [Boolean, nil] Use explicit clear codes.
# @param stop [Boolean, nil] Use explicit stop codes.
# @param deferred [Boolean, nil] Support deferred clear codes.
# @param verbosity [Symbol] One of the keys of VERBOSITY.
def initialize(
    preset:    nil,
    bits:      nil,
    min_bits:  nil,
    max_bits:  nil,
    binary:    nil,
    alphabet:  BINARY,
    safe:      false,
    lsb:       nil,
    clear:     nil,
    stop:      nil,
    deferred:  nil,
    verbosity: :normal
  )
  # Parse preset
  params = preset || {}

  # Verbosity: assign the level first, so that @verbosity is already set by
  # the time the warning below is emitted.
  level = VERBOSITY[verbosity]
  @verbosity = level || VERBOSITY[:normal]
  warn("Unrecognized verbosity level, using normal.") unless level

  # Alphabet: every entry must be a string of exactly one character (empty
  # strings are rejected as well).
  if !alphabet.is_a?(Array) || alphabet.any? { |a| !a.is_a?(String) || a.length != 1 }
    err('The alphabet must be an array of characters, i.e., of strings of length 1')
    exit
  end
  @alphabet = alphabet.uniq
  warn('Removed duplicate entries from alphabet') if @alphabet.size < alphabet.size

  # Binary compression (enabled unless explicitly set to false)
  @binary = binary != false
  warn("Binary alphabet being used with textual mode, are you sure this is what you want?") if !@binary && @alphabet == BINARY

  # Safe mode for encoding (verifies that the data provided is composed exclusively
  # by characters from the alphabet)
  @safe = safe

  # Code bit size
  if bits
    if !bits.is_a?(Integer) || bits < 1
      err('Code size should be a positive integer.')
      exit
    else
      @min_bits = bits
      @max_bits = bits
    end
  else
    @min_bits = find_arg(min_bits, params[:min_bits], DEFAULT_MIN_BITS)
    @max_bits = find_arg(max_bits, params[:max_bits], DEFAULT_MAX_BITS)
    if @max_bits < @min_bits
      warn("Max code size (#{@max_bits}) should be higher than min code size (#{@min_bits}): changed max code size to #{@min_bits}.")
      @max_bits = @min_bits
    end
  end

  # Determine min bits based on alphabet length if not specified
  unless find_arg(min_bits, params[:min_bits])
    alphabet_bits = (@alphabet.size - 1).bit_length
    if bits
      # A fixed code size was requested: keep min == max so the encoding stays
      # constant length, widening both only when the requested size cannot
      # address the whole alphabet.
      @min_bits = @max_bits = alphabet_bits if bits < alphabet_bits
    else
      @min_bits = alphabet_bits
      @max_bits = @min_bits if @max_bits < @min_bits
    end
  end

  # Clear and stop codes
  use_clear = find_arg(clear, params[:clear], DEFAULT_CLEAR)
  use_stop = find_arg(stop, params[:stop], DEFAULT_STOP)
  if !use_stop && @min_bits < 8
    use_stop = true
    # Warning if stop codes were explicitly disabled (false, NOT nil)
    if find_arg(stop, params[:stop]) == false
      warn("Stop codes are necessary for code sizes below 8 bits to prevent ambiguity: enabled stop codes.")
    end
  end

  # Alphabet length checks (the clear and stop codes need a code each)
  extra = (use_clear ? 1 : 0) + (use_stop ? 1 : 0)
  # Max bits doesn't fit alphabet (needs explicit adjustment)
  if (@alphabet.size + extra) > (1 << @max_bits)
    if @binary
      # Keep only as many entries as fit in one bit less than the max code size
      reduced_size = 1 << (@max_bits - 1)
      @alphabet = @alphabet.take(reduced_size)
      warn("Using #{@max_bits - 1} bit binary alphabet (#{reduced_size} entries).")
    else
      @max_bits = (@alphabet.size + extra).bit_length
      warn("Max code size needs to fit the alphabet (and clear & stop codes, if used): increased to #{@max_bits} bits.")
    end
  end
  # Min bits doesn't fit alphabet (needs implicit adjustment)
  if (@alphabet.size + extra) > (1 << @min_bits)
    @min_bits = (@alphabet.size + extra - 1).bit_length
  end

  # Clear and stop codes take the first indices after the alphabet entries
  idx = @alphabet.size - 1
  @clear = use_clear ? idx += 1 : nil
  @stop = use_stop ? idx += 1 : nil
  @deferred = find_arg(deferred, params[:deferred], DEFAULT_DEFERRED)

  # Least/most significant bit packing order
  @lsb = find_arg(lsb, params[:lsb], DEFAULT_LSB)
end

Instance Method Details

#decode(data) ⇒ `String`

Decode the provided data.

Parameters:

data (String) —
Data to decode.

Returns:

(String) —
Decoded data.

# File 'lib/lzwrb.rb', line 261

# Decode the provided data.
#
# The input is expanded into a string of bits (least significant bit of each
# byte first) and consumed code by code, using the current code width @bits.
# Any StandardError raised on the way is handed to lex, whose result is then
# returned instead.
#
# @param data [String] Data to decode.
# @return [String] Decoded data.
def decode(data)
  # Log
  log("<- Decoding #{format_size(data.bytesize)} with #{format_params}.")
  started = Time.now

  # Setup
  init(false)
  table_init
  bitstream = data.unpack('b*').first
  total = bitstream.length

  # Parse data
  pos = 0
  out = ''.b
  prev = nil
  width = @bits
  until pos + width > total
    # Parse code (bits are stored LSB first, hence the reversal)
    @count += 1
    code = bitstream[pos, width].reverse.to_i(2)
    pos += width

    # Handle clear and stop codes, if present
    if @clear && code == @clear
      table_init
      prev = nil
      width = @bits
      next
    end
    break if @stop && code == @stop

    # Handle regular codes
    if prev.nil?
      # Initial code: nothing to add to the table yet
      out << @table[code]
    elsif table_has(code)
      # Existing code: emit it and register previous entry + its first char
      entry = @table[code]
      out << entry
      table_add(@table[prev] + entry[0])
    else
      # New code: it can only be the previous entry plus its own first char
      entry = @table[prev] + @table[prev][0]
      out << entry
      table_add(entry)
    end

    # Prepare next iteration; the width is left untouched when the table was
    # just reset and clear codes are in use
    prev = table_check ? nil : code
    width = @bits if prev || !@clear
  end

  # Return
  elapsed = Time.now - started
  rate = (8.0 * data.bytesize / 1024 ** 2) / elapsed
  ratio = 100 * (1 - data.bytesize.to_f / out.bytesize)
  log("-> Decoding finished in #{"%.3fs" % elapsed} (avg. #{"%.3f" % rate} mbit\/s).")
  log("-> Decoded data: #{format_size(out.bytesize)} (#{"%5.2f%%" % ratio} compression).")
  out.force_encoding(@binary ? 'ASCII-8BIT' : 'UTF-8')
rescue => e
  lex(e, 'Decoding error', false)
end

#encode(data) ⇒ `String`

Encode the provided data.

Parameters:

data (String) —
Data to encode.

Returns:

(String) —
Encoded data.

# File 'lib/lzwrb.rb', line 216

# Encode the provided data.
#
# The data is consumed byte by byte in binary mode and character by character
# otherwise. For empty input no data code is emitted, only the clear and stop
# codes when they are enabled. Any StandardError raised on the way is handed
# to lex, whose result is then returned instead.
#
# @param data [String] Data to encode.
# @return [String] Encoded data.
def encode(data)
  # Log
  log("<- Encoding #{format_size(data.bytesize)} with #{format_params}.")
  stime = Time.now

  # Setup
  init(true)
  table_init
  verify_data(data) if @safe

  # LZW-encode data
  buf = ''.b
  put_code(@clear) unless @clear.nil?
  step = lambda do |c|
    c = CHARS[c] if @binary # each_byte yields integers, the table is keyed by strings
    @count += 1
    next_buf = buf + c
    if table_has(next_buf)
      # Keep growing the current sequence while it is known
      buf = next_buf
    else
      # Emit the longest known sequence and register the new one
      put_code(@table[buf])
      table_add(next_buf)
      table_check
      buf = c
    end
  end
  @binary ? data.each_byte(&step) : data.each_char(&step)
  # Flush the pending sequence; for empty input there is none, and looking up
  # the empty string would not yield a valid code
  put_code(@table[buf]) unless buf.empty?
  put_code(@stop) unless @stop.nil?

  # Pack codes to binary string
  res = @buffer.pack('C*')

  # Return
  ttime = Time.now - stime
  log("-> Encoding finished in #{"%.3fs" % [ttime]} (avg. #{"%.3f" % [(8.0 * data.bytesize / 1024 ** 2) / ttime]} mbit\/s).")
  log("-> Encoded data: #{format_size(res.bytesize)} (#{"%5.2f%%" % [100 * (1 - res.bytesize.to_f / data.bytesize)]} compression).")
  res
rescue => e
  lex(e, 'Encoding error', true)
end

Class: LZWrb

Overview

Constant Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(preset: nil, bits: nil, min_bits: nil, max_bits: nil, binary: nil, alphabet: BINARY, safe: false, lsb: nil, clear: nil, stop: nil, deferred: nil, verbosity: :normal) ⇒ LZWrb

Instance Method Details

#decode(data) ⇒ String

#encode(data) ⇒ String

#initialize(preset: nil, bits: nil, min_bits: nil, max_bits: nil, binary: nil, alphabet: BINARY, safe: false, lsb: nil, clear: nil, stop: nil, deferred: nil, verbosity: :normal) ⇒ `LZWrb`

#decode(data) ⇒ `String`

#encode(data) ⇒ `String`