Class: Spreadsheet::Excel::Reader

Inherits:

Object

Object
Spreadsheet::Excel::Reader

show all

Includes:: Spreadsheet::Encodings, Internals

Defined in:: lib/parseexcel/parseexcel.rb,
lib/spreadsheet/excel/reader.rb,
lib/spreadsheet/excel/reader/biff5.rb,
lib/spreadsheet/excel/reader/biff8.rb

Overview

Reader class for Excel Workbooks. Most read_* method correspond to an Excel-Record/Opcode. You should not need to call any of its methods directly. If you think you do, look at #read

Defined Under Namespace

Modules: Biff5, Biff8

Constant Summary collapse

ROW_BLOCK_OPS =

{
  blank: true, boolerr: true, dbcell: true, formula: true,
  label: true, labelsst: true, mulblank: true, mulrk: true,
  number: true, rk: true, rstring: true
}

Constants included from Internals

Internals::BINARY_FORMATS, Internals::BUILTIN_FORMATS, Internals::BUILTIN_STYLES, Internals::CODEPAGES, Internals::COLOR_CODES, Internals::EIGHT_BYTE_DOUBLE, Internals::ESCAPEMENT_TYPES, Internals::FONT_ENCODINGS, Internals::FONT_FAMILIES, Internals::FONT_WEIGHTS, Internals::LEAP_ERROR, Internals::NGILA_H_FX, Internals::NGILA_V_FX, Internals::NOITCERID_TXET_FX, Internals::OPCODES, Internals::OPCODE_SIZE, Internals::ROW_HEIGHT, Internals::SEDOCPO, Internals::SEDOC_ROLOC, Internals::SEGAPEDOC, Internals::SEILIMAF_TNOF, Internals::SEITILIBISIV_TEEHSKROW, Internals::SELYTS_ENIL_REDROB_FX, Internals::SEPYT_ENILREDNU, Internals::SEPYT_TNEMEPACSE, Internals::SGNIDOCNE_TNOF, Internals::SST_CHUNKSIZE, Internals::TWIPS, Internals::UNDERLINE_TYPES, Internals::WORKSHEET_VISIBILITIES, Internals::XF_BORDER_LINE_STYLES, Internals::XF_H_ALIGN, Internals::XF_TEXT_DIRECTION, Internals::XF_V_ALIGN

Instance Method Summary collapse

Methods included from Internals

#binfmt, #opcode

Constructor Details

#initialize(opts = {}) ⇒ `Reader`

# File 'lib/spreadsheet/excel/reader.rb', line 26

def initialize opts = {}
  @pos = 0
  @bigendian = opts.fetch(:bigendian) {
    [1].pack("l") != "\001\000\000\000"
  }
  @opts = opts
  @boundsheets = nil
  @current_row_block = {}
  @current_row_block_offset = nil
  @formats = {}
  BUILTIN_FORMATS.each { |key, fmt| @formats.store key, client(fmt, "UTF-8") }
end

Instance Method Details

#decode_rk(work) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 39

def decode_rk work
  #  Bit  Mask        Contents
  #    0  0x00000001  0 = Value not changed 1 = Value is multiplied by 100
  #    1  0x00000002  0 = Floating-point value 1 = Signed integer value
  # 31-2  0xFFFFFFFC  Encoded value
  #
  # If bit 1 is cleared, the encoded value represents the 30 most significant
  # bits of an IEEE 754 floating-point value (64-bit double precision). The
  # 34 least significant bits must be set to zero. If bit 1 is set, the
  # encoded value represents a signed 30-bit integer value. To get the
  # correct integer, the encoded value has to be shifted right arithmetically
  # by 2 bits. If bit 0 is set, the decoded value (both integer and
  # floating-point) must be divided by 100 to get the final result.
  flags, = work.unpack "C"
  cent = flags & 1
  int = flags & 2
  value = 0
  if int == 0
    ## remove two bits
    integer, = work.unpack "V"
    integer &= 0xfffffffc
    value, = ("\0\0\0\0" + [integer].pack("V")).unpack EIGHT_BYTE_DOUBLE
  else
    ## I can't find a format for unpacking a little endian signed integer.
    #  'V' works for packing, but not for unpacking. But the following works
    #  fine afaics:
    unsigned, = (@bigendian ? work.reverse : work).unpack "l"
    ## remove two bits
    value = unsigned >> 2
  end
  if cent == 1
    value /= 100.0
  end
  value
end

#encoding(codepage_id) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 75

def encoding codepage_id
  name = CODEPAGES.fetch(codepage_id) do
    raise Spreadsheet::Errors::UnknownCodepage, "Unknown Codepage 0x%04x" % codepage_id
  end

  if RUBY_VERSION >= "1.9"
    begin
      Encoding.find name
    rescue ArgumentError
      raise Spreadsheet::Errors::UnsupportedEncoding, "Unsupported encoding with name '#{name}'"
    end
  else
    name
  end
end

#get_next_chunk ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 91

def get_next_chunk
  pos = @pos
  if pos < @data.size
    op, len = @data[@pos, OPCODE_SIZE].unpack("v2")
    @pos += OPCODE_SIZE
    if len
      work = @data[@pos, len]
      @pos += len
      code = SEDOCPO.fetch(op, op)
      if (io = @opts[:print_opcodes])
        io.puts sprintf("0x%04x/%-16s %5i: %s",
          op, code.inspect, len, work.inspect)
      end
      [pos, code, len + OPCODE_SIZE, work]
    end
  end
end

#in_row_block?(op, previous) ⇒ `Boolean`

# File 'lib/spreadsheet/excel/reader.rb', line 109

def in_row_block? op, previous
  if op == :row
    previous == op
  else
    ROW_BLOCK_OPS.include?(op)
  end
end

#memoize? ⇒ `Boolean`



117
118
119

# File 'lib/spreadsheet/excel/reader.rb', line 117

def memoize?
  @opts[:memoization]
end

#postread_workbook ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 121

def postread_workbook
  sheets = @workbook.worksheets
  sheets.each_with_index do |sheet, idx|
    offset = sheet.offset
    nxt = (nxtsheet = sheets[idx + 1]) ? nxtsheet.offset : @workbook.ole.size
    @workbook.offsets.store sheet, [offset, nxt - offset]
  end
end

#postread_worksheet(worksheet) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 130

def postread_worksheet worksheet
  # We now have a lot of Note and NoteObjects, but they're not linked
  # So link the noteObject(text) to the note (with author, position)
  # TODO
  @note_list.each do |i|
    matching_objs = @note_ob_list.select { |j| j.obj_id == i.obj_id }
    if matching_objs.length > 1
      puts "ERROR - more than one matching object ID!"
    end
    matching_obj = matching_objs.first
    i.text = matching_obj.nil? ? "" : matching_obj.text
    worksheet.add_note i.row, i.col, i.text
  end
end

#read(io) ⇒ `Object`

The entry-point for reading Excel-documents. Reads the Biff-Version and loads additional reader-methods before proceeding with parsing the document.

# File 'lib/spreadsheet/excel/reader.rb', line 148

def read io
  setup io
  read_workbook
  @workbook.default_format = @workbook.format 0
  @workbook.changes.clear
  @workbook
end

#read_blank(worksheet, addr, work) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 156

def read_blank worksheet, addr, work
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  row, column, xf = work.unpack binfmt(:blank)
  set_cell worksheet, row, column, xf
end

#read_bof ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 165

def read_bof
  # Offset  Size  Contents
  #      0     2  BIFF version (always 0x0600 for BIFF8)
  #      2     2  Type of the following data: 0x0005 = Workbook globals
  #                                           0x0006 = Visual Basic module
  #                                           0x0010 = Worksheet
  #                                           0x0020 = Chart
  #                                           0x0040 = Macro sheet
  #                                           0x0100 = Workspace file
  #      4     2  Build identifier
  #      6     2  Build year
  #      8     4  File history flags
  #     12     4  Lowest Excel version that can read all records in this file
  _, @bof, _, work = get_next_chunk
  ## version and datatype are common to all Excel-Versions. Later versions
  #  have additional information such as build-id and -year (from BIFF5).
  #  These are ignored for the time being.
  version, datatype = work.unpack("v2")
  if datatype == 0x5
    @version = version
  end
end

#read_boolerr(worksheet, addr, work) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 188

def read_boolerr worksheet, addr, work
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6     1  Boolean or error value (type depends on the following byte)
  #      7     1  0 = Boolean value; 1 = Error code
  row, column, xf, value, error = work.unpack "v3C2"
  set_cell worksheet, row, column, xf, (error == 0) ? value > 0 : Error.new(value)
end

#read_boundsheet(work, pos, len) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 199

def read_boundsheet work, pos, len
  # Offset  Size  Contents
  #      0     4  Absolute stream position of the BOF record of the sheet
  #               represented by this record. This field is never encrypted
  #               in protected files.
  #      4     1  Visibility: 0x00 = Visible
  #                           0x01 = Hidden
  #                           0x02 = Strong hidden (see below)
  #      5     1  Sheet type: 0x00 = Worksheet
  #                           0x02 = Chart
  #                           0x06 = Visual Basic module
  #      6  var.  Sheet name: BIFF5/BIFF7: Byte string,
  #                           8-bit string length (➜ 3.3)
  #                           BIFF8: Unicode string, 8-bit string length (➜ 3.4)
  offset, visibility, _ = work.unpack("VC2")
  name = client read_string(work[6..]), @workbook.encoding
  if @boundsheets
    @boundsheets[0] += 1
    @boundsheets[2] += len
  else
    @boundsheets = [1, pos, len]
  end
  @workbook.set_boundsheets(*@boundsheets)
  @workbook.add_worksheet Worksheet.new(name: name,
    ole: @book,
    offset: offset,
    reader: self,
    visibility: WORKSHEET_VISIBILITIES[visibility])
end

#read_codepage(work, pos, len) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 229

def read_codepage work, pos, len
  codepage, _ = work.unpack "v"
  @workbook.set_encoding encoding(codepage), pos, len
end

#read_colinfo(worksheet, work, pos, len) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 234

def read_colinfo worksheet, work, pos, len
  # Offset  Size  Contents
  #      0     2  Index to first column in the range
  #      2     2  Index to last column in the range
  #      4     2  Width of the columns in 1/256 of the width of the zero
  #               character, using default font (first FONT record in the
  #               file)
  #      6     2  Index to XF record (➜ 6.115) for default column formatting
  #      8     2  Option flags:
  #               Bits  Mask    Contents
  #                  0  0x0001  1 = Columns are hidden
  #               10-8  0x0700  Outline level of the columns (0 = no outline)
  #                 12  0x1000  1 = Columns are collapsed
  #     10     2  Not used
  first, last, width, xf, opts = work.unpack binfmt(:colinfo)[0..-2]
  first.upto last do |col|
    column = Column.new col, @workbook.format(xf),
      width: width.to_f / 256,
      hidden: (opts & 0x0001) > 0,
      collapsed: (opts & 0x1000) > 0,
      outline_level: (opts & 0x0700) / 256
    column.worksheet = worksheet
    worksheet.columns[col] = column
  end
end

#read_dimensions(worksheet, work, pos, len) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 260

def read_dimensions worksheet, work, pos, len
  # Offset  Size  Contents
  #      0     4  Index to first used row
  #      4     4  Index to last used row, increased by 1
  #      8     2  Index to first used column
  #     10     2  Index to last used column, increased by 1
  #     12     2  Not used
  worksheet.set_dimensions work.unpack(binfmt(:dimensions)), pos, len
end

#read_font(work, pos, len) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 270

def read_font work, pos, len
  # Offset  Size  Contents
  #      0     2  Height of the font (in twips = 1/20 of a point)
  #      2     2  Option flags:
  #               Bit  Mask    Contents
  #                 0  0x0001  1 = Characters are bold (redundant, see below)
  #                 1  0x0002  1 = Characters are italic
  #                 2  0x0004  1 = Characters are underlined
  #                                (redundant, see below)
  #                 3  0x0008  1 = Characters are struck out
  #                 4  0x0010  1 = Characters are outlined (djberger)
  #                 5  0x0020  1 = Characters are shadowed (djberger)
  #      4     2  Colour index (➜ 6.70)
  #      6     2  Font weight (100-1000). Standard values are
  #                           0x0190 (400) for normal text and
  #                           0x02bc (700) for bold text.
  #      8     2  Escapement type: 0x0000 = None
  #                                0x0001 = Superscript
  #                                0x0002 = Subscript
  #     10     1  Underline type: 0x00 = None
  #                               0x01 = Single
  #                               0x02 = Double
  #                               0x21 = Single accounting
  #                               0x22 = Double accounting
  #     11     1  Font family:
  #               0x00 = None (unknown or don't care)
  #               0x01 = Roman (variable width, serifed)
  #               0x02 = Swiss (variable width, sans-serifed)
  #               0x03 = Modern (fixed width, serifed or sans-serifed)
  #               0x04 = Script (cursive)
  #               0x05 = Decorative (specialised,
  #                                  for example Old English, Fraktur)
  #     12     1  Character set: 0x00 =   0 = ANSI Latin
  #                              0x01 =   1 = System default
  #                              0x02 =   2 = Symbol
  #                              0x4d =  77 = Apple Roman
  #                              0x80 = 128 = ANSI Japanese Shift-JIS
  #                              0x81 = 129 = ANSI Korean (Hangul)
  #                              0x82 = 130 = ANSI Korean (Johab)
  #                              0x86 = 134 = ANSI Chinese Simplified GBK
  #                              0x88 = 136 = ANSI Chinese Traditional BIG5
  #                              0xa1 = 161 = ANSI Greek
  #                              0xa2 = 162 = ANSI Turkish
  #                              0xa3 = 163 = ANSI Vietnamese
  #                              0xb1 = 177 = ANSI Hebrew
  #                              0xb2 = 178 = ANSI Arabic
  #                              0xba = 186 = ANSI Baltic
  #                              0xcc = 204 = ANSI Cyrillic
  #                              0xde = 222 = ANSI Thai
  #                              0xee = 238 = ANSI Latin II (Central European)
  #                              0xff = 255 = OEM Latin I
  #     13     1  Not used
  #     14  var.  Font name:
  #               BIFF5/BIFF7: Byte string, 8-bit string length (➜ 3.3)
  #               BIFF8: Unicode string, 8-bit string length (➜ 3.4)
  name = client read_string(work[14..]), @workbook.encoding
  font = Font.new name
  size, opts, color, font.weight, escapement, underline,
    family, encoding = work.unpack binfmt(:font)
  font.size = size / TWIPS
  font.italic = opts & 0x0002
  font.strikeout = opts & 0x0008
  font.outline = opts & 0x0010
  font.shadow = opts & 0x0020
  font.color = COLOR_CODES[color] || :text
  font.escapement = ESCAPEMENT_TYPES[escapement]
  font.underline = UNDERLINE_TYPES[underline]
  font.family = FONT_FAMILIES[family]
  font.encoding = FONT_ENCODINGS[encoding]
  @workbook.add_font font
end

#read_format(work, pos, len) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 342

def read_format work, pos, len
  # Offset  Size  Contents
  #      0     2  Format index used in other records
  #      2  var.  Number format string
  #               (Unicode string, 16-bit string length, ➜ 3.4)
  idx, = work.unpack "v"
  value = read_string work[2..], 2
  @formats.store idx, client(value, @workbook.encoding)
end

#read_formula(worksheet, addr, work) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 352

def read_formula worksheet, addr, work
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6     8  Result of the formula. See below for details.
  #     14     2  Option flags:
  #               Bit  Mask    Contents
  #                 0  0x0001  1 = Recalculate always
  #                 1  0x0002  1 = Calculate on open
  #                 3  0x0008  1 = Part of a shared formula
  #     16     4  Not used
  #     20  var.  Formula data (RPN token array, ➜ 4)
  #               Offset  Size  Contents
  #                    0     2  Size of the following formula data (sz)
  #                    2    sz  Formula data (RPN token array)
  #               [2+sz]  var.  (optional) Additional data for specific tokens
  #                             (➜ 4.1.6, for example tArray token, ➜ 4.8.7)
  #
  # Result of the Formula
  # Dependent on the type of value the formula returns, the result field has
  # the following format:
  #
  # Result is a numeric value:
  # Offset  Size  Contents
  #      0     8  IEEE 754 floating-point value (64-bit double precision)
  #
  # Result is a string (the string follows in a STRING record, ➜ 6.98):
  # Offset  Size  Contents
  #      0     1  0x00 (identifier for a string value)
  #      1     5  Not used
  #      6     2  0xffff
  # Note: In BIFF8 the string must not be empty. For empty cells there is a
  # special identifier defined (see below).
  #
  # Result is a Boolean value:
  # Offset  Size  Contents
  #      0     1  0x01 (identifier for a Boolean value)
  #      1     1  Not used
  #      2     1  0 = FALSE, 1 = TRUE
  #      3     3  Not used
  #      6     2  0xffff
  #
  # Result is an error value:
  # Offset  Size  Contents
  #      0     1  0x02 (identifier for an error value)
  #      1     1  Not used
  #      2     1  Error code (➜ 3.7)
  #      3     3  Not used
  #      6     2  0xffff
  #
  # Result is an empty cell (BIFF8), for example an empty string:
  # Offset  Size  Contents
  #      0     1  0x03 (identifier for an empty cell)
  #      1     5  Not used
  #      6     2  0xffff
  row, column, xf, rtype, rval, rcheck, opts = work.unpack "v3CxCx3v2"
  formula = Formula.new
  formula.shared = (opts & 0x08) > 0
  formula.data = work[20..]
  if rcheck != 0xffff || rtype > 3
    value, = work.unpack "x6E"
    unless value
      # on architectures where sizeof(double) > 8
      value, = work.unpack "x6e"
    end
    formula.value = value
  elsif rtype == 0
    pos, op, _len, work = get_next_chunk
    if op == :sharedfmla
      ## TODO: formula-support in 0.8.0
      pos, op, _len, work = get_next_chunk
    end
    if op == :string
      formula.value = client read_string(work, 2), @workbook.encoding
    else
      warn "String Value expected after Formula, but got #{op}"
      formula.value = Error.new 0x2a
      @pos = pos
    end
  elsif rtype == 1
    formula.value = rval > 0
  elsif rtype == 2
    formula.value = Error.new rval
  else
    # leave the Formula value blank
  end
  set_cell worksheet, row, column, xf, formula
end

#read_guts(worksheet, work, pos, len) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 990

def read_guts worksheet, work, pos, len
  # Offset Size Contents
  #      0    2 Width of the area to display row outlines (left of the sheet), in pixel
  #      2    2 Height of the area to display column outlines (above the sheet), in pixel
  #      4    2 Number of visible row outline levels (used row levels + 1; or 0, if not used)
  #      6    2 Number of visible column outline levels (used column levels + 1; or 0, if not used)
  width, height, row_level, col_level = work.unpack "v4"
  worksheet.guts[:width] = width
  worksheet.guts[:height] = height
  worksheet.guts[:row_level] = row_level
  worksheet.guts[:col_level] = col_level
end

#read_hlink(worksheet, work, pos_unused, len) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 442

def read_hlink worksheet, work, pos_unused, len
  # 6.53.1 Common Record Contents
  # Offset  Size  Contents
  #      0     8  Cell range address of all cells containing this hyperlink
  #               (➜ 3.13.1)
  #      8    16  GUID of StdLink:
  #               D0 C9 EA 79 F9 BA CE 11 8C 82 00 AA 00 4B A9 0B
  #               (79EAC9D0-BAF9-11CE-8C82-00AA004BA90B)
  #     24     4  Unknown value: 0x00000002
  #     28     4  Option flags (see below)
  #               Bit  Mask        Contents
  #                 0  0x00000001  0 = No link extant
  #                                1 = File link or URL
  #                 1  0x00000002  0 = Relative file path
  #                                1 = Absolute path or URL
  #           2 and 4  0x00000014  0 = No description
  #                                1 (both bits) = Description
  #                 3  0x00000008  0 = No text mark
  #                                1 = Text mark
  #                 7  0x00000080  0 = No target frame
  #                                1 = Target frame
  #                 8  0x00000100  0 = File link or URL
  #                                1 = UNC path (incl. server name)
  #--------------------------------------------------------------------------
  #   [32]     4  (optional, see option flags) Character count of description
  #               text, including trailing zero word (dl)
  #   [36]  2∙dl  (optional, see option flags) Character array of description
  #               text, no Unicode string header, always 16-bit characters,
  #               zero-terminated
  #--------------------------------------------------------------------------
  # [var.]     4  (optional, see option flags) Character count of target
  #               frame, including trailing zero word (fl)
  # [var.]  2∙fl  (optional, see option flags) Character array of target
  #               frame, no Unicode string header, always 16-bit characters,
  #               zero-terminated
  #--------------------------------------------------------------------------
  #   var.  var.  Special data (➜ 6.53.2 and following)
  #--------------------------------------------------------------------------
  # [var.]     4  (optional, see option flags) Character count of the text
  #               mark, including trailing zero word (tl)
  # [var.]  2∙tl  (optional, see option flags) Character array of the text
  #               mark without “#” sign, no Unicode string header, always
  #               16-bit characters, zero-terminated
  firstrow, lastrow, firstcol, lastcol, _, opts = work.unpack "v4H32x4V"
  has_link = opts & 0x0001
  desc = opts & 0x0014
  textmark = opts & 0x0008
  target = opts & 0x0080
  unc = opts & 0x0100
  link = Link.new
  _, _ = nil
  pos = 32
  if desc > 0
    description, pos = read_hlink_string work, pos
    link << description
  end
  if target > 0
    link.target_frame, pos = read_hlink_string work, pos
  end
  if unc > 0
    # 6.53.4 Hyperlink to a File with UNC (Universal Naming Convention) Path
    # These data fields are for UNC paths containing a server name (for
    # instance “\\server\path\file.xls”). The lower 9 bits of the option
    # flags field must be 1.x00x.xx112.
    # Offset  Size  Contents
    #      0     4  Character count of the UNC,
    #               including trailing zero word (fl)
    #      4  2∙fl  Character array of the UNC, no Unicode string header,
    #               always 16-bit characters, zeroterminated.
    link.url, pos = read_hlink_string work, pos
  elsif has_link > 0
    uid, = work.unpack "x#{pos}H32"
    pos += 16
    if uid == "e0c9ea79f9bace118c8200aa004ba90b"
      # 6.53.2 Hyperlink containing a URL (Uniform Resource Locator)
      # These data fields occur for links which are not local files or files
      # in the local network (for instance HTTP and FTP links and e-mail
      # addresses). The lower 9 bits of the option flags field must be
      # 0.x00x.xx112 (x means optional, depending on hyperlink content). The
      # GUID could be used to distinguish a URL from a file link.
      # Offset  Size  Contents
      #      0    16  GUID of URL Moniker:
      #               E0 C9 EA 79 F9 BA CE 11 8C 82 00 AA 00 4B A9 0B
      #               (79EAC9E0-BAF9-11CE-8C82-00AA004BA90B)
      #     16     4  Size of character array of the URL, including trailing
      #               zero word (us). There are us/2-1 characters in the
      #               following string.
      #     20    us  Character array of the URL, no Unicode string header,
      #               always 16-bit characters, zeroterminated
      size, = work.unpack "x#{pos}V"
      pos += 4
      data = work[pos, size].chomp "\000\000"
      link.url = client data
      pos += size
    else
      # 6.53.3 Hyperlink to a Local File
      # These data fields are for links to files on local drives. The path of
      # the file can be complete with drive letter (absolute) or relative to
      # the location of the workbook. The lower 9 bits of the option flags
      # field must be 0.x00x.xxx12. The GUID could be used to distinguish a
      # URL from a file link.
      # Offset  Size  Contents
      #      0    16  GUID of File Moniker:
      #               03 03 00 00 00 00 00 00 C0 00 00 00 00 00 00 46
      #               (00000303-0000-0000-C000-000000000046)
      #     16     2  Directory up-level count. Each leading “..\” in the
      #               file link is deleted and increases this counter.
      #     18     4  Character count of the shortened file path and name,
      #               including trailing zero byte (sl)
      #     22    sl  Character array of the shortened file path and name in
      #               8.3-DOS-format. This field can be filled with a long
      #               file name too. No Unicode string header, always 8-bit
      #               characters, zeroterminated.
      #  22+sl    24  Unknown byte sequence:
      #               FF FF AD DE 00 00 00 00
      #               00 00 00 00 00 00 00 00
      #               00 00 00 00 00 00 00 00
      #  46+sl     4  Size of the following file link field including string
      #               length field and additional data field (sz). If sz is
      #               zero, nothing will follow (except a text mark).
      # [50+sl]    4  (optional) Size of character array of the extended file
      #               path and name (xl). There are xl/2 characters in the
      #               following string.
      # [54+sl]    2  (optional) Unknown byte sequence: 03 00
      # [56+sl]   xl  (optional) Character array of the extended file path
      #               and name (xl), no Unicode string header, always 16-bit
      #               characters, not zero-terminated
      uplevel, count = work.unpack "x#{pos}vV"
      pos += 6
      # TODO: short file path may have any of the OEM encodings. Find out which
      #       and use the #client method to convert the encoding.
      prefix = internal("..\\", "UTF-8") * uplevel
      link.dos = link.url = prefix << work[pos, count].chomp("\000")
      pos += count + 24
      total, size = work.unpack "x#{pos}V2"
      pos += 10
      if total > 0
        link.url = client work[pos, size]
        pos += size
      end
    end
  else
    # 6.53.5 Hyperlink to the Current Workbook
    # In this case only the text mark field is present (optional with
    # description).
    # Example: The URL “#Sheet2!B1:C2” refers to the given range in the
    # current workbook.
    # The lower 9 bits of the option flags field must be 0.x00x.1x002.
  end
  if textmark > 0
    link.fragment, _ = read_hlink_string work, pos
  end
  if link.empty?
    link << link.href
  end
  firstrow.upto lastrow do |row|
    firstcol.upto lastcol do |col|
      worksheet.add_link row, col, link
    end
  end
end

#read_hlink_string(work, pos) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 604

def read_hlink_string work, pos
  count, = work.unpack "x#{pos}V"
  len = count * 2
  pos += 4
  data = work[pos, len].chomp "\000\000"
  pos += len
  [client(data, "UTF-16LE"), pos]
end

#read_index(worksheet, work, pos, len) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 613

def read_index worksheet, work, pos, len
  # Offset  Size  Contents
  #      0     4  Not used
  #      4     4  Index to first used row (rf, 0-based)
  #      8     4  Index to first row of unused tail of sheet
  #               (rl, last used row + 1, 0-based)
  #     12     4  Absolute stream position of the
  #               DEFCOLWIDTH record (➜ 6.29) of the current sheet. If this
  #               record does not exist, the offset points to the record at
  #               the position where the DEFCOLWIDTH record would occur.
  #     16  4∙nm  Array of nm absolute stream positions to the
  #               DBCELL record (➜ 6.26) of each Row Block
  # TODO: use the index if it exists
  # _, first_used, first_unused, defcolwidth, *indices = work.unpack 'V*'
end

#read_label(worksheet, addr, work) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 629

def read_label worksheet, addr, work
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6  var.  Unicode string, 16-bit string length (➜ 3.4)
  row, column, xf = work.unpack "v3"
  value = client read_string(work[6..], 2), @workbook.encoding
  set_cell worksheet, row, column, xf, value
end

#read_labelsst(worksheet, addr, work) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 640

def read_labelsst worksheet, addr, work
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6     4  Index into SST record (➜ 6.96)
  row, column, xf, index = work.unpack binfmt(:labelsst)
  set_cell worksheet, row, column, xf, worksheet.shared_string(index)
end

#read_merged_cells(worksheet, work, pos, len) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 815

def read_merged_cells worksheet, work, pos, len
  # This record contains the addresses of merged cell ranges in the current sheet.
  # Record MERGEDCELLS, BIFF8:
  # Offset  Size  Contents
  # 0       var.  Cell range address list with merged ranges (➜ 2.5.15)
  # If the record size exceeds the limit, it is not continued with a CONTINUE record,
  # but another self-contained MERGEDCELLS record is started. The limit of 8224 bytes
  # per record results in a maximum number of 1027 merged ranges.

  worksheet.merged_cells.push(*read_range_address_list(work, len))
  #
  # A cell range address list consists of a field with the number of ranges and the list
  # of the range addresses.
  # Cell range address list, BIFF2-BIFF8:
  # Offset  Size          Contents
  # 0       2             Number of following cell range addresses (nm)
  # 2       6∙nm or 8∙nm  List of nm cell range addresses (➜ 2.5.14)
  #
end

#read_mulblank(worksheet, addr, work) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 650

def read_mulblank worksheet, addr, work
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to first column (fc)
  #      4  2∙nc  List of nc=lc-fc+1 16-bit indexes to XF records (➜ 6.115)
  # 4+2∙nc     2  Index to last column (lc)
  row, column, *xfs = work.unpack "v*"
  xfs.pop #=> last_column
  xfs.each_with_index { |xf, idx| set_cell worksheet, row, column + idx, xf }
end

#read_mulrk(worksheet, addr, work) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 661

def read_mulrk worksheet, addr, work
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to first column (fc)
  #      4  6∙nc  List of nc=lc-fc+1 XF/RK structures. Each XF/RK contains:
  #               Offset  Size  Contents
  #                    0     2  Index to XF record (➜ 6.115)
  #                    2     4  RK value (➜ 3.6)
  # 4+6∙nc     2  Index to last column (lc)
  row, column = work.unpack "v2"
  4.step(work.size - 6, 6) do |idx|
    xf, = work.unpack "x#{idx}v"
    set_cell worksheet, row, column, xf, decode_rk(work[idx + 2, 4])
    column += 1
  end
end

#read_note(worksheet, work, pos, len) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 1184

def read_note worksheet, work, pos, len
  # puts "\nDEBUG: found a note record in read_worksheet\n"
  row, col, _, obj_id, obj_auth_en, obj_auth_len_fmt = work.unpack("v5C")
  if obj_auth_en > 0
    if obj_auth_len_fmt == 0
      # puts "Picking compressed charset"
      # Skip to offset due to 'v5C' used above
      obj_auth = work.unpack("@11C" + (obj_auth_en - 1).to_s + "C")
    elsif obj_auth_len_fmt == 1
      # puts "Picking uncompressed charset"
      obj_auth = work.unpack("@11S" + (obj_auth_en - 1).to_s + "S")
    end
    obj_auth = obj_auth.pack("C*")
  else
    obj_auth = ""
  end
  @note = Note.new
  @note.length = len
  @note.row = row
  @note.col = col
  @note.author = obj_auth
  @note.obj_id = obj_id
  # Pop it on the list to be sorted in postread_worksheet
  @note_list << @note
end

#read_number(worksheet, addr, work) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 678

def read_number worksheet, addr, work
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6     8  IEEE 754 floating-point value (64-bit double precision)
  row, column, xf, value = work.unpack binfmt(:number)
  set_cell worksheet, row, column, xf, value
end

#read_pagesetup(worksheet, work, pos, len) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 980

def read_pagesetup(worksheet, work, pos, len)
  worksheet.pagesetup.delete_if { true }
  data = work.unpack(binfmt(:pagesetup))
  worksheet.pagesetup[:orientation] = (data[5] == 0) ? :landscape : :portrait
  worksheet.pagesetup[:adjust_to] = data[1]

  worksheet.pagesetup[:orig_data] = data
  # TODO: add options acording to specification
end

#read_rk(worksheet, addr, work) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 688

def read_rk worksheet, addr, work
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6     4  RK value (➜ 3.6)
  row, column, xf = work.unpack "v3"
  set_cell worksheet, row, column, xf, decode_rk(work[6, 4])
end

#read_row(worksheet, addr) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 698

def read_row worksheet, addr
  row = addr[:index]
  @current_row_block.fetch [worksheet, row] do
    @current_row_block.clear
    cells = @current_row_block[[worksheet, row]] = Row.new(nil, row)
    @pos = addr[:offset]
    found = false
    while (tuple = get_next_chunk)
      pos, op, _, work = tuple
      case op
      when :eof      # ●  EOF ➜ 6.36 - we should only get here if there is just
        #                 one Row-Block
        @pos = pos
        return cells
      when :dbcell   # ○  DBCELL Stream offsets to the cell records of each row
        return cells
      when :row      # ○○ Row Blocks ➜ 5.7
        # ●  ROW ➜ 6.83
        # ignore, we already did these in read_worksheet
        return cells if found
      when :blank    # BLANK ➜ 6.7
        found = true
        read_blank worksheet, addr, work
      when :boolerr  # BOOLERR ➜ 6.10
        found = true
        read_boolerr worksheet, addr, work
      when 0x0002    # INTEGER ➜ 6.56 (BIFF2 only)
        found = true
        # TODO: implement for BIFF2 support
      when :formula  # FORMULA ➜ 6.46
        found = true
        read_formula worksheet, addr, work
      when :label    # LABEL ➜ 6.59 (BIFF2-BIFF7)
        found = true
        read_label worksheet, addr, work
      when :labelsst # LABELSST ➜ 6.61 (BIFF8 only)
        found = true
        read_labelsst worksheet, addr, work
      when :mulblank # MULBLANK ➜ 6.64 (BIFF5-BIFF8)
        found = true
        read_mulblank worksheet, addr, work
      when :mulrk    # MULRK ➜ 6.65 (BIFF5-BIFF8)
        found = true
        read_mulrk worksheet, addr, work
      when :number   # NUMBER ➜ 6.68
        found = true
        read_number worksheet, addr, work
      when :rk       # RK ➜ 6.82 (BIFF3-BIFF8)
        found = true
        read_rk worksheet, addr, work
      when :rstring  # RSTRING ➜ 6.84 (BIFF5/BIFF7)
        found = true
        read_rstring worksheet, addr, work
      end
    end
    cells
  end
end

#read_rstring(worksheet, addr, work) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 757

def read_rstring worksheet, addr, work
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6    sz  Unformatted Unicode string, 16-bit string length (➜ 3.4)
  #   6+sz     2  Number of Rich-Text formatting runs (rt)
  #   8+sz  4·rt  List of rt formatting runs (➜ 3.2)
  row, column, xf = work.unpack "v3"
  value = client read_string(work[6..], 2), @workbook.encoding
  set_cell worksheet, row, column, xf, value
end

#read_sheet_protection(worksheet, op, data) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 1210

def read_sheet_protection worksheet, op, data
  case op
  when :protect
    worksheet.protect! if data.unpack1("v") == 1
  when :password
    worksheet.password_hash = data.unpack1("v")
  end
end

#read_style(work, pos, len) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 1003

def read_style work, pos, len
  # User-Defined Cell Styles:
  # Offset  Size  Contents
  #      0     2  Bit   Mask    Contents
  #               11-0  0x0fff  Index to style XF record (➜ 6.115)
  #                 15  0x8000  Always 0 for user-defined styles
  #      2  var.  BIFF2-BIFF7:  Non-empty byte string,
  #                             8-bit string length (➜ 3.3)
  #               BIFF8:        Non-empty Unicode string,
  #                             16-bit string length (➜ 3.4)
  #
  # Built-In Cell Styles
  # Offset  Size  Contents
  #      0     2  Bit   Mask    Contents
  #               11-0  0x0FFF  Index to style XF record (➜ 6.115)
  #                 15  0x8000  Always 1 for built-in styles
  #      2     1  Identifier of the built-in cell style:
  #               0x00 = Normal
  #               0x01 = RowLevel_lv (see next field)
  #               0x02 = ColLevel_lv (see next field)
  #               0x03 = Comma
  #               0x04 = Currency
  #               0x05 = Percent
  #               0x06 = Comma [0] (BIFF4-BIFF8)
  #               0x07 = Currency [0] (BIFF4-BIFF8)
  #               0x08 = Hyperlink (BIFF8)
  #               0x09 = Followed Hyperlink (BIFF8)
  #      3     1  Level for RowLevel or ColLevel style (zero-based, lv),
  #               FFH otherwise
  flags, = work.unpack "v"
  xf_idx = flags & 0x0fff
  xf = @workbook.format xf_idx
  builtin = flags & 0x8000
  if builtin == 0
    xf.name = client read_string(work[2..], 2), @workbook.encoding
  else
    id, level = work.unpack "x2C2"
    if (name = BUILTIN_STYLES[id])
      name.sub "_lv", "_#{level}"
      xf.name = client name, "UTF-8"
    end
  end
end

#read_window2(worksheet, work, pos, len) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 770

def read_window2 worksheet, work, pos, len
  # This record contains additional settings for the document window
  # (BIFF2-BIFF4) or for the window of a specific worksheet (BIFF5-BIFF8).
  # It is part of the Sheet View Settings Block (➜ 4.5).
  # Offset  Size  Contents
  #      0     2  Option flags:
  #               Bits  Mask    Contents
  #                  0  0x0001  0 = Show formula results
  #                             1 = Show formulas
  #                  1  0x0002  0 = Do not show grid lines
  #                             1 = Show grid lines
  #                  2  0x0004  0 = Do not show sheet headers
  #                             1 = Show sheet headers
  #                  3  0x0008  0 = Panes are not frozen
  #                             1 = Panes are frozen (freeze)
  #                  4  0x0010  0 = Show zero values as empty cells
  #                             1 = Show zero values
  #                  5  0x0020  0 = Manual grid line colour
  #                             1 = Automatic grid line colour
  #                  6  0x0040  0 = Columns from left to right
  #                             1 = Columns from right to left
  #                  7  0x0080  0 = Do not show outline symbols
  #                             1 = Show outline symbols
  #                  8  0x0100  0 = Keep splits if pane freeze is removed
  #                             1 = Remove splits if pane freeze is removed
  #                  9  0x0200  0 = Sheet not selected
  #                             1 = Sheet selected (BIFF5-BIFF8)
  #                 10  0x0400  0 = Sheet not active
  #                             1 = Sheet active (BIFF5-BIFF8)
  #                 11  0x0800  0 = Show in normal view
  #                             1 = Show in page break preview (BIFF8)
  #      2     2  Index to first visible row
  #      4     2  Index to first visible column
  #      6     2  Colour index of grid line colour (➜ 5.74).
  #               Note that in BIFF2-BIFF5 an RGB colour is written instead.
  #      8     2  Not used
  #     10     2  Cached magnification factor in page break preview (in percent)
  #               0 = Default (60%)
  #     12     2  Cached magnification factor in normal view (in percent)
  #               0 = Default (100%)
  #     14     4  Not used
  flags, _ = work.unpack "v"
  worksheet.selected = flags & 0x0200 > 0
end

#read_workbook ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 835

def read_workbook
  previous_op = nil
  while (tuple = get_next_chunk)
    pos, op, len, work = tuple
    case op
    when @bof, :bof  # ●  BOF Type = worksheet (➜ 6.8)
      return
    when :eof        # ●  EOF ➜ 6.36
      postread_workbook
      return
    when :datemode   # ○  DATEMODE ➜ 6.25
      flag, _ = work.unpack "v"
      @workbook.date_base = if flag == 1
        DateTime.new 1904, 1, 1
      else
        DateTime.new 1899, 12, 31
      end
    when :continue   # ○  CONTINUE ➜ 6.22
      case previous_op
      when :sst      # ●  SST ➜ 6.96
        continue_sst work, pos, len
      end
    when :codepage   # ○  CODEPAGE ➜ 6.17
      read_codepage work, pos, len
    when :boundsheet # ●● BOUNDSHEET ➜ 6.12
      read_boundsheet work, pos, len
    when :xf         # ●● XF ➜ 6.115
      read_xf work, pos, len
    when :sst        # ○  Shared String Table ➜ 5.11
      # ●  SST ➜ 6.96
      read_sst work, pos, len
      # TODO: implement memory-efficient sst handling, possibly in conjunction
      #       with EXTSST
    when :extsst     # ●  EXTSST ➜ 6.40
      read_extsst work, pos, len
    when :style      # ●● STYLE ➜ 6.99
      read_style work, pos, len
    when :format     # ○○ FORMAT (Number Format) ➜ 6.45
      read_format work, pos, len
    when :font       # ●● FONT ➜ 6.43
      read_font work, pos, len
    end
    previous_op = op unless op == :continue
  end
end

#read_worksheet(worksheet, offset) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 881

def read_worksheet worksheet, offset
  @pos = offset
  @detected_rows = {}
  @note_ob_list = []
  @note_list = []
  @note_object = nil
  previous = nil
  while (tuple = get_next_chunk)
    pos, op, len, work = tuple
    if (offset = @current_row_block_offset) && !in_row_block?(op, previous)
      @current_row_block_offset = nil
      offset[1] = pos - offset[0]
    end
    case op
    when :eof        # ●  EOF ➜ 6.36
      postread_worksheet worksheet
      return
      # when :uncalced   # ○  UNCALCED ➜ 6.104
      # TODO: Formula support. Values were not calculated before saving
      # warn <<-EOS
      #  Some fields containig formulas were saved without a computed value.
      #  Support Spreadsheet::Excel by implementing formula-calculations!
      # EOS
      # when :index      # ○  INDEX ➜ 5.7 (Row Blocks), ➜ 6.55
      # TODO: if there are changes in rows, omit index when writing
      # read_index worksheet, work, pos, len
    when :guts       #    GUTS      5.53
      read_guts worksheet, work, pos, len
    when :colinfo    # ○○ COLINFO ➜ 6.18
      read_colinfo worksheet, work, pos, len
    when :dimensions # ●  DIMENSIONS ➜ 6.31
      read_dimensions worksheet, work, pos, len
    when :row        # ○○ Row Blocks ➜ 5.7
      # ●  ROW ➜ 6.83
      set_row_address worksheet, work, pos, len
    when :hlink
      read_hlink worksheet, work, pos, len
    when :window2
      read_window2 worksheet, work, pos, len
    when :mergedcells # ○○ MERGEDCELLS  ➜ 5.67
      read_merged_cells worksheet, work, pos, len
    when :protect, :password
      read_sheet_protection worksheet, op, work
    when :note # a note references an :obj
      read_note worksheet, work, pos, len
    when :obj # it contains the author in the NTS structure
      _ft, _cb, ot, obj_id = work.unpack("v4")
      if ot == 0x19
        # puts "\nDEBUG: found Note Obj record"
        @note_object = NoteObject.new
        @note_object.obj_id = obj_id
      end
      # p work
    when :drawing # this can be followed by txo in case of a note
      if previous == :obj
        # puts "\nDEBUG: found MsDrawing record"
        # p work
      end
    when :txo # this contains the length of the note text
      if previous == :drawing
        # puts "\nDEBUG: found TxO record"
        # p work
      end
    when :continue # this contains the actual note text
      if previous == :txo && @note_object
        # puts "\nDEBUG: found Continue record"
        continue_fmt = work.unpack("C")
        if continue_fmt.first == 0
          # puts "Picking compressed charset"
          # Skip to offset due to 'v5C' used above
          text = work.unpack("@1C*")
          @note_object.text = text.pack("C*")
        elsif continue_fmt.first == 1
          # puts "Picking uncompressed charset"
          text = work.unpack("@1S*")
          @note_object.text = text.pack("U*")
        end
        @note_ob_list << @note_object
      end
    when :pagesetup
      read_pagesetup(worksheet, work, pos, len)
    when :leftmargin
      worksheet.margins[:left] = work.unpack1(binfmt(:margin))
    when :rightmargin
      worksheet.margins[:right] = work.unpack1(binfmt(:margin))
    when :topmargin
      worksheet.margins[:top] = work.unpack1(binfmt(:margin))
    when :bottommargin
      worksheet.margins[:bottom] = work.unpack1(binfmt(:margin))
    else
      if ROW_BLOCK_OPS.include?(op)
        set_missing_row_address worksheet, work, pos, len
      end
    end
    previous = op
    # previous = op unless op == :continue
  end
end

#read_xf(work, pos, len) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 1047

def read_xf work, pos, len
  # Offset  Size  Contents
  #      0     2  Index to FONT record (➜ 6.43)
  #      2     2  Index to FORMAT record (➜ 6.45)
  #      4     2   Bit  Mask    Contents
  #                2-0  0x0007  XF_TYPE_PROT – XF type, cell protection
  #                             Bit  Mask  Contents
  #                               0  0x01  1 = Cell is locked
  #                               1  0x02  1 = Formula is hidden
  #                               2  0x04  0 = Cell XF; 1 = Style XF
  #               15-4  0xfff0  Index to parent style XF
  #                             (always 0xfff in style XFs)
  #      6     1   Bit  Mask    Contents
  #                2-0  0x07    XF_HOR_ALIGN – Horizontal alignment
  #                             Value  Horizontal alignment
  #                             0x00   General
  #                             0x01   Left
  #                             0x02   Centred
  #                             0x03   Right
  #                             0x04   Filled
  #                             0x05   Justified (BIFF4-BIFF8X)
  #                             0x06   Centred across selection
  #                                    (BIFF4-BIFF8X)
  #                             0x07   Distributed (BIFF8X)
  #                  3  0x08    1 = Text is wrapped at right border
  #                6-4  0x70    XF_VERT_ALIGN – Vertical alignment
  #                             Value  Vertical alignment
  #                             0x00   Top
  #                             0x01   Centred
  #                             0x02   Bottom
  #                             0x03   Justified (BIFF5-BIFF8X)
  #                             0x04   Distributed (BIFF8X)
  #      7     1  XF_ROTATION: Text rotation angle (see above)
  #                Value  Text rotation
  #                    0  Not rotated
  #                 1-90  1 to 90 degrees counterclockwise
  #               91-180  1 to 90 degrees clockwise
  #                  255  Letters are stacked top-to-bottom,
  #                       but not rotated
  #      8     1   Bit  Mask    Contents
  #                3-0  0x0f    Indent level
  #                  4  0x10    1 = Shrink content to fit into cell
  #                  5  0x40    1 = Merge Range (djberger)
  #                7-6  0xc0    Text direction (BIFF8X only)
  #                             0 = According to context
  #                             1 = Left-to-right
  #                             2 = Right-to-left
  #      9     1   Bit  Mask    Contents
  #                7-2  0xfc    XF_USED_ATTRIB – Used attributes
  #                             Each bit describes the validity of a
  #                             specific group of attributes. In cell XFs
  #                             a cleared bit means the attributes of the
  #                             parent style XF are used (but only if the
  #                             attributes are valid there), a set bit
  #                             means the attributes of this XF are used.
  #                             In style XFs a cleared bit means the
  #                             attribute setting is valid, a set bit
  #                             means the attribute should be ignored.
  #                             Bit  Mask  Contents
  #                               0  0x01  Flag for number format
  #                               1  0x02  Flag for font
  #                               2  0x04  Flag for horizontal and
  #                                        vertical alignment, text wrap,
  #                                        indentation, orientation,
  #                                        rotation, and text direction
  #                               3  0x08  Flag for border lines
  #                               4  0x10  Flag for background area style
  #                               5  0x20  Flag for cell protection (cell
  #                                        locked and formula hidden)
  #     10     4  Cell border lines and background area:
  #                 Bit  Mask        Contents
  #                3- 0  0x0000000f  Left line style (➜ 3.10)
  #                7- 4  0x000000f0  Right line style (➜ 3.10)
  #               11- 8  0x00000f00  Top line style (➜ 3.10)
  #               15-12  0x0000f000  Bottom line style (➜ 3.10)
  #               22-16  0x007f0000  Colour index (➜ 6.70)
  #                                  for left line colour
  #               29-23  0x3f800000  Colour index (➜ 6.70)
  #                                  for right line colour
  #                  30  0x40000000  1 = Diagonal line
  #                                  from top left to right bottom
  #                  31  0x80000000  1 = Diagonal line
  #                                  from bottom left to right top
  #     14     4    Bit  Mask        Contents
  #                6- 0  0x0000007f  Colour index (➜ 6.70)
  #                                  for top line colour
  #               13- 7  0x00003f80  Colour index (➜ 6.70)
  #                                  for bottom line colour
  #               20-14  0x001fc000  Colour index (➜ 6.70)
  #                                  for diagonal line colour
  #               24-21  0x01e00000  Diagonal line style (➜ 3.10)
  #               31-26  0xfc000000  Fill pattern (➜ 3.11)
  #     18     2    Bit  Mask        Contents
  #                 6-0  0x007f      Colour index (➜ 6.70)
  #                                  for pattern colour
  #                13-7  0x3f80      Colour index (➜ 6.70)
  #                                  for pattern background
  fmt = Format.new
  font_idx, numfmt, _, xf_align, xf_rotation, xf_indent, _,
    xf_borders, xf_brdcolors, xf_pattern = work.unpack binfmt(:xf)
  fmt.number_format = @formats[numfmt]
  ## this appears to be undocumented: the first 4 fonts seem to be accessed
  #  with a 0-based index, but all subsequent font indices are 1-based.
  fmt.font = @workbook.font((font_idx > 3) ? font_idx - 1 : font_idx)
  fmt.horizontal_align = NGILA_H_FX[xf_align & 0x07]
  fmt.text_wrap = xf_align & 0x08 > 0
  fmt.vertical_align = NGILA_V_FX[xf_align & 0x70]
  fmt.rotation = if xf_rotation == 255
    :stacked
  elsif xf_rotation > 90
    90 - xf_rotation
  else
    xf_rotation
  end
  fmt.indent_level = xf_indent & 0x0f
  fmt.shrink = xf_indent & 0x10 > 0
  fmt.text_direction = NOITCERID_TXET_FX[xf_indent & 0xc0]
  fmt.left = XF_BORDER_LINE_STYLES[xf_borders & 0x0000000f]
  fmt.right = XF_BORDER_LINE_STYLES[(xf_borders & 0x000000f0) >> 4]
  fmt.top = XF_BORDER_LINE_STYLES[(xf_borders & 0x00000f00) >> 8]
  fmt.bottom = XF_BORDER_LINE_STYLES[(xf_borders & 0x0000f000) >> 12]
  fmt.left_color = COLOR_CODES[(xf_borders & 0x007f0000) >> 16] || :black
  fmt.right_color = COLOR_CODES[(xf_borders & 0x3f800000) >> 23] || :black
  fmt.cross_down = xf_borders & 0x40000000 > 0
  fmt.cross_up = xf_borders & 0x80000000 > 0
  if xf_brdcolors
    fmt.top_color = COLOR_CODES[xf_brdcolors & 0x0000007f] || :black
    fmt.bottom_color = COLOR_CODES[(xf_brdcolors & 0x00003f80) >> 7] || :black
    fmt.diagonal_color = COLOR_CODES[(xf_brdcolors & 0x001fc000) >> 14] || :black
    # fmt.diagonal_style = COLOR_CODES[xf_brdcolors & 0x01e00000]
    fmt.pattern = (xf_brdcolors & 0xfc000000) >> 26
  end
  fmt.pattern_fg_color = COLOR_CODES[xf_pattern & 0x007f] || :border
  fmt.pattern_bg_color = COLOR_CODES[(xf_pattern & 0x3f80) >> 7] || :pattern_bg
  @workbook.add_format fmt
end

#set_cell(worksheet, row, column, xf, value = nil) ⇒ `Object`

# File 'lib/parseexcel/parseexcel.rb', line 76

def set_cell worksheet, row, column, xf, value = nil
  cells = @current_row_block[row] ||= Row.new(nil, row)
  cells.formats[column] = xf = @workbook.format(xf)
  cells[column] = ParseExcel::Worksheet::Cell.new(value, xf, cells, column)
end

#set_missing_row_address(worksheet, work, pos, len) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 1225

def set_missing_row_address worksheet, work, pos, len
  # Offset  Size  Contents
  #      0     2  Index of this row
  #      2     2  Index to this column
  row_index, _ = work.unpack "v2"
  unless worksheet.offsets[row_index]
    @current_row_block_offset ||= [pos]
    data = {
      index: row_index,
      row_block: @current_row_block_offset,
      offset: @current_row_block_offset[0]
    }
    worksheet.set_row_address row_index, data
  end
end

#set_row_address(worksheet, work, pos, len) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 1241

def set_row_address worksheet, work, pos, len
  # Offset  Size  Contents
  #      0     2  Index of this row
  #      2     2  Index to column of the first cell which
  #               is described by a cell record
  #      4     2  Index to column of the last cell which is
  #               described by a cell record, increased by 1
  #      6     2  Bit   Mask    Contents
  #               14-0  0x7fff  Height of the row, in twips = 1/20 of a point
  #                 15  0x8000  0 = Row has custom height;
  #                             1 = Row has default height
  #      8     2  Not used
  #     10     2  In BIFF3-BIFF4 this field contains a relative offset to
  #               calculate stream position of the first cell record for this
  #               row (➜ 5.7.1). In BIFF5-BIFF8 this field is not used
  #               anymore, but the DBCELL record (➜ 6.26) instead.
  #     12     4  Option flags and default row formatting:
  #                  Bit  Mask        Contents
  #                  2-0  0x00000007  Outline level of the row
  #                    4  0x00000010  1 = Outline group starts or ends here
  #                                       (depending on where the outline
  #                                       buttons are located, see WSBOOL
  #                                       record, ➜ 6.113), and is collapsed
  #                    5  0x00000020  1 = Row is hidden (manually, or by a
  #                                       filter or outline group)
  #                    6  0x00000040  1 = Row height and default font height
  #                                       do not match
  #                    7  0x00000080  1 = Row has explicit default format (fl)
  #                    8  0x00000100  Always 1
  #                27-16  0x0fff0000  If fl = 1: Index to default XF record
  #                                              (➜ 6.115)
  #                   28  0x10000000  1 = Additional space above the row.
  #                                       This flag is set, if the upper
  #                                       border of at least one cell in this
  #                                       row or if the lower border of at
  #                                       least one cell in the row above is
  #                                       formatted with a thick line style.
  #                                       Thin and medium line styles are not
  #                                       taken into account.
  #                   29  0x20000000  1 = Additional space below the row.
  #                                       This flag is set, if the lower
  #                                       border of at least one cell in this
  #                                       row or if the upper border of at
  #                                       least one cell in the row below is
  #                                       formatted with a medium or thick
  #                                       line style. Thin line styles are
  #                                       not taken into account.
  @current_row_block_offset ||= [pos]
  index, first_used, first_unused, height, flags = work.unpack binfmt(:row)
  height &= 0x7fff
  format = nil
  # TODO: read attributes from work[13,3], read flags
  attrs = {
    default_format: format,
    first_used: first_used,
    first_unused: first_unused,
    index: index,
    row_block: @current_row_block_offset,
    offset: @current_row_block_offset[0],
    outline_level: flags & 0x00000007,
    collapsed: (flags & 0x0000010) > 0,
    hidden: (flags & 0x0000020) > 0
  }
  if (flags & 0x00000040) > 0
    attrs.store :height, height / TWIPS
  end
  if (flags & 0x00000080) > 0
    xf = (flags & 0x0fff0000) >> 16
    attrs.store :default_format, @workbook.format(xf)
  end
  # TODO: Row spacing
  worksheet.set_row_address index, attrs
end

#setup(io) ⇒ `Object`

# File 'lib/spreadsheet/excel/reader.rb', line 1315

def setup io
  ## Reading from StringIO fails without forced encoding
  if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.3.0")
    io.set_encoding("ASCII-8BIT")
  elsif io.respond_to?(:string) && (str = io.string) && str.respond_to?(:force_encoding)
    str.force_encoding("ASCII-8BIT")
  end
  io.rewind
  @ole = Ole::Storage.open io
  @workbook = Workbook.new io, {}
  %w[Book Workbook BOOK WORKBOOK book workbook].any? do |name|
    @book = begin
      @ole.file.open(name)
    rescue
      false
    end
  end
  raise "could not locate a workbook, possibly an empty file passed" unless @book
  @data = @book.read
  read_bof
  @workbook.ole = @book
  @workbook.bof = @bof
  @workbook.version = @version
  biff = @workbook.biff_version
  extend_reader biff
  extend_internals biff
end

Class: Spreadsheet::Excel::Reader

Overview

Defined Under Namespace

Constant Summary collapse

Constants included from Internals

Instance Method Summary collapse

Methods included from Internals

Constructor Details

#initialize(opts = {}) ⇒ Reader

Instance Method Details

#decode_rk(work) ⇒ Object

#encoding(codepage_id) ⇒ Object

#get_next_chunk ⇒ Object

#in_row_block?(op, previous) ⇒ Boolean

#memoize? ⇒ Boolean

#postread_workbook ⇒ Object

#postread_worksheet(worksheet) ⇒ Object

#read(io) ⇒ Object

#read_blank(worksheet, addr, work) ⇒ Object

#read_bof ⇒ Object

#read_boolerr(worksheet, addr, work) ⇒ Object

#read_boundsheet(work, pos, len) ⇒ Object

#read_codepage(work, pos, len) ⇒ Object

#read_colinfo(worksheet, work, pos, len) ⇒ Object

#read_dimensions(worksheet, work, pos, len) ⇒ Object

#read_font(work, pos, len) ⇒ Object

#read_format(work, pos, len) ⇒ Object

#read_formula(worksheet, addr, work) ⇒ Object

#read_guts(worksheet, work, pos, len) ⇒ Object

#read_hlink(worksheet, work, pos_unused, len) ⇒ Object

#read_hlink_string(work, pos) ⇒ Object

#read_index(worksheet, work, pos, len) ⇒ Object

#read_label(worksheet, addr, work) ⇒ Object

#read_labelsst(worksheet, addr, work) ⇒ Object

#read_merged_cells(worksheet, work, pos, len) ⇒ Object

#read_mulblank(worksheet, addr, work) ⇒ Object

#read_mulrk(worksheet, addr, work) ⇒ Object

#read_note(worksheet, work, pos, len) ⇒ Object

#read_number(worksheet, addr, work) ⇒ Object

#read_pagesetup(worksheet, work, pos, len) ⇒ Object

#read_rk(worksheet, addr, work) ⇒ Object

#read_row(worksheet, addr) ⇒ Object

#read_rstring(worksheet, addr, work) ⇒ Object

#read_sheet_protection(worksheet, op, data) ⇒ Object

#read_style(work, pos, len) ⇒ Object

#read_window2(worksheet, work, pos, len) ⇒ Object

#read_workbook ⇒ Object

#read_worksheet(worksheet, offset) ⇒ Object

#read_xf(work, pos, len) ⇒ Object

#set_cell(worksheet, row, column, xf, value = nil) ⇒ Object

#set_missing_row_address(worksheet, work, pos, len) ⇒ Object

#set_row_address(worksheet, work, pos, len) ⇒ Object

#setup(io) ⇒ Object