Class: FormatParser::ZIPParser::FileReader

Inherits:
Object
  • Object
show all
Defined in:
lib/parsers/zip_parser/file_reader.rb

Overview

A very barebones ZIP file reader

Defined Under Namespace

Classes: ZipEntry

Constant Summary collapse

# Common ancestor of the error classes declared below.
class Error < StandardError; end

# Subclass of Error.
class ReadError < Error; end

# Subclass of Error.
class UnsupportedFeature < Error; end

# Subclass of Error.
class InvalidStructure < Error; end

# Subclass of Error whose message is fixed, regardless of what is passed to `new`.
class LocalHeaderPending < Error
  # @return [String] the fixed explanation for this error
  def message
    'The compressed data offset is not available (local header has not been read)'
  end
end

# Subclass of Error whose message is fixed, regardless of what is passed to `new`.
class MissingEOCD < Error
  # @return [String] the fixed explanation for this error
  def message
    'Could not find the EOCD signature in the buffer - maybe a malformed ZIP file'
  end
end

Instance Method Summary collapse

Instance Method Details

#read_zip_structure(io:) ⇒ Array<ZipEntry>

Parses an IO handle to a ZIP archive into an array of ZipEntry objects.

Parameters:

  • io (#tell, #seek, #read, #size)

    an IO-ish object

Returns:

  • (Array<ZipEntry>)

    an array of entries within the ZIP being parsed



159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/parsers/zip_parser/file_reader.rb', line 159

# Parses an IO handle to a ZIP archive into an array of entries, one per
# central directory record.
#
# @param io [#tell, #seek, #read, #size] an IO-ish object
# @return [Array<ZipEntry>] an array of entries within the ZIP being parsed
# @raise [InvalidStructure] if nothing can be read at the central directory offset
def read_zip_structure(io:)
  zip_file_size = io.size
  eocd_offset = get_eocd_offset(io, zip_file_size)
  zip64_end_of_cdir_location = get_zip64_eocd_location(io, eocd_offset)
  # Use the ZIP64 variant of the lookup whenever a ZIP64 EOCD location was found.
  num_files, cdir_location, cdir_size =
    if zip64_end_of_cdir_location
      num_files_and_central_directory_offset_zip64(io, zip64_end_of_cdir_location)
    else
      num_files_and_central_directory_offset(io, eocd_offset)
    end

  log { format('Located the central directory start at %d', cdir_location) }
  seek(io, cdir_location)

  # In zip_tricks we read the entire central directory _and_ anything behind it.
  # Strictly speaking, we should be able to read `cdir_size` bytes and not a byte more.
  # BUT! in format_parser we avoid unbounded reads, as a matter of fact they are forbidden.
  # So we will again limit ourselves to cdir_size, and we will take a cushion of 1 KB.
  central_directory_str = io.read(cdir_size + 1024)

  # A sized read at the end of the stream returns nil (standard IO#read behavior).
  # That happens when the recorded central directory offset lies at or beyond the
  # end of the data - report it as a structural error instead of letting
  # StringIO.new(nil) fail with a TypeError.
  unless central_directory_str
    raise InvalidStructure,
          format('Could not read the central directory at offset %d', cdir_location)
  end

  central_directory_io = StringIO.new(central_directory_str)
  log do
    format(
      'Read %d bytes with central directory + EOCD record and locator',
      central_directory_str.bytesize)
  end

  # Entries are read back-to-back from the in-memory copy; the logged offset is
  # translated back into a position within the original IO.
  (0...num_files).map do |entry_n|
    offset_location = cdir_location + central_directory_io.pos
    log do
      format(
        'Reading the central directory entry %d starting at offset %d',
        entry_n, offset_location)
    end
    read_cdir_entry(central_directory_io)
  end
end

#zip?(io) ⇒ Boolean

Tells whether the given IO is likely to be a ZIP file without performing too many detailed reads

Parameters:

  • io (#tell, #seek, #read, #size)

    an IO-ish object

Returns:

  • (Boolean)


203
204
205
206
207
208
209
210
211
212
213
214
215
# File 'lib/parsers/zip_parser/file_reader.rb', line 203

# Tells whether the given IO is likely to be a ZIP file. It runs the EOCD
# lookup and the central directory lookup and discards what they return:
# only whether they raise matters.
#
# @param io [#tell, #seek, #read, #size] an IO-ish object
# @return [Boolean] false when any step raises an Error, true otherwise
def zip?(io)
  eocd_at = get_eocd_offset(io, io.size)
  zip64_eocd_at = get_zip64_eocd_location(io, eocd_at)

  # The return values are intentionally ignored here
  if zip64_eocd_at
    num_files_and_central_directory_offset_zip64(io, zip64_eocd_at)
  else
    num_files_and_central_directory_offset(io, eocd_at)
  end
rescue Error
  false
else
  true
end