Method: PDF::Reader::Buffer#find_first_xref_offset

Defined in:
lib/pdf/reader/buffer.rb

#find_first_xref_offset ⇒ Object

Returns the byte offset where the first XRef table in the source can be found.

Raises:

- (MalformedPDFError)



144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# File 'lib/pdf/reader/buffer.rb', line 144

# Return the byte offset where the first XRef table in the source can be found.
#
# Reads up to TRAILING_BYTECOUNT bytes from the end of @io, finds the last line
# that starts with `%%EOF`, and interprets the line immediately before it as
# the integer offset.
#
# Returns the offset as a non-negative Integer.
#
# Raises MalformedPDFError when:
# * nothing could be read from @io, or no `%%EOF` line is present in the tail
# * there is no line before the `%%EOF` marker
# * the line before the marker does not start with an integer, or the integer
#   is negative
def find_first_xref_offset
  # NOTE(review): presumably raises when the source is empty - confirm against
  # the helper's definition.
  check_size_is_non_zero

  # Position at the start of the trailing window. Seeking before the start of a
  # source shorter than TRAILING_BYTECOUNT fails, in which case the whole source
  # is read from the beginning. The rescue is deliberately broad so IO-like
  # objects that signal the failure with a different error still fall back.
  begin
    @io.seek(-TRAILING_BYTECOUNT, IO::SEEK_END)
  rescue StandardError
    @io.seek(0)
  end
  data = @io.read(TRAILING_BYTECOUNT)

  raise MalformedPDFError, "PDF does not contain EOF marker" if data.nil?

  # the PDF 1.7 spec (section #3.4) says that EOL markers can be either \r, \n, or both.
  # Lines are reversed so the search below finds the LAST `%%EOF` in the data first.
  lines = data.split(/[\n\r]+/).reverse
  eof_index = lines.index { |l| l.strip[/^%%EOF/] }

  raise MalformedPDFError, "PDF does not contain EOF marker" if eof_index.nil?
  raise MalformedPDFError, "PDF EOF marker does not follow offset" if eof_index >= lines.size - 1

  # In the reversed list, the entry after the marker is the line that precedes
  # it in the file.
  offset_line = lines[eof_index + 1]

  # String#to_i returns 0 for text that does not begin with a number, which
  # would silently turn a missing offset (e.g. `startxref` directly followed by
  # `%%EOF`) into a bogus offset of 0. Reject such lines explicitly.
  raise MalformedPDFError, "invalid xref offset" unless offset_line =~ /\A\s*[-+]?\d/

  offset = offset_line.to_i

  # a byte offset < 0 doesn't make much sense. This is unlikely to happen, but in theory some
  # corrupted PDFs might have a line that looks like a negative int preceding the `%%EOF`
  raise MalformedPDFError, "invalid xref offset" if offset < 0
  offset
end