Class: Tabula::Extraction::SpreadsheetExtractor

# File 'lib/tabula/spreadsheet_extractor.rb', line 15

# Lazily extracts the spreadsheets found on each page listed in +@pages+.
#
# Nothing is read until the returned Enumerator is iterated. Pages whose
# +getContents+ is nil are skipped. For every remaining page a Tabula::Page
# is built, each spreadsheet it reports has its cells filled with the text
# found inside them, and the (page, spreadsheet) pair is yielded.
#
# +@pdf_file+ is closed when iteration finishes or is aborted by an
# exception (see the +ensure+ below).
#
# @param options [Hash] passed through unchanged to Tabula::Page#spreadsheets
# @return [Enumerator] yields +page, spreadsheet+ pairs
# @raise [IndexError] during iteration, when a requested page number is not
#   within 1..(number of pages in the document)
def extract(options={})
  Enumerator.new do |y|
    begin
      page_count = @all_pages.size

      @pages.each do |i|
        # Page numbers are one-indexed. Reject out-of-range requests here,
        # with a clear message, instead of letting the zero-based lookup
        # below fail with an opaque backend error.
        unless i.between?(1, page_count)
          raise IndexError,
                "Page #{i} doesn't exist (document has #{page_count} page(s)); " \
                "page numbers are one-indexed."
        end

        pdfbox_page = @all_pages.get(i - 1)
        next if pdfbox_page.getContents.nil?

        # Reset the state gathered for the previous page before drawing this one.
        clear!
        drawPage(pdfbox_page)

        crop_box = pdfbox_page.findCropBox
        page = Tabula::Page.new(@pdf_filename,
                                crop_box.width,
                                crop_box.height,
                                pdfbox_page.getRotation.to_i,
                                i, # one-indexed, just like `i` is.
                                characters,
                                rulings)

        page.spreadsheets(options).each do |spreadsheet|
          spreadsheet.cells.each do |cell|
            cell.text_elements = page.get_cell_text(cell)
          end
          y.yield page, spreadsheet
        end
      end
    ensure
      # Runs whether iteration completed or raised.
      @pdf_file.close
    end
  end
end

Class: Tabula::Extraction::SpreadsheetExtractor

Constant Summary

Constants inherited from ObjectExtractor

Instance Attribute Summary

Attributes inherited from ObjectExtractor

Instance Method Summary collapse

Methods inherited from ObjectExtractor

Constructor Details

Instance Method Details

#extract(options = {}) ⇒ Object

#extract(options = {}) ⇒ `Object`