Class: Tabula::Extraction::PagesInfoExtractor

Inherits:

Object

Object
Tabula::Extraction::PagesInfoExtractor

show all

Defined in: lib/tabula/extraction.rb

Instance Method Summary collapse

#initialize(pdf_file_path, password = '') ⇒ PagesInfoExtractor constructor

A new instance of PagesInfoExtractor.
#pages ⇒ Object

Constructor Details

#initialize(pdf_file_path, password = '') ⇒ `PagesInfoExtractor`

Returns a new instance of PagesInfoExtractor.

# File 'lib/tabula/extraction.rb', line 374

# Opens the PDF at +pdf_file_path+, caches its page list and creates the
# object extractor that #pages uses.
#
# If any step after opening the document raises, the document is closed
# again before the exception propagates so the handle is not leaked.
#
# @param pdf_file_path path of the PDF, passed to Extraction.openPDF and
#   to ObjectExtractor.new
# @param password password passed to Extraction.openPDF
def initialize(pdf_file_path, password = '')
  ready = false
  @pdf_filename = pdf_file_path
  @pdf_file = Extraction.openPDF(pdf_file_path, password)
  @all_pages = @pdf_file.getDocumentCatalog.getAllPages

  # NOTE(review): the password is not forwarded to ObjectExtractor here;
  # confirm whether it needs it for password-protected documents.
  @extractor = Tabula::Extraction::ObjectExtractor.new(pdf_file_path, :all)
  ready = true
ensure
  # An ensure + flag (rather than rescue) also covers non-StandardError
  # failures without swallowing or re-wrapping them.
  @pdf_file.close if @pdf_file && !ready
end

Instance Method Details

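#pages ⇒ `Object`

Returns an Enumerator that yields one page object per page of the document and closes the PDF and the extractor when enumeration finishes.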

# File 'lib/tabula/extraction.rb', line 382

# Builds an Enumerator over every page of the document.
#
# Pages are passed through the object extractor (+extract_page+) until the
# first one that reports +has_text?+. Every page after that is yielded as a
# Tabula::Page built only from the crop box size, rotation and page number,
# without going through the extractor.
#
# When enumeration ends, normally or through an exception, both the PDF
# document and the extractor are closed. The enumerator is therefore
# single-use, and nothing is closed if it is never enumerated.
#
# @return [Enumerator] yields one page object per page, in document order
def pages
  Enumerator.new do |y|
    found_page_with_texts = false
    begin
      @all_pages.each_with_index do |pdf_page, i|
        page_number = i + 1 # remember, these are one-indexed

        page_info =
          if found_page_with_texts
            crop_box = pdf_page.findCropBox
            Tabula::Page.new(@pdf_filename,
                             crop_box.width,
                             crop_box.height,
                             pdf_page.getRotation.to_i,
                             page_number)
          else
            extracted = @extractor.extract_page(page_number)
            found_page_with_texts = extracted.has_text?
            extracted
          end

        y.yield page_info
      end
    ensure
      # Nested ensure: the extractor must still be closed when closing the
      # document itself raises.
      begin
        @pdf_file.close
      ensure
        @extractor.close!
      end
    end
  end
end

Class: Tabula::Extraction::PagesInfoExtractor

Instance Method Summary collapse

Constructor Details

#initialize(pdf_file_path, password = '') ⇒ PagesInfoExtractor

Instance Method Details

#pages ⇒ Object

#initialize(pdf_file_path, password = '') ⇒ `PagesInfoExtractor`

#pages ⇒ `Object`