Class: Tabula::Extraction::PagesInfoExtractor
- Inherits:
-
Object
- Object
- Tabula::Extraction::PagesInfoExtractor
- Defined in:
- lib/tabula/extraction.rb
Instance Method Summary
-
#initialize(pdf_file_path, password = '') ⇒ PagesInfoExtractor
constructor
A new instance of PagesInfoExtractor.
- #pages ⇒ Object
Constructor Details
#initialize(pdf_file_path, password = '') ⇒ PagesInfoExtractor
Returns a new instance of PagesInfoExtractor.
374 375 376 377 378 379 380 |
# File 'lib/tabula/extraction.rb', line 374

# Opens the PDF and sets up the state that #pages iterates over:
# the file name, the opened document, its page list and an object extractor.
#
# @param pdf_file_path path of the PDF; passed to both Extraction.openPDF
#   and ObjectExtractor.new (presumably a String -- confirm with callers)
# @param password password handed to Extraction.openPDF (defaults to '')
def initialize(pdf_file_path, password = '')
  document = Extraction.openPDF(pdf_file_path, password)
  catalog = document.getDocumentCatalog

  @pdf_filename = pdf_file_path
  @pdf_file = document
  @all_pages = catalog.getAllPages
  # NOTE(review): the password is not forwarded to the extractor -- confirm
  # that ObjectExtractor can open protected documents without it.
  @extractor = Tabula::Extraction::ObjectExtractor.new(pdf_file_path, :all)
end
Instance Method Details
#pages ⇒ Object
382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 |
# File 'lib/tabula/extraction.rb', line 382

# Builds an Enumerator over the pages of the document opened in the
# constructor, in document order.
#
# Each page is run through @extractor.extract_page until the first one whose
# has_text? is truthy. Every page after that is yielded as a bare
# Tabula::Page built from the crop box size and rotation, without extraction.
#
# When the iteration finishes or raises, @pdf_file and @extractor are both
# closed; the extractor is closed even if closing the PDF itself raises.
# NOTE(review): because the resources are closed after one pass, the returned
# enumerator is presumably single-use -- confirm before iterating it twice.
#
# @return [Enumerator] yields one page object per page of the document
def pages
  found_page_with_texts = false

  Enumerator.new do |y|
    begin
      @all_pages.each_with_index do |pd_page, i|
        page_number = i + 1 # remember, these are one-indexed

        if found_page_with_texts
          crop_box = pd_page.findCropBox
          y.yield Tabula::Page.new(@pdf_filename,
                                   crop_box.width,
                                   crop_box.height,
                                   pd_page.getRotation.to_i,
                                   page_number)
        else
          extracted = @extractor.extract_page(page_number)
          # Record the result before yielding so the flag is set even if the
          # consumer stops after this page.
          found_page_with_texts = extracted.has_text?
          y.yield extracted
        end
      end
    ensure
      # Nested ensure: a failure while closing the PDF must not leak the
      # extractor.
      begin
        @pdf_file.close
      ensure
        @extractor.close!
      end
    end
  end
end