Class: Docsplit::PageExtractor

Inherits:
Object
  • Object
show all
Defined in:
lib/docsplit/page_extractor.rb

Overview

Delegates to pdftk to burst a PDF document into single-page PDF files.

Instance Method Summary collapse

Instance Method Details

#extract(pdfs, opts) ⇒ Object

Burst a list of pdfs into single pages, as `pdfname_pagenumber.pdf`.



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/docsplit/page_extractor.rb', line 8

# Burst a list of pdfs into single pages, as `pdfname_pagenumber.pdf`.
#
# Each PDF is split by shelling out to pdftailor when DEPENDENCIES[:pdftailor]
# is truthy, and to pdftk otherwise. Page files are written under @output,
# which is created if it does not exist yet.
#
# @param pdfs [String, Array<String>] one path or a (possibly nested) list of
#   paths to the PDF files to split.
# @param opts [Hash] handed to extract_options unchanged; presumably this is
#   what populates @output -- confirm against extract_options.
# @return [Array<String>] the flattened list of PDF paths that were processed.
# @raise [ExtractionFailed] carrying the tool's combined stdout/stderr when
#   the external command does not exit successfully.
def extract(pdfs, opts)
  extract_options opts
  [pdfs].flatten.each do |pdf|
    pdf_name = File.basename(pdf, File.extname(pdf))
    # Only the path prefix is escaped; the "%d" page-number placeholder must
    # reach the external tool verbatim.
    page_path = ESCAPE[File.join(@output, pdf_name)] + "_%d.pdf"
    # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
    FileUtils.mkdir_p @output unless File.exist?(@output)

    cmd = if DEPENDENCIES[:pdftailor] # prefer pdftailor, but keep pdftk for backwards compatibility
      "pdftailor unstitch --output #{page_path} #{ESCAPE[pdf]} 2>&1"
    else
      "pdftk #{ESCAPE[pdf]} burst output #{page_path} 2>&1"
    end
    result = `#{cmd}`.chomp
    # Capture the exit status right away so nothing that runs in between can
    # replace $? before it is inspected.
    status = $?
    # Remove a stray doc_data.txt from the working directory (pdftk's burst
    # operation is documented to write one), whether or not the split worked.
    FileUtils.rm('doc_data.txt') if File.exist?('doc_data.txt')
    raise ExtractionFailed, result unless status.success?
    result
  end
end