Module: AnyStyle::PDFUtils

Included in:
Document
Defined in:
lib/anystyle/utils.rb

Class Method Summary

Class Method Details

.pdf_info(path, pdfinfo: 'pdfinfo', **opts) ⇒ Object


72
73
74
75
76
77
# File 'lib/anystyle/utils.rb', line 72

# Runs the pdfinfo command on a file and returns its report as a Hash.
#
# Each output line is split at the first colon that is followed by
# whitespace; the text before it becomes the key and the text after it the
# value. Lines that contain no such separator are skipped.
#
# @param path [#to_s] path of the file handed to pdfinfo
# @param pdfinfo [String] command line used to invoke pdfinfo; it is passed
#   to the shell as-is, so it must come from a trusted source
# @param opts [Hash] accepted for signature compatibility; not used here
# @return [Hash{String => String}] report entries; empty when the command
#   printed nothing parseable (the exit status is not checked)
# @raise [RuntimeError] if +pdfinfo+ reports itself as tainted (only on
#   Rubies that still implement taint tracking)
def pdf_info(path, pdfinfo: 'pdfinfo', **opts)
  # Loaded locally: this method needs Shellwords to quote the path safely.
  require 'shellwords'

  # The taint API was removed in Ruby 3.2; only consult it where it exists.
  raise "pdfinfo is tainted" if pdfinfo.respond_to?(:tainted?) && pdfinfo.tainted?

  # Escape the path so spaces, quotes, `$(...)` and backticks reach pdfinfo
  # literally instead of being interpreted by the shell.
  report = %x{#{pdfinfo} #{Shellwords.escape(path.to_s)}}

  report.split("\n")
    .map { |ln| ln.split(/:\s+/, 2) }
    .select { |pair| pair.length == 2 } # drop lines without a "key: value" shape
    .to_h
end

.pdf_page_size(path) ⇒ Object


84
85
86
# File 'lib/anystyle/utils.rb', line 84

# Returns the first two numbers of the 'Page size' entry reported by
# pdf_info for the given file, as integers.
#
# Numbers are matched including an optional fractional part, so a report
# such as "595.276 x 841.89 pts (A4)" yields [595, 841]; fractions are
# truncated by +to_i+. Matching bare digit runs instead would split
# "595.276" into two separate numbers.
#
# @param path [#to_s] path of the file to inspect
# @return [Array<Integer>] up to two integers taken from the 'Page size'
#   entry, in the order they appear
# @raise [NoMethodError] if the report has no 'Page size' entry
def pdf_page_size(path)
  pdf_info(path)['Page size']
    .scan(/\d+(?:\.\d+)?/)
    .first(2)
    .map(&:to_i)
end

.pdf_to_text(path, pdftotext: 'pdftotext', **opts) ⇒ Object


65
66
67
68
69
70
# File 'lib/anystyle/utils.rb', line 65

# Runs the pdftotext command on a file and returns what it writes to
# standard output.
#
# The command line is built from +pdftotext+, the options returned by
# pdf_opts (defined elsewhere; joined with spaces here), the shell-escaped
# path and a trailing "-" argument.
#
# @param path [#to_s] path of the file handed to pdftotext
# @param pdftotext [String] command line used to invoke pdftotext; it is
#   passed to the shell as-is, so it must come from a trusted source
# @param opts [Hash] forwarded to pdf_opts; +:encoding+ is also used to tag
#   the returned string (default 'UTF-8')
# @return [String] the captured output, tagged with the chosen encoding
#   (bytes are not transcoded)
# @raise [RuntimeError] if +pdftotext+ reports itself as tainted (only on
#   Rubies that still implement taint tracking), or if the command exits
#   with a non-zero status
def pdf_to_text(path, pdftotext: 'pdftotext', **opts)
  # Loaded locally: this method needs Shellwords to quote the path safely.
  require 'shellwords'

  # The taint API was removed in Ruby 3.2; only consult it where it exists.
  raise "pdftotext is tainted" if pdftotext.respond_to?(:tainted?) && pdftotext.tainted?

  flags = pdf_opts(path, **opts).join(' ')

  # Escape the path so spaces, quotes, `$(...)` and backticks reach pdftotext
  # literally instead of being interpreted by the shell.
  text = %x{#{pdftotext} #{flags} #{Shellwords.escape(path.to_s)} -}
  raise "pdftotext failed with error code #{$?.exitstatus}" unless $?.success?

  text.force_encoding(opts[:encoding] || 'UTF-8')
end