Module: Pdf::Parser

Defined in:
lib/pdf/parser.rb,
lib/pdf/parser/version.rb

Constant Summary collapse

VERSION =
"0.2.0"

Class Method Summary collapse

Class Method Details

.parse(pdf, print_option = "") ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/pdf/parser.rb', line 15

# Pulls the <xfa:data> section out of a PDF and returns it parsed as XML.
#
# @param pdf passed unchanged to PDF.read
# @param print_option the document is echoed to stdout only when this is
#   exactly +true+; the default "" (or any other value) prints nothing
# @return the object returned by the final Nokogiri::XML call
def self.parse(pdf, print_option = "")
  xfa_matches = PDF.read(pdf).grep("><xfa:data")

  # The markers are single-quoted, so they contain a literal backslash
  # followed by "n" rather than a newline character.
  # NOTE(review): presumably this matches the escaped text produced by
  # calling #to_s on the grep result -- confirm against PDF.read's return type.
  payload = xfa_matches.to_s.string_between_markers('<xfa:data\n>', '</xfa:data\n>')

  # Re-label the bytes as UTF-8, then drop every literal backslash-n sequence.
  cleaned = payload.encode("ASCII-8BIT").force_encoding("utf-8").gsub('\n','')

  # Parse, serialise, and parse again, so the returned document is built
  # from Nokogiri's own serialised output.
  document = Nokogiri::XML(Nokogiri::XML(cleaned).to_xml)

  if print_option == true
    puts document
  end

  document
end