Module: Pdf::Parser
- Defined in: lib/pdf/parser.rb, lib/pdf/parser/version.rb
Constant Summary collapse
- VERSION = "0.2.0"
Class Method Summary collapse
Class Method Details
.parse(pdf, print_option = "") ⇒ Object
15 16 17 18 19 20 21 22 23 24 25 26 |
# File 'lib/pdf/parser.rb', line 15

# Extracts the XFA data section of a PDF and returns it parsed as XML.
#
# The PDF is loaded with PDF.read, searched with grep for "><xfa:data",
# and the result is stringified. The text between the opening and closing
# xfa:data markers is then cleaned up and handed to Nokogiri.
#
# @param pdf [Object] passed straight to PDF.read
#   (presumably a file path -- confirm against callers)
# @param print_option [Object] the parsed document is written to stdout only
#   when this is exactly +true+; the default "" and any other value stay silent
# @return [Object] whatever Nokogiri::XML returns for the cleaned data
# @raise [ArgumentError] if the marker helper yields nil, i.e. no text was
#   located between the xfa:data markers
def self.parse(pdf, print_option = "")
  dump = PDF.read(pdf).grep("><xfa:data").to_s

  # The single-quoted '\n' is a literal backslash followed by "n", not a
  # newline. Presumably it matches escaped line breaks in the stringified
  # dump -- confirm before changing the quoting.
  payload = dump.string_between_markers('<xfa:data\n>', '</xfa:data\n>')

  # Fail with a clear message instead of a NoMethodError on nil below.
  raise ArgumentError, "no XFA data found in #{pdf.inspect}" if payload.nil?

  cleaned = payload.encode("ASCII-8BIT").force_encoding("utf-8").gsub('\n', '')

  # Serialize and re-parse, as the original implementation did.
  doc = Nokogiri::XML(Nokogiri::XML(cleaned).to_xml)

  # Compared against true on purpose: the default "" is truthy in Ruby, so a
  # bare truthiness check would print on every call.
  puts doc if print_option == true
  doc
end