40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
|
# File 'lib/chupa-text/decomposers/pdf.rb', line 40
# Builds one TextData from the document created for +data+ and yields it.
#
# The text of every page (as returned by +get_text+) is appended in
# iteration order. Pages whose text is empty are skipped, and a newline is
# appended after any page whose text does not already end with one. The
# document and the attribute names title, author, subject, keywords,
# creator, producer and creation_date are passed to +add_attribute+
# (defined elsewhere); creation_date is passed together with
# +:created_time+, presumably the attribute name to store it under.
# A screenshot is attached only when +data.need_screenshot?+ is true.
#
# @param data the input handed to +create_document+ and
#   +create_screenshot+; it must respond to +need_screenshot?+.
# @yieldparam text_data [TextData] the extracted text with its attributes.
# @return the block's value, or nil (without yielding) when
#   +create_document+ returns nil.
def decompose(data)
  pdf = create_document(data)
  return if pdf.nil?

  body = ""
  pdf.each do |page|
    content = page.get_text
    unless content.empty?
      body << content
      # Terminate the page so the next page's text starts on a new line.
      body << "\n" unless content.end_with?("\n")
    end
  end

  extracted = TextData.new(body, :source_data => data)
  # These attributes are passed to add_attribute under their own name only.
  [:title, :author, :subject, :keywords, :creator, :producer].each do |name|
    add_attribute(extracted, pdf, name)
  end
  add_attribute(extracted, pdf, :creation_date, :created_time)
  extracted.screenshot = create_screenshot(data, pdf) if data.need_screenshot?

  yield extracted
end
|