Class: PdfContentExtractor

Inherits:
Object
  • Object
show all
Defined in:
lib/content_extractors/pdf_content_extractor.rb

Defined Under Namespace

Classes: EmptyContentError, ExtractionError, UnreadableContentError

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(file) ⇒ PdfContentExtractor

Returns a new instance of PdfContentExtractor.



14
15
16
17
# File 'lib/content_extractors/pdf_content_extractor.rb', line 14

# Builds an extractor around +file+ and resets the page counter.
#
# @param file the PDF source to extract from; stored as-is in @file
#   (presumably a path or IO-like object — confirm against callers)
def initialize(file)
  @file = file
  # Starts at zero; nothing in this method reads the file.
  @page_count = 0
end

Instance Attribute Details

#page_count ⇒ Object (readonly)

Returns the value of attribute page_count.



7
8
9
# File 'lib/content_extractors/pdf_content_extractor.rb', line 7

# Reader for @page_count.
#
# @return [Object] the current value of @page_count; the constructor sets it
#   to 0 (presumably updated to the document's page total during
#   extraction — confirm where it is assigned)
def page_count
  @page_count
end

Instance Method Details

#call ⇒ Object



19
20
21
22
23
24
25
26
27
# File 'lib/content_extractors/pdf_content_extractor.rb', line 19

# Runs the extraction and normalises failures into this class's error types.
#
# @return [Object] whatever +extract_content+ returns
# @raise [UnreadableContentError] when PDF::Reader reports a malformed PDF or
#   an unsupported feature
# @raise [ExtractionError] when any other StandardError escapes
#   +extract_content+
def call
  extract_content
rescue PDF::Reader::MalformedPDFError,
       PDF::Reader::UnsupportedFeatureError => reader_error
  # Parser-level failures: the document itself cannot be read.
  unreadable_message = "PDF could not be read: #{reader_error.message}"
  raise UnreadableContentError, unreadable_message
rescue => unexpected_error
  # Bare rescue catches StandardError, i.e. everything not matched above.
  # NOTE(review): this also rewraps any StandardError-derived error raised
  # inside extract_content itself — confirm that is intended.
  failure_message = "Content extraction failed: #{unexpected_error.message}"
  raise ExtractionError, failure_message
end