Class: PdfContentExtractor

Inherits:

Object

Object → PdfContentExtractor

show all

Defined in: lib/content_extractors/pdf_content_extractor.rb

Defined Under Namespace

Classes: EmptyContentError, ExtractionError, UnreadableContentError

Instance Attribute Summary collapse

#page_count ⇒ Object (readonly)

Returns the value of attribute page_count.

Instance Method Summary collapse

#call ⇒ Object
#initialize(file) ⇒ PdfContentExtractor constructor

A new instance of PdfContentExtractor.

Constructor Details

#initialize(file) ⇒ `PdfContentExtractor`

Returns a new instance of PdfContentExtractor.

# File 'lib/content_extractors/pdf_content_extractor.rb', line 14

# Sets up a new extractor: remembers the given file and zeroes the page counter.
# Nothing is read or parsed at construction time.
#
# @param file [Object] stored unchanged in @file
#   (presumably a path or IO-like PDF source — TODO confirm against callers)
def initialize(file)
  @file = file
  @page_count = 0 # exposed through #page_count; starts at zero
end

Instance Attribute Details

#page_count ⇒ `Object` (readonly)

Returns the value of attribute page_count.



7
8
9

# File 'lib/content_extractors/pdf_content_extractor.rb', line 7

# Read-only accessor for the page counter.
#
# @return [Object] the current value of @page_count; the constructor sets it
#   to 0 (the code that updates it afterwards is not shown here)
def page_count
  @page_count
end

Instance Method Details

#call ⇒ `Object`

# File 'lib/content_extractors/pdf_content_extractor.rb', line 19

# Runs the extraction and converts any failure into one of this class's own
# error types, so callers do not have to rescue PDF::Reader errors directly.
#
# @return [Object] whatever #extract_content returns
# @raise [UnreadableContentError] when PDF::Reader reports a malformed PDF or
#   an unsupported feature
# @raise [ExtractionError] when any other StandardError escapes the extraction
def call
  extract_content
rescue PDF::Reader::MalformedPDFError, PDF::Reader::UnsupportedFeatureError => reader_error
  # Parser-level failures: the document itself cannot be read.
  reason = reader_error.message
  raise UnreadableContentError, "PDF could not be read: #{reason}"
rescue StandardError => unexpected_error
  # Catch-all for everything else that went wrong during extraction.
  reason = unexpected_error.message
  raise ExtractionError, "Content extraction failed: #{reason}"
end

Class: PdfContentExtractor

Defined Under Namespace

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(file) ⇒ PdfContentExtractor

Instance Attribute Details

#page_count ⇒ Object (readonly)

Instance Method Details

#call ⇒ Object

#initialize(file) ⇒ `PdfContentExtractor`

#page_count ⇒ `Object` (readonly)

#call ⇒ `Object`