Class: HtmlContentExtractor

Inherits: Object

Inheritance hierarchy:

Object
→ HtmlContentExtractor

show all

Defined in: lib/content_extractors/html_content_extractor.rb

Defined Under Namespace

Classes: ExtractionError, ParsingError, UrlDownloadError

Constant Summary collapse

WORDS_PER_PAGE =

BROWSERS =

[:chrome, :firefox, :safari]

Instance Attribute Summary collapse

#content ⇒ Object readonly

Returns the value of attribute content.

Instance Method Summary collapse

#call ⇒ Object
#initialize(url) ⇒ HtmlContentExtractor constructor

A new instance of HtmlContentExtractor.

Constructor Details

#initialize(url) ⇒ `HtmlContentExtractor`

Returns a new instance of HtmlContentExtractor.



17
18
19

# File 'lib/content_extractors/html_content_extractor.rb', line 17

# Builds an extractor around the given URL.
#
# Only stores the argument; nothing is validated or fetched in the constructor.
#
# @param url [Object] kept as-is in @url (presumably a URL string — confirm
#   against callers)
def initialize(url)
  @url = url
end

Instance Attribute Details

#content ⇒ `Object` (readonly)

Returns the value of attribute content.



7
8
9

# File 'lib/content_extractors/html_content_extractor.rb', line 7

# Read-only accessor for the @content instance variable.
#
# @return [Object, nil] the current value of @content; nil if it has not been
#   assigned yet (the code that assigns it is not shown in this excerpt)
def content
  @content
end

Instance Method Details

#call ⇒ `Object`

# File 'lib/content_extractors/html_content_extractor.rb', line 21

# Runs the extraction and reports any failure as an ExtractionError.
#
# @return [Object] whatever extract_content returns
# @raise [ExtractionError] if extract_content raises a StandardError; the
#   original error's message is appended to the new message
def call
  begin
    extract_content
  rescue => failure
    # Wrap every StandardError so callers only need to handle one error class.
    raise ExtractionError, "HTML content extraction failed: #{failure.message}"
  end
end

Class: HtmlContentExtractor

Defined Under Namespace

Constant Summary collapse

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url) ⇒ HtmlContentExtractor

Instance Attribute Details

#content ⇒ Object (readonly)

Instance Method Details

#call ⇒ Object

#initialize(url) ⇒ `HtmlContentExtractor`

#content ⇒ `Object` (readonly)

#call ⇒ `Object`