Class: HtmlContentExtractor

Inherits:
Object
  • Object
show all
Defined in:
lib/content_extractors/html_content_extractor.rb

Defined Under Namespace

Classes: ExtractionError, ParsingError, UrlDownloadError

Constant Summary collapse

WORDS_PER_PAGE =
500
BROWSERS =
[:chrome, :firefox, :safari]

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url) ⇒ HtmlContentExtractor

Returns a new instance of HtmlContentExtractor.



17
18
19
# File 'lib/content_extractors/html_content_extractor.rb', line 17

# Builds an extractor and remembers the given URL for later use.
#
# Only stores the argument; no validation or other work happens here.
#
# @param url [Object] stored as-is in @url
#   (presumably the address of the HTML page to extract — TODO confirm type with callers)
def initialize(url)
  @url = url
end

Instance Attribute Details

#content ⇒ Object (readonly)

Returns the value of attribute content.



7
8
9
# File 'lib/content_extractors/html_content_extractor.rb', line 7

# Reader for the @content instance variable.
#
# @return [Object, nil] the current value of @content; nil when it has not
#   been assigned (the constructor shown for this class does not set it).
#   NOTE(review): presumably populated during extraction — confirm against
#   extract_content.
def content
  @content
end

Instance Method Details

#call ⇒ Object



21
22
23
24
25
# File 'lib/content_extractors/html_content_extractor.rb', line 21

# Runs the extraction and hands back whatever extract_content returns.
#
# Every StandardError raised along the way is re-raised as an
# ExtractionError whose message is the fixed prefix followed by the
# original error's message.
#
# @return [Object] the result of extract_content
# @raise [ExtractionError] when extract_content raises any StandardError
def call
  extract_content
rescue StandardError => error
  failure = "HTML content extraction failed: #{error.message}"
  raise(ExtractionError, failure)
end