Class: Html2rss::AutoSource::Scraper::Schema::Base

Inherits:
Object
  • Object
show all
Defined in:
lib/html2rss/auto_source/scraper/schema/base.rb

Overview

Base class for Schema.org schema_objects.

Constant Summary collapse

# Keys of the scraped article hash. #call invokes the public method with the
# same name for each entry and stores the result under that key.
DEFAULT_ATTRIBUTES = %i[id title description url image published_at].freeze

Instance Method Summary collapse

Constructor Details

#initialize(schema_object, url:) ⇒ Base

Returns a new instance of Base.



16
17
18
19
# File 'lib/html2rss/auto_source/scraper/schema/base.rb', line 16

# Stores both inputs on the instance for later use.
#
# @param schema_object [Object] the schema object to read attributes from;
#   kept in @schema_object (indexed with Symbol keys elsewhere in this class)
# @param url [Object] kept in @url; handed to
#   Utils.build_absolute_url_from_relative as its second argument by #url
def initialize(schema_object, url:)
  @url = url
  @schema_object = schema_object
end

Instance Method Details

#call ⇒ Hash

Returns the scraped article hash with DEFAULT_ATTRIBUTES.

Returns:

  • (Hash)

    the scraped article hash with DEFAULT_ATTRIBUTES



22
23
24
25
26
# File 'lib/html2rss/auto_source/scraper/schema/base.rb', line 22

# Builds the scraped article hash.
#
# Every symbol in DEFAULT_ATTRIBUTES is dispatched to the public method of the
# same name on this instance; the return value is stored under that symbol.
#
# @return [Hash] the scraped article hash with DEFAULT_ATTRIBUTES
def call
  DEFAULT_ATTRIBUTES.each_with_object({}) do |attribute, article|
    article[attribute] = public_send(attribute)
  end
end

#description ⇒ Object



31
32
33
34
# File 'lib/html2rss/auto_source/scraper/schema/base.rb', line 31

# Picks the longest available description text from the schema object.
#
# Candidates are read from the :description, :schema_object_body, :abstract
# and :articleBody keys. :articleBody is the Schema.org property that carries
# the article text; :schema_object_body is kept so inputs that already use
# that key keep working.
#
# @return [Object, nil] the candidate whose #to_s is longest, or nil when
#   every candidate is nil
def description
  %i[description schema_object_body abstract articleBody]
    .map { |key| schema_object[key] }
    .max_by { |candidate| candidate.to_s.size } # to_s so nil counts as length 0
end

#id ⇒ Object



28
# File 'lib/html2rss/auto_source/scraper/schema/base.rb', line 28

# Determines an identifier for the schema object.
#
# Tries, in order: the :@id entry of the schema object, the path of #url,
# and finally the title downcased with each whitespace run replaced by "-".
# The first truthy value wins.
#
# @return [Object] the identifier (an empty String when nothing else is
#   available and the title is nil)
def id
  explicit_id = schema_object[:@id]
  return explicit_id if explicit_id

  path = url&.path
  return path if path

  title.to_s.downcase.gsub(/\s+/, '-')
end

#image ⇒ Object



47
# File 'lib/html2rss/auto_source/scraper/schema/base.rb', line 47

# Returns the first entry of #images.
#
# @return [Object, nil] the first image, or nil when there is none
#   (a falsy first entry is also reported as nil)
def image
  first_image = images.first
  first_image || nil
end

#published_at ⇒ Object



48
# File 'lib/html2rss/auto_source/scraper/schema/base.rb', line 48

# Reads the publication date from the schema object.
#
# @return [Object, nil] the value stored under :datePublished, returned as-is
def published_at
  schema_object[:datePublished]
end

#title ⇒ Object



29
# File 'lib/html2rss/auto_source/scraper/schema/base.rb', line 29

# Reads the title from the schema object.
#
# @return [Object, nil] the value stored under :title, returned as-is
def title
  schema_object[:title]
end

#url ⇒ Addressable::URI?

Returns the URL of the schema object.

Returns:

  • (Addressable::URI, nil)

    the URL of the schema object



37
38
39
40
41
42
43
44
45
# File 'lib/html2rss/auto_source/scraper/schema/base.rb', line 37

# Resolves the schema object's :url entry against the URL given to the
# constructor.
#
# When the entry is missing or its #to_s is empty, a debug message is logged
# and nil is returned instead.
#
# @return [Addressable::URI, nil] the URL of the schema object
def url
  raw_url = schema_object[:url]
  return Utils.build_absolute_url_from_relative(raw_url, @url) unless raw_url.to_s.empty?

  Log.debug("Schema#Base.url: no url in schema_object: #{schema_object.inspect}")
  nil
end