Class: Html2rss::AutoSource::Channel

Inherits:
Object
  • Object
show all
Defined in:
lib/html2rss/auto_source/channel.rb

Overview

Extracts channel information from

  1. the HTML document’s <head>.

  2. the HTTP response headers.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(parsed_body, url:, headers:, articles: [], stylesheets: []) ⇒ Channel

Returns a new instance of Channel.

Parameters:

  • parsed_body (Nokogiri::HTML::Document)

    The parsed HTML document.

  • url (Addressable::URI)

    The URL of the channel.

  • headers (Hash<String, String>)

    The HTTP response headers.

  • articles (Array<Html2rss::AutoSource::Article>) (defaults to: [])

    The articles.

  • stylesheets (Array) (defaults to: [])

    The stylesheets.

[View source]

16
17
18
19
20
21
22
# File 'lib/html2rss/auto_source/channel.rb', line 16

# Builds a channel from an already parsed page and its response metadata.
# The arguments are stored as given; nothing is extracted at construction time.
#
# @param parsed_body [Nokogiri::HTML::Document] the parsed HTML document
# @param url [Addressable::URI] the URL of the channel
# @param headers [Hash<String, String>] the HTTP response headers
# @param articles [Array<Html2rss::AutoSource::Article>] the articles
# @param stylesheets [Array] the stylesheets (element type is not visible here)
def initialize(parsed_body, url:, headers:, articles: [], stylesheets: [])
  @parsed_body = parsed_body
  @url = url
  @headers = headers
  @articles = articles
  @stylesheets = stylesheets
end

Instance Attribute Details

#articles=(value) ⇒ Object (writeonly)

Sets the attribute articles

Parameters:

  • value

    the value to set the attribute articles to.


24
25
26
# File 'lib/html2rss/auto_source/channel.rb', line 24

# Replaces the stored articles with the given value.
#
# @param value [Object] the new articles; stored without validation or copying
def articles=(value)
  @articles = value
end

#stylesheets ⇒ Object (readonly)

Returns the value of attribute stylesheets.


25
26
27
# File 'lib/html2rss/auto_source/channel.rb', line 25

# @return [Object] the value currently held in @stylesheets (nil when unset)
def stylesheets = @stylesheets

Instance Method Details

#description ⇒ Object

[View source]

37
# File 'lib/html2rss/auto_source/channel.rb', line 37

# Reads the page description from the <meta name="description"> tag.
#
# @return [Object, nil] the tag's "content" attribute, or nil when no such tag is found
def description
  meta = parsed_body.at_css('meta[name="description"]')
  return if meta.nil?

  meta['content']
end

#generator ⇒ Object

[View source]

58
59
60
# File 'lib/html2rss/auto_source/channel.rb', line 58

# Builds the generator string from the library version and the scraper summary
# returned by +scraper_counts+.
#
# @return [String]
def generator
  version = ::Html2rss::VERSION
  scrapers = scraper_counts
  "html2rss V. #{version} (using auto_source scrapers: #{scrapers})"
end

#image ⇒ Object

[View source]

46
47
48
49
# File 'lib/html2rss/auto_source/channel.rb', line 46

# Looks up the Open Graph image (<meta property="og:image">) and passes its
# "content" attribute through Html2rss::Utils.sanitize_url.
#
# @return [Object, nil] the sanitized URL, or nil when the tag or its content is absent
def image
  og_image = parsed_body.at_css('meta[property="og:image"]')&.[]('content')
  return unless og_image

  Html2rss::Utils.sanitize_url(og_image)
end

#language ⇒ Object

[View source]

40
41
42
43
44
# File 'lib/html2rss/auto_source/channel.rb', line 40

# Determines the document language.
#
# When +parsed_body+ is itself an element named "html", its own "lang"
# attribute wins; otherwise the first node matching +[lang]+ is used.
#
# @return [Object, nil] the "lang" attribute value, or nil when none is found
def language
  root_lang = parsed_body['lang'] if parsed_body.name == 'html'
  root_lang || parsed_body.at_css('[lang]')&.[]('lang')
end

#last_build_date ⇒ Object

[View source]

38
# File 'lib/html2rss/auto_source/channel.rb', line 38

# @return [Object, nil] the raw value stored under the 'last-modified' key of
#   +headers+, or nil when that key is absent
def last_build_date
  headers['last-modified']
end

#title ⇒ Object

[View source]

29
30
31
32
33
34
35
# File 'lib/html2rss/auto_source/channel.rb', line 29

# Returns the channel title, memoized in @title.
#
# The text of +head > title+ is used after collapsing whitespace runs to a
# single space and stripping the ends. The emptiness check happens AFTER that
# normalisation, so a missing, empty, or whitespace-only <title> falls back to
# Utils.titleized_channel_url(@url) instead of yielding an empty string.
#
# @return [String] the normalised page title, or the URL-derived fallback
def title
  @title ||= begin
    text = parsed_body.at_css('head > title')&.text.to_s.gsub(/\s+/, ' ').strip
    text.empty? ? Utils.titleized_channel_url(@url) : text
  end
end

#ttl ⇒ Object

[View source]

51
52
53
54
55
56
# File 'lib/html2rss/auto_source/channel.rb', line 51

# Derives a time-to-live from the +max-age=<digits>+ directive of the
# 'cache-control' header: the number is divided by 60 and rounded up.
#
# @return [Integer, nil] the rounded-up quotient, or nil when the header or
#   the directive is absent
def ttl
  max_age = headers['cache-control']&.match(/max-age=(\d+)/)&.[](1)
  max_age && max_age.to_i.fdiv(60).ceil
end

#url ⇒ Object

[View source]

27
# File 'lib/html2rss/auto_source/channel.rb', line 27

# @return [String] the result of calling +normalize+ on @url, converted with +to_s+
def url
  @url.normalize.to_s
end