Class: Html2rss::AutoSource::Article

Inherits:
Object
  • Object
show all
Includes:
Comparable, Enumerable
Defined in:
lib/html2rss/auto_source/article.rb

Overview

Article is a simple data object representing an article extracted from a page. It is enumerable and responds to all keys specified in PROVIDED_KEYS.

Constant Summary collapse

PROVIDED_KEYS =
%i[id title description url image guid published_at scraper].freeze

Instance Method Summary collapse

Constructor Details

#initialize(**options) ⇒ Article

Returns a new instance of Article.

Parameters:

  • options (Hash<Symbol, String>)


18
19
20
21
22
23
24
25
26
# File 'lib/html2rss/auto_source/article.rb', line 18

# Builds the article from keyword options.
#
# Every truthy option value is frozen in place and stored in a frozen
# attribute hash; nil/false values are dropped. Keys that are not listed in
# PROVIDED_KEYS are still stored, but a warning naming them is logged.
#
# @param options [Hash<Symbol, String>]
def initialize(**options)
  @to_h = options.each_with_object({}) do |(key, value), attributes|
    attributes[key] = value.freeze if value
  end.freeze

  unknown_keys = options.keys - PROVIDED_KEYS
  Log.warn "Article: unknown keys found: #{unknown_keys.join(', ')}" unless unknown_keys.empty?
end

Instance Method Details

#<=>(other) ⇒ Object



88
89
90
91
92
# File 'lib/html2rss/auto_source/article.rb', line 88

# Compares this article with another one attribute by attribute.
#
# @param other [Object]
# @return [Integer, nil] 0 when every key/value pair yielded by +other+
#   equals the value this article returns for the same key; nil when any
#   pair differs or +other+ is not an Article.
def <=>(other)
  return unless other.is_a?(Article)

  identical = other.all? do |key, theirs|
    ours = public_send(key)
    # Values that are == must additionally yield a truthy <=> result.
    theirs == ours ? ours <=> theirs : false
  end

  identical ? 0 : nil
end

#description ⇒ Object



50
51
52
53
54
55
56
# File 'lib/html2rss/auto_source/article.rb', line 50

# Sanitized description of the article.
#
# A previously computed value is returned as-is. Otherwise the raw
# :description option is passed, together with the article URL, through
# Html2rss::AttributePostProcessors::SanitizeHtml.get and the result is cached.
#
# @return [Object, nil] nil when the URL or the raw description is blank
def description
  return @description if defined?(@description)

  raw = @to_h[:description]
  return nil if url.to_s.empty? || raw.to_s.empty?

  @description = Html2rss::AttributePostProcessors::SanitizeHtml.get(raw, url)
end

#each {|key, value| ... } ⇒ Enumerator

Returns an Enumerator if no block is given.

Yields:

  • (key, value)

Returns:

  • (Enumerator)

    if no block is given



36
37
38
39
40
# File 'lib/html2rss/auto_source/article.rb', line 36

# Iterates over the article's attributes in PROVIDED_KEYS order.
#
# @yield [key, value] each key from PROVIDED_KEYS together with the value
#   returned by the reader method of the same name
# @return [Enumerator] if no block is given
def each
  return to_enum(:each) unless block_given?

  PROVIDED_KEYS.each do |attribute|
    yield attribute, public_send(attribute)
  end
end

#guid ⇒ String

Generates a unique identifier based on the URL and ID using CRC32.

Returns:

  • (String)


70
71
72
# File 'lib/html2rss/auto_source/article.rb', line 70

# Identifier derived from the article's URL and ID.
#
# The URL and ID are joined with '#!/', hashed with CRC32 and rendered in
# base 36. The result is memoized.
#
# @return [String] UTF-8 encoded identifier
def guid
  return @guid if @guid

  fingerprint = [url, id].join('#!/')
  @guid = Zlib.crc32(fingerprint).to_s(36).encode('utf-8')
end

#id ⇒ Object



42
43
44
# File 'lib/html2rss/auto_source/article.rb', line 42

# @return [Object, nil] the value stored under :id, or nil when none is stored
def id
  @to_h.fetch(:id, nil)
end

#image ⇒ Addressable::URI?

Returns:

  • (Addressable::URI, nil)


64
65
66
# File 'lib/html2rss/auto_source/article.rb', line 64

# Image URL of the article.
#
# The raw :image option is passed through Html2rss::Utils.sanitize_url; a
# truthy result is cached for later calls.
#
# @return [Addressable::URI, nil]
def image
  return @image if @image

  @image = Html2rss::Utils.sanitize_url(@to_h[:image])
end

#published_at ⇒ Time?

Parses and returns the published_at time.

Returns:

  • (Time, nil)


76
77
78
79
80
81
82
# File 'lib/html2rss/auto_source/article.rb', line 76

# Publication time parsed from the raw :published_at option.
#
# The first successfully parsed time is memoized. A blank raw value always
# yields nil, even when a time was memoized earlier.
#
# @return [Time, nil] nil when the raw value is blank or Time.parse raises
#   ArgumentError
def published_at
  raw = @to_h[:published_at].to_s
  return nil if raw.strip.empty?

  @published_at ||= Time.parse(raw)
rescue ArgumentError
  nil
end

#scraper ⇒ Object



84
85
86
# File 'lib/html2rss/auto_source/article.rb', line 84

# @return [Object, nil] the value stored under :scraper, or nil when none is stored
def scraper
  @to_h.fetch(:scraper, nil)
end

#title ⇒ Object



46
47
48
# File 'lib/html2rss/auto_source/article.rb', line 46

# @return [Object, nil] the value stored under :title, or nil when none is stored
def title
  @to_h.fetch(:title, nil)
end

#url ⇒ Addressable::URI?

Returns:

  • (Addressable::URI, nil)


59
60
61
# File 'lib/html2rss/auto_source/article.rb', line 59

# URL of the article.
#
# The raw :url option is passed through Html2rss::Utils.sanitize_url; a
# truthy result is cached for later calls.
#
# @return [Addressable::URI, nil]
def url
  return @url if @url

  @url = Html2rss::Utils.sanitize_url(@to_h[:url])
end

#valid? ⇒ Boolean

Checks if the article is valid based on the presence of URL, ID, and either title or description.

Returns:

  • (Boolean)

    True if the article is valid, otherwise false.



30
31
32
# File 'lib/html2rss/auto_source/article.rb', line 30

# Checks whether the article carries enough data to be used.
#
# @return [Boolean] true when the URL and the ID are non-blank and at least
#   one of title or description is non-blank; false otherwise
def valid?
  return false if url.to_s.empty?
  return false if title.to_s.empty? && description.to_s.empty?

  !id.to_s.empty?
end