Method: Html2rss::AutoSource::Scraper::SemanticHtml::Image.from_source

Defined in:
lib/html2rss/auto_source/scraper/semantic_html/image.rb

.from_source(article_tag) ⇒ Object

Extracts the largest image source from the srcset attribute of an img tag or a source tag inside a picture tag.

See Also:


29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/html2rss/auto_source/scraper/semantic_html/image.rb', line 29

##
# Picks the URL with the largest size descriptor out of every `srcset`
# found on `img` tags and on `source` tags that are direct children of a
# `picture` tag.
#
# Only candidates carrying a width (`123w`) or height (`123h`) descriptor
# are considered; both kinds are compared by their numeric value alone.
# Candidates whose URL is a `data:` URI are skipped.
#
# @param article_tag [#css] node to search; `css` must return elements that
#   answer `['srcset']`
# @return [String, nil] the URL with the largest descriptor, or nil when no
#   candidate qualifies
def self.from_source(article_tag)
  candidates = article_tag.css('img[srcset], picture > source[srcset]').flat_map do |source|
    source['srcset'].to_s.scan(/(\S+)\s+(\d+w|\d+h)/)
                    # Drop inline data URIs before building pairs: a nil entry
                    # would make the later `to_h` raise a TypeError.
                    .reject { |url, _descriptor| url.start_with?('data:') }
                    # The descriptor always starts with digits, so `to_i` yields its size.
                    .map { |url, descriptor| [descriptor.to_i, url.strip] }
  end

  # Keyed by size; when two candidates share a size the later one wins.
  url_by_size = candidates.to_h

  url_by_size[url_by_size.keys.max]
end