Method: Html2rss::AutoSource::Scraper::SemanticHtml::Image.from_source
- Defined in:
- lib/html2rss/auto_source/scraper/semantic_html/image.rb
.from_source(article_tag) ⇒ Object
Extracts the largest image source from the srcset attribute of an img tag or a source tag inside a picture tag.
29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/html2rss/auto_source/scraper/semantic_html/image.rb', line 29

# Picks the srcset candidate with the largest numeric descriptor among all
# `img[srcset]` and `picture > source[srcset]` nodes below +article_tag+.
#
# Only candidates carrying a `<number>w` or `<number>h` descriptor are
# considered; both kinds are compared by their numeric value alone.
# Candidates whose URL is a `data:` URI are ignored. When several candidates
# share the same number, the one appearing last wins.
#
# @param article_tag [#css] node to search; each match must respond to
#   `['srcset']` (presumably a Nokogiri element -- confirm against callers)
# @return [String, nil] URL of the largest candidate, or nil when no usable
#   candidate exists
def self.from_source(article_tag)
  sources = article_tag.css('img[srcset], picture > source[srcset]')

  by_size = sources.flat_map do |source|
    # filter_map drops skipped candidates so `to_h` never sees a nil entry.
    source['srcset'].to_s.scan(/(\S+)\s+(\d+w|\d+h)/).filter_map do |raw_url, descriptor|
      # With no whitespace after the separating comma ("a 1w,b 2w") the comma
      # is captured as part of the next URL; strip it.
      url = raw_url.sub(/\A,+/, '')
      next if url.empty? || url.start_with?('data:')

      # String#to_i reads the leading digits and ignores the w/h suffix.
      [descriptor.to_i, url]
    end
  end.to_h

  by_size[by_size.keys.max]
end