Class: ReadabilityJs::Extended

Inherits:
Object
  • Object
show all
Defined in:
lib/readability_js/extended.rb

Constant Summary collapse

# CSS selectors, listed as an uncommented first group followed by groups
# labelled with the news site they belong to (see the inline comments).
# NOTE(review): presumably elements matching these selectors are stripped from
# the page around extraction; the consuming code is not visible here, confirm.
#
# The array is frozen so the shared list cannot be mutated in place at runtime.
# Code that needs additional selectors should build a new array instead,
# e.g. `SELECTOR_BLACKLIST + [".my-selector"]`.
SELECTOR_BLACKLIST =
[
  ".Article-Partner",
  ".Article-Partner-Text",
  ".Article-Comments-Button",
  "#isl-5-AdCarousel",
  "#isl-10-ArticleComments",
  "*[data-element-tracking-name]",
  "*[aria-label='Anzeige']",
  "nav[aria-label='breadcrumb']",
  # heise
  "a-video",
  "a-gift",
  "a-collapse",
  "a-opt-in",
  # spiegel
  "[data-area='related_articles']",
  # welt
  "nav[aria-label='Breadcrumb']",
  ".c-inline-teaser-list",
  "[width='1'][height='1']",
  # golem
  ".go-alink-list",
  # faz
  "[data-external-selector='related-articles-entries']",
  ".BigBox",
  # frankfurter rundschau
  ".id-Breadcrumb-item",
  ".id-Story-interactionBar",
  "revenue-reel",
  ".id-StoryElement-factBox",
  # stern
  ".breadcrumb",
  ".teaser",
  ".group-teaserblock__items",
  ".title__kicker",
  "ws-adtag",
  # taz
  "[data-for='webelement_bio']",
  "[data-for='webelement_citation']",
  "#articleTeaser",
  ".article-produktteaser-container",
  "[x-data='{}']",
  "#komune",
  ".community",
].freeze

Class Method Summary collapse

Class Method Details

.after_cleanup(result, html) ⇒ Object



55
56
57
58
# File 'lib/readability_js/extended.rb', line 55

# Post-processing hook: first hands +result+ and +html+ to
# +find_and_add_picture+, then passes +result+ to
# +clean_up_and_enrich_result+.
#
# @param result [Object] forwarded to both helpers
# @param html [Object] forwarded to +find_and_add_picture+ only
# @return [Object] whatever +clean_up_and_enrich_result+ returns
def self.after_cleanup(result, html)
  find_and_add_picture(result, html)
  clean_up_and_enrich_result(result)
end

.before_cleanup(html) ⇒ Object



51
52
53
# File 'lib/readability_js/extended.rb', line 51

# Pre-processing hook: delegates straight to +pre_parser+.
#
# @param html [Object] forwarded unchanged to +pre_parser+
# @return [Object] whatever +pre_parser+ returns
def self.before_cleanup(html)
  pre_parser(html)
end