Class: BrandEins::Downloader::ArchiveSite::ArchiveMagazine
- Inherits:
-
Object
- Object
- BrandEins::Downloader::ArchiveSite::ArchiveMagazine
- Defined in:
- lib/brandeins.rb
Defined Under Namespace
Classes: MagazineArticle
Instance Attribute Summary collapse
-
#doc ⇒ Object
Returns the value of attribute doc.
-
#url ⇒ Object
Returns the value of attribute url.
Instance Method Summary collapse
- #get_editorial_article_links ⇒ Object
- #get_links(css_selector) ⇒ Object
- #get_magazine_pdf_links ⇒ Object
- #get_schwerpunkt_article_links ⇒ Object
-
#initialize(url, base_url, html = false) ⇒ ArchiveMagazine
constructor
A new instance of ArchiveMagazine.
Constructor Details
#initialize(url, base_url, html = false) ⇒ ArchiveMagazine
Returns a new instance of ArchiveMagazine.
194 195 196 197 198 199 |
# File 'lib/brandeins.rb', line 194
#
# Announces the page being parsed, remembers both URLs and stores the page
# at +url+ as a parsed Nokogiri document in @doc.
#
# @param url [String] address of the archive page to download and parse
# @param base_url [String] stored in @base_url; #get_links prefixes links with it
# @param html [Object] not read by this constructor
#   NOTE(review): presumably intended to supply pre-fetched markup — confirm
def initialize(url, base_url, html = false)
  puts "Parsing #{url}"
  @url = url
  @base_url = base_url

  # Kernel#open stopped fetching URLs in Ruby 3.0 and would execute a shell
  # command for a string starting with "|". Prefer URI.open where open-uri
  # provides it (Ruby 2.5+) and fall back to the old call on older rubies.
  opener = URI.respond_to?(:open) ? URI.method(:open) : method(:open)

  # The block form closes the downloaded stream as soon as parsing is done.
  @doc = opener.call(url) { |io| Nokogiri::HTML(io) }
end
Instance Attribute Details
#doc ⇒ Object
Returns the value of attribute doc.
192 193 194 |
# File 'lib/brandeins.rb', line 192
#
# Reader for the +doc+ attribute.
#
# @return [Object] the current value of @doc (nil when it has not been set)
def doc
  @doc
end
#url ⇒ Object
Returns the value of attribute url.
192 193 194 |
# File 'lib/brandeins.rb', line 192
#
# Reader for the +url+ attribute.
#
# @return [Object] the current value of @url (nil when it has not been set)
def url
  @url
end
Instance Method Details
#get_editorial_article_links ⇒ Object
210 211 212 |
# File 'lib/brandeins.rb', line 210
#
# Runs #get_links over the anchors matched by ".editorial-links li a".
#
# @return [Object] whatever #get_links returns for that selector
def get_editorial_article_links
  selector = ".editorial-links li a"
  get_links(selector)
end
#get_links(css_selector) ⇒ Object
214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 |
# File 'lib/brandeins.rb', line 214
#
# Resolves every anchor matched by +css_selector+ in @doc to the PDF link of
# the article it points at.
#
# For each matched node the article address is built as
# "<@base_url>/<href>", a MagazineArticle is created for it and asked for its
# PDF link. Articles without a PDF link are reported on stdout and left out
# of the result.
#
# @param css_selector [String] CSS selector handed to @doc.css
# @return [Array<String>] PDF links, each prefixed with "<@base_url>/"
def get_links(css_selector)
  @doc.css(css_selector).each_with_object([]) do |node, pdf_links|
    href = node['href']
    # An anchor without an href has no article to follow; concatenating nil
    # onto the base URL used to raise a TypeError here.
    next if href.nil?

    article_link = "#{@base_url}/#{href}"
    pdf_link = MagazineArticle.new(article_link).get_pdf_link

    if pdf_link
      pdf_links << "#{@base_url}/#{pdf_link}"
    else
      puts "------------------------------"
      puts "No Content for: #{article_link}"
      puts "------------------------------"
    end
  end
end
#get_magazine_pdf_links ⇒ Object
201 202 203 204 |
# File 'lib/brandeins.rb', line 201
#
# Gathers the editorial links followed by the schwerpunkt links in a single
# flattened array.
#
# @return [Array] results of #get_editorial_article_links and
#   #get_schwerpunkt_article_links, in that order
def get_magazine_pdf_links
  editorial = get_editorial_article_links
  schwerpunkt = get_schwerpunkt_article_links
  [editorial, schwerpunkt].flatten
end
#get_schwerpunkt_article_links ⇒ Object
206 207 208 |
# File 'lib/brandeins.rb', line 206
#
# Runs #get_links over the anchors matched by "div.articleList ul h4 a".
#
# @return [Object] whatever #get_links returns for that selector
def get_schwerpunkt_article_links
  selector = "div.articleList ul h4 a"
  get_links(selector)
end