Class: BrandEins::Downloader::ArchiveSite::ArchiveMagazine

Inherits:
Object
  • Object
show all
Defined in:
lib/brandeins.rb

Defined Under Namespace

Classes: MagazineArticle

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, base_url, html = false) ⇒ ArchiveMagazine

Returns a new instance of ArchiveMagazine.



194
195
196
197
198
199
# File 'lib/brandeins.rb', line 194

# Loads the magazine page at +url+ and keeps the parsed document for later
# link extraction.
#
# Prints a "Parsing ..." progress line to standard output before fetching.
#
# @param url [String] location of the magazine page (presumably an HTTP URL;
#   confirm against callers)
# @param base_url [String] prefix that #get_links joins with relative hrefs
# @param html [Object] accepted for backward compatibility; this method does
#   not read it
def initialize(url, base_url, html = false)
  puts "Parsing #{url}"
  @url = url
  @base_url = base_url
  # Kernel#open stopped fetching URLs in Ruby 3.0 and runs a subprocess for
  # input that starts with "|". Use URI.open wherever open-uri provides it
  # (Ruby 2.5+); older interpreters keep the legacy call.
  use_uri_open = defined?(URI) && URI.respond_to?(:open)
  source = use_uri_open ? URI.open(url) : open(url)
  @doc = Nokogiri::HTML(source)
end

Instance Attribute Details

#doc ⇒ Object

Returns the value of attribute doc.



192
193
194
# File 'lib/brandeins.rb', line 192

# Reader for the +@doc+ instance variable, which the constructor assigns
# from Nokogiri::HTML.
#
# @return [Object] the current value of +@doc+
def doc; @doc; end

#url ⇒ Object

Returns the value of attribute url.



192
193
194
# File 'lib/brandeins.rb', line 192

# Reader for the +@url+ instance variable, which the constructor sets to
# the +url+ argument it was given.
#
# @return [Object] the current value of +@url+
def url; @url; end

Instance Method Details

#get_editorial_article_links ⇒ Object



210
211
212
# File 'lib/brandeins.rb', line 210

# Collects links for the anchors found under ".editorial-links li a".
#
# @return [Object] whatever #get_links returns for that selector
def get_editorial_article_links
  editorial_selector = ".editorial-links li a"
  get_links(editorial_selector)
end


214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# File 'lib/brandeins.rb', line 214

# Resolves every anchor matched by +css_selector+ to the URL of its PDF.
#
# For each matched node, the article URL is built from +@base_url+ and the
# node's href, a MagazineArticle is created for it, and the article is asked
# for its PDF link. Articles without a PDF link are reported on standard
# output and left out of the result.
#
# @param css_selector [String] selector passed to +@doc.css+
# @return [Array<String>] PDF URLs, each prefixed with +@base_url+, in
#   document order
def get_links(css_selector)
  @doc.css(css_selector).each_with_object([]) do |node, pdf_links|
    href = node['href']
    # An anchor without an href cannot point at an article. Skipping it
    # avoids a TypeError from joining nil onto the base URL, which would
    # abort the whole run.
    next unless href

    article_link = "#{@base_url}/#{href}"
    pdf_link = MagazineArticle.new(article_link).get_pdf_link

    if pdf_link
      pdf_links << "#{@base_url}/#{pdf_link}"
    else
      puts "------------------------------"
      puts "No Content for: #{article_link}"
      puts "------------------------------"
    end
  end
end


201
202
203
204
# File 'lib/brandeins.rb', line 201

# Gathers the editorial links followed by the "Schwerpunkt" links as a
# single flattened array.
#
# @return [Array] editorial results first, then Schwerpunkt results
def get_magazine_pdf_links
  editorial = get_editorial_article_links
  schwerpunkt = get_schwerpunkt_article_links
  [editorial, schwerpunkt].flatten
end

#get_schwerpunkt_article_links ⇒ Object



206
207
208
# File 'lib/brandeins.rb', line 206

# Collects links for the anchors found under "div.articleList ul h4 a".
#
# @return [Object] whatever #get_links returns for that selector
def get_schwerpunkt_article_links
  schwerpunkt_selector = "div.articleList ul h4 a"
  get_links(schwerpunkt_selector)
end