Class: BrandEins::Downloader::ArchiveSite

Inherits:
Object
  • Object
show all
Defined in:
lib/brandeins.rb

Defined Under Namespace

Classes: ArchiveMagazine

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(base_url, html = false) ⇒ ArchiveSite

Returns a new instance of ArchiveSite.



155
156
157
158
159
160
161
# File 'lib/brandeins.rb', line 155

# Creates an archive-site wrapper rooted at +base_url+.
#
# @param base_url [String] site root; the archive page address is derived
#   from it by appending "/archiv.html"
# @param html [Object] optional markup handed to Nokogiri::HTML; when it is
#   falsy (the default) no document is parsed here
def initialize(base_url, html = false)
  @base_url    = base_url
  @archive_url = @base_url + '/archiv.html'
  # Only assign @doc when markup was actually supplied.
  @doc = Nokogiri::HTML(html) if html
end

Instance Attribute Details

#doc ⇒ Object

Returns the value of attribute doc.



153
154
155
# File 'lib/brandeins.rb', line 153

# Reader for @doc. In the code shown on this page @doc is assigned from
# Nokogiri::HTML(...) either by #initialize (when markup is passed in) or
# by #setup; before either has happened this returns nil.
def doc
  @doc
end

Instance Method Details



168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# File 'lib/brandeins.rb', line 168

# Collects the absolute URLs of the magazines listed for one year on the
# archive page.
#
# Calls #setup first, then scans the list items matched by the selector
# ".jahrgang-<year> ul li". Items that carry an +id+ attribute are ignored,
# as are items without a link or whose first link has no +href+.
#
# @param year [Integer] year as it appears in the "jahrgang-<year>" CSS
#   class (defaults to 2000)
# @return [Array<String>] one "<base_url>/<href>" entry per usable list item,
#   in document order
def get_magazine_links_by_year(year = 2000)
  setup
  puts "Loading Magazine from year #{year}"

  @doc.css(".jahrgang-#{year} ul li").each_with_object([]) do |node, magazine_links|
    next unless node['id'].nil?

    anchor = node.css('a')[0]
    # An anchor without an href would make the concatenation below raise,
    # so such entries are skipped just like items without any link.
    href = anchor && anchor['href']
    next if href.nil?

    magazine_links << @base_url + '/' + href
  end
end


186
187
188
189
# File 'lib/brandeins.rb', line 186

# Returns the PDF links of a single magazine page.
#
# Builds an ArchiveMagazine from +url+ and this site's base URL and
# delegates to its #get_magazine_pdf_links.
#
# @param url [String] address of the magazine page — presumably one of the
#   entries produced by #get_magazine_links_by_year; confirm against callers
# @return [Object] whatever ArchiveMagazine#get_magazine_pdf_links returns
def magazine_pdf_links(url)
  ArchiveMagazine.new(url, @base_url).get_magazine_pdf_links
end

#setup ⇒ Object



163
164
165
166
# File 'lib/brandeins.rb', line 163

# Loads and parses the archive page into @doc unless a document is already
# present (for example because markup was passed to the constructor).
#
# Fetching goes through URI.open where that exists, because open-uri no
# longer patches Kernel#open for URLs on Ruby 3.0+. Older Rubies fall back
# to Kernel.open as before. Either way this assumes open-uri has been
# required by the file — TODO confirm at the top of lib/brandeins.rb.
#
# @return [Object, nil] nil when a document was already loaded, otherwise the
#   freshly parsed document
def setup
  return if @doc

  opener = URI.respond_to?(:open) ? URI : Kernel
  # Block form so the fetched stream is closed once Nokogiri has read it.
  @doc = opener.open(@archive_url) { |io| Nokogiri::HTML(io) }
end