Class: Panner::Pans::Wordpress

Inherits:
Object
  • Object
show all
Defined in:
lib/panner/pans/wordpress.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url) ⇒ Wordpress

Returns a new instance of Wordpress.



6
7
8
9
10
# File 'lib/panner/pans/wordpress.rb', line 6

# Sets up a scraper whose first fetch will target +url+.
#
# A fresh Mechanize agent is created per instance, and no page is
# loaded until a download is requested.
#
# @param url [Object] stored as the next URL to fetch
def initialize(url)
  @next_url = url
  @agent = Mechanize.new
  @page = nil
end

Class Method Details

.eligible?(url) ⇒ Boolean

Returns:

  • (Boolean)


2
3
4
# File 'lib/panner/pans/wordpress.rb', line 2

# Reports whether +url+ addresses a site on a wordpress.com subdomain.
#
# The URL must start with http:// or https:// and its host must end in
# ".wordpress.com" (a bare "wordpress.com" host is not accepted). After
# the host only an optional numeric port, a path, a query, a fragment,
# whitespace, or the end of the string may follow, so look-alike hosts
# such as "foo.wordpress.com.evil.example" are rejected.
#
# @param url [#to_s, nil] candidate URL; nil is treated as not eligible
# @return [Boolean] true when the URL is a wordpress.com subdomain URL
def self.eligible?(url)
  # \A (not ^) anchors to the start of the whole string, so a matching
  # URL on a later line of a multi-line value does not count.
  %r{\Ahttps?://[^/?#]+\.wordpress\.com(?::\d+)?(?=[/?#\s]|\z)}.match?(url.to_s)
end

Instance Method Details

#authenticate(options) ⇒ Object



12
13
# File 'lib/panner/pans/wordpress.rb', line 12

# Authentication hook. This implementation does nothing: the supplied
# options are ignored.
#
# @param options [Object] currently unused
# @return [nil]
def authenticate(options)
  nil
end

#download ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/panner/pans/wordpress.rb', line 15

# Fetches the page at the pending URL and parses every post on it.
#
# When there is no pending URL, nothing is fetched: the method prints
# "no more content" and returns nil. Otherwise the fetched page is kept
# in @page and each "article.post" element is passed to parse_article.
#
# @return [Array, nil] the parse_article result for each post found on
#   the page, or nil when there is no URL left to fetch
def download
  # Check before fetching: asking the agent to get a nil URL would fail
  # before the end-of-content case could be reported.
  if @next_url.nil?
    puts "no more content"
    return
  end

  @page = @agent.get(@next_url)
  puts "got page content"

  @page.search("article.post").map { |article| parse_article(article) }
end

#next ⇒ Object



29
30
31
32
33
# File 'lib/panner/pans/wordpress.rb', line 29

# Moves the pending URL to the page linked from the current page's
# "nav-previous" link, or clears it when the page has no such link.
# The new value is printed to standard output.
#
# Requires @page to be set already (it calls @page.search).
#
# @return [nil] the result of puts
def next
  link = @page.search("div.nav-links div.nav-previous a").first
  @next_url = link ? link['href'] : nil
  # Interpolate the value; the bare "@next_url" inside the string was
  # printed literally.
  puts "next_url: #{@next_url}"
end

#parse_article(article) ⇒ Object



35
36
37
38
39
40
41
# File 'lib/panner/pans/wordpress.rb', line 35

# Converts one post element into a hash of its title and body.
#
# The title is the text of the ".entry-title" element; the body is the
# inner HTML of the ".entry-content" element passed through Deba.extract.
# Either element being absent raises, because the lookup result is used
# without a nil check.
#
# @param article [#at_css] element for a single post
# @return [Hash] with keys :title and :body, in that order
def parse_article(article)
  title = article.at_css(".entry-title").text
  content_html = article.at_css(".entry-content").inner_html

  { title: title, body: Deba.extract(content_html) }
end