Class: PageByPage::Jump

Inherits:
Object
  • Object
show all
Includes:
Common
Defined in:
lib/page_by_page/jump.rb

Instance Method Summary collapse

Methods included from Common

#header, #initialize, #interval, #no_progress, #selector, #to

Instance Method Details

#iterate(selector) ⇒ Object



12
13
14
# File 'lib/page_by_page/jump.rb', line 12

# Stores the CSS selector that +process+ passes to +at_css+ to find the
# element linking to the next page.
#
# @param selector [Object] selector for the "next page" element
#   (presumably a CSS selector String — confirm against callers)
# @return [Object] the selector that was just stored
def iterate(selector)
  @iterate = selector
end

#process ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/page_by_page/jump.rb', line 16

# Follows "next page" links starting from the URL stored in @start and
# collects every node matching @selector on each visited page.
#
# Pagination stops when any of the following holds:
# * the number of visited pages reaches +limit+,
# * the current page has no element matching @iterate,
# * the matched element carries no +href+ attribute.
#
# When @progress is truthy, +update_progress+ is called after each page and
# a trailing newline is printed once the loop finishes. When @interval is
# set, the method sleeps that long between page fetches.
#
# NOTE(review): +parse+, +limit+, +concat_host+ and +update_progress+ are
# not defined in this listing; presumably they come from Common — confirm.
#
# @return [Array] matched nodes from all visited pages, in visit order
def process
  url = @start
  items = []
  page_count = 0

  loop do
    doc = parse(url)
    doc.css(@selector).each { |item| items << item }

    page_count += 1
    update_progress(Thread.current, page_count) if @progress
    break if page_count >= limit

    next_link = doc.at_css(@iterate)
    break unless next_link

    # A matched element without an href leaves nowhere to go; end the crawl
    # here rather than raising NoMethodError on nil below.
    path = next_link.attr('href')
    break unless path

    # Host-relative paths need the host prepended; anything else is used
    # verbatim as the next URL.
    url = path.start_with?('/') ? concat_host(path) : path

    sleep @interval if @interval
  end

  puts if @progress
  items
end

#start(url) ⇒ Object



8
9
10
# File 'lib/page_by_page/jump.rb', line 8

# Stores the URL that +process+ uses as the first page to fetch.
#
# @param url [Object] the starting URL
#   (presumably a String — confirm against callers)
# @return [Object] the URL that was just stored
def start(url)
  @start = url
end