Class: PageByPage::Jump
- Inherits:
-
Object
show all
- Includes:
- Common
- Defined in:
- lib/page_by_page/jump.rb
Instance Method Summary
collapse
Methods included from Common
#header, #initialize, #interval, #no_progress, #selector, #to
Instance Method Details
#iterate(selector) ⇒ Object
12
13
14
|
# File 'lib/page_by_page/jump.rb', line 12
# Records the selector that #process hands to `at_css` on each fetched
# document in order to find the link leading to the following page.
#
# @param selector the selector identifying the "next page" element
# @return the selector that was just stored
def iterate(selector)
  @iterate = selector
end
|
#process ⇒ Object
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
# File 'lib/page_by_page/jump.rb', line 16
# Walks a chain of pages by following "next" links, gathering every node that
# matches @selector along the way.
#
# Starting from @start, each page is fetched with `parse`, its matching nodes
# are appended to the result, and the element matched by @iterate supplies the
# URL of the following page. The walk ends when any of these holds:
#
# * the number of pages visited reaches `limit`,
# * the page has no element matching @iterate,
# * that element has no usable `href` (missing or empty).
#
# NOTE(review): `parse`, `limit`, `concat_host` and `update_progress` are
# defined outside this method; their exact contracts are not visible here.
#
# @return [Array] all collected nodes, in page order
def process
  url = @start
  items = []
  page_count = 0

  loop do
    doc = parse url
    items.concat doc.css(@selector).to_a
    page_count += 1
    update_progress Thread.current, page_count if @progress
    break if page_count >= limit

    next_link = doc.at_css(@iterate)
    break unless next_link

    # A matched element without a usable href (e.g. a disabled "next" button)
    # means there is nowhere left to go; stop instead of calling a method on nil.
    path = next_link.attr('href')
    break if path.nil? || path.empty?

    # Root-relative paths need the host prepended; anything else is used as-is.
    url = path.start_with?('/') ? concat_host(path) : path
    sleep @interval if @interval
  end

  # Print a bare newline after the progress output written during the walk.
  puts if @progress
  items
end
|
#start(url) ⇒ Object
8
9
10
|
# File 'lib/page_by_page/jump.rb', line 8
# Records the URL of the first page; #process begins its walk from here.
#
# @param url the address of the first page to fetch
# @return the URL that was just stored
def start(url)
  @start = url
end
|