Class: TheScrap::DetailObj

Inherits:
Scrap
  • Object
show all
Defined in:
lib/the_scrap/detail_obj.rb

Instance Attribute Summary

Attributes inherited from Scrap

#base_url, #data_proc, #debug, #detail_info, #encoding, #html_proc, #item_frag, #result_proc, #url, #verbose

Instance Method Summary collapse

Methods inherited from Scrap

#initialize, #method_missing, #retryable

Constructor Details

This class inherits a constructor from TheScrap::Scrap

Dynamic Method Handling

This class handles dynamic methods through the method_missing method in the class TheScrap::Scrap

Instance Method Details

#do_scrap(url, item_info) ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/the_scrap/detail_obj.rb', line 10

# Downloads the page at +url+, runs it through the configured processing
# pipeline and returns the populated +item_info+.
#
# Steps, in order:
# 1. read the raw body and pass it through every +html_proc+ callable,
#    each one receiving the previous callable's result;
# 2. parse the result with Nokogiri using the configured +encoding+;
# 3. hand the parsed document to +get_attrs+ together with +item_info+;
# 4. run every nested scraper listed in +detail_info+;
# 5. call every +data_proc+ with (url, item_info);
# 6. call every +result_proc+ with (url, [item_info]).
#
# @param url [String] location of the page to fetch (anything open-uri or
#   Kernel#open accepts)
# @param item_info [Object] record being built; it is indexed with +[]+
#   (presumably a Hash -- confirm against callers)
# @return [Object] the same +item_info+ object that was passed in
def do_scrap(url, item_info)
  # Since Ruby 3.0 open-uri no longer patches Kernel#open, so URLs must go
  # through URI.open; older rubies without URI.open keep using Kernel.open.
  # The block form closes the handle as soon as the body has been read.
  opener = (defined?(URI) && URI.respond_to?(:open)) ? URI : Kernel
  html = opener.open(url, &:read)

  # Each HTML pre-processor receives the output of the previous one.
  html = html_proc.inject(html) { |markup, processor| processor.call(markup) }

  doc = Nokogiri::HTML(html, nil, encoding)
  get_attrs(url, doc, item_info)

  # Has a detail page? Nested levels can be scraped recursively: each entry
  # holds a scraper at index 0 and, at index 1, the item_info key whose value
  # is passed to that scraper as its URL.
  detail_info.each do |detail|
    detail[0].scrap(item_info[detail[1]], item_info)
  end

  # Process the collected data.
  data_proc.each { |processor| processor.call(url, item_info) }

  # Process the result. Persistence of the detail record can be configured
  # separately here; handlers receive the item wrapped in a one-element array.
  result_proc.each { |processor| processor.call(url, [item_info]) }

  pp item_info if debug?
  item_info
end

#scrap(url, item_info) ⇒ Object



4
5
6
7
8
# File 'lib/the_scrap/detail_obj.rb', line 4

# Public entry point: runs #do_scrap for +url+ inside +retryable+, configured
# with three tries on Timeout::Error.
#
# @param url [String] location of the page to scrape
# @param item_info [Object] record handed through to #do_scrap
# @return [Object] whatever +retryable+ returns for the wrapped #do_scrap call
def scrap(url, item_info)
  retryable(tries: 3, on: Timeout::Error) { do_scrap(url, item_info) }
end