Class: TheScrap::DetailObj
Instance Attribute Summary
Attributes inherited from Scrap
#base_url, #data_proc, #debug, #detail_info, #encoding, #html_proc, #item_frag, #result_proc, #url, #verbose
Instance Method Summary
Methods inherited from Scrap
#initialize, #method_missing, #retryable
Constructor Details
This class inherits a constructor from TheScrap::Scrap
Dynamic Method Handling
This class handles dynamic methods through the method_missing method in the class TheScrap::Scrap
Instance Method Details
#do_scrap(url, item_info) ⇒ Object
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/the_scrap/detail_obj.rb', line 10

# Fetches a single detail page and runs it through the scraping pipeline.
#
# In order, this method: downloads +url+; passes the HTML through every
# callable in +html_proc+ (each receives the previous result); parses the
# result with Nokogiri using +encoding+; calls +get_attrs+ with the parsed
# document; hands each +detail_info+ entry its own URL taken from
# +item_info+; then invokes the +data_proc+ and +result_proc+ callbacks.
#
# The page is fetched with URI#read (provided by open-uri) instead of
# Kernel#open, so a scraped value beginning with "|" cannot spawn a
# subprocess, the stream is closed after reading, and the call keeps working
# on Ruby 3.0+, where open-uri no longer patches Kernel#open.
# NOTE(review): assumes open-uri is already required by this file, as the
# original open(url) call needed it for HTTP URLs -- confirm.
# NOTE(review): a non-URL string (e.g. a local file path) is no longer opened.
#
# @param url [String] address of the detail page to download
# @param item_info [Object] record for the current item; indexed with
#   detail[1] below (presumably a Hash -- verify against callers)
# @return [Object] the same +item_info+ object that was passed in
def do_scrap(url, item_info)
  html = URI.parse(url).read

  # Each HTML pre-processor receives the output of the previous one.
  html_proc.each do |dp|
    html = dp.call(html)
  end

  doc = Nokogiri::HTML(html, nil, encoding)
  get_attrs(url, doc, item_info)

  # Has a detail page? This can recurse into lower levels: detail[0] is the
  # nested scraper and detail[1] is the key in item_info holding its URL.
  detail_info.each do |detail|
    detail[0].scrap(item_info[detail[1]], item_info)
  end

  # Process data.
  data_proc.each do |dp|
    dp.call(url, item_info)
  end

  # Process result. Storage handling for the detail information can be
  # specified separately here; callbacks receive the item wrapped in an Array.
  result_proc.each do |rp|
    rp.call(url, [item_info])
  end

  pp item_info if debug?
  item_info
end
#scrap(url, item_info) ⇒ Object
4 5 6 7 8 |
# File 'lib/the_scrap/detail_obj.rb', line 4

# Entry point for scraping one detail page.
#
# Delegates to #do_scrap inside the inherited +retryable+ helper, passing
# +tries: 3+ and +on: Timeout::Error+ (presumably retrying on timeouts up to
# three times -- see TheScrap::Scrap#retryable for the exact policy).
#
# @param url [String] address of the detail page
# @param item_info [Object] record for the current item, forwarded unchanged
# @return [Object] whatever +retryable+ returns for the block
def scrap(url, item_info)
  retryable(tries: 3, on: Timeout::Error) { do_scrap(url, item_info) }
end