Class: Creepin::ResourceCreeper
- Inherits:
- Object
- Inheritance chain: Object → Creepin::ResourceCreeper
- Defined in:
- lib/creepin/resource_creeper.rb
Instance Attribute Summary collapse
-
#collected_attributes_hash ⇒ Object
Returns the value of attribute collected_attributes_hash.
-
#loaded_resource ⇒ Object
Returns the value of attribute loaded_resource.
-
#requested_url ⇒ Object
Returns the value of attribute requested_url.
Instance Method Summary collapse
- #build_request_params(param_string) ⇒ Object
- #full_request_url(base_url, request_params) ⇒ Object
-
#initialize(loaded_resource, params = {}) ⇒ ResourceCreeper
constructor
A new instance of ResourceCreeper.
- #load_resource(collected_attributes_hash, resource_klass) ⇒ Object
- #load_response_resource ⇒ Object
- #map_response_resource ⇒ Object
- #parse_response ⇒ Object
- #resource_save_strategy? ⇒ Boolean
- #response_resource? ⇒ Boolean
- #run_after_crawl_callbacks ⇒ Object
- #save_resource(collected_attributes_hash, resource) ⇒ Object
- #skip_resource_save? ⇒ Boolean
- #transmit ⇒ Object
- #url_attribute? ⇒ Boolean
Constructor Details
#initialize(loaded_resource, params = {}) ⇒ ResourceCreeper
Returns a new instance of ResourceCreeper.
7 8 9 10 11 12 |
# File 'lib/creepin/resource_creeper.rb', line 7

# Sets up a creeper around an already-loaded resource.
#
# @param loaded_resource [Object] stored as-is and exposed via #loaded_resource
# @param params [Hash] request parameters; a value that is not `present?`
#   is replaced by an empty hash
def initialize(loaded_resource, params = {})
  @loaded_resource = loaded_resource
  @collected_attributes_hash = {}
  @params = params.present? ? params : {}
end
Instance Attribute Details
#collected_attributes_hash ⇒ Object
Returns the value of attribute collected_attributes_hash.
5 6 7 |
# Reader for @collected_attributes_hash. #initialize sets it to an empty hash
# and #map_response_resource stores one entry per element mapping in it.
# File 'lib/creepin/resource_creeper.rb', line 5 def collected_attributes_hash @collected_attributes_hash end |
#loaded_resource ⇒ Object
Returns the value of attribute loaded_resource.
5 6 7 |
# Reader for @loaded_resource, the first argument given to #initialize.
# #transmit reads the request URL from it when a url_attribute is available,
# and #map_response_resource hands it to #save_resource.
# File 'lib/creepin/resource_creeper.rb', line 5 def loaded_resource @loaded_resource end |
#requested_url ⇒ Object
Returns the value of attribute requested_url.
5 6 7 |
# Reader for @requested_url. The variable is only assigned inside #transmit,
# so this returns nil until #transmit has run.
# File 'lib/creepin/resource_creeper.rb', line 5 def requested_url @requested_url end |
Instance Method Details
#build_request_params(param_string) ⇒ Object
35 36 37 38 |
# File 'lib/creepin/resource_creeper.rb', line 35

# Parses the query portion of +param_string+ and, when it contains at least
# one parameter, stores it in @request_params as `{ :query => {...} }`.
#
# +param_string+ may be a full URL ("http://host/path?a=1") or a bare query
# string ("a=1&b=2"). Only the first '?' separates the URL from its query,
# so a literal '?' inside a parameter value is kept, and a URL ending in '?'
# is treated as having no parameters.
#
# @param param_string [String] URL or bare query string
# @return [Hash, nil] the new @request_params, or nil when nothing was parsed
#   (in which case @request_params is left untouched)
def build_request_params(param_string)
  # The limit of 2 keeps trailing empty fields and later '?' characters, so
  # "url?" yields "" and "url?q=a?b" yields "q=a?b" instead of the wrong part.
  query_string = param_string.split('?', 2).last
  params_hash = Rack::Utils.parse_query(query_string)
  @request_params = { :query => params_hash.with_indifferent_access } if params_hash.present?
end
#full_request_url(base_url, request_params) ⇒ Object
40 41 42 |
# File 'lib/creepin/resource_creeper.rb', line 40
require 'uri' # stdlib; provides URI.encode_www_form used below

# Builds the URL that corresponds to requesting +base_url+ with the query
# parameters held in +request_params+.
#
# Keys and values are form-encoded so characters such as '&', '=' or spaces
# cannot corrupt the resulting URL. When there are no query parameters the
# base URL is returned without a dangling '?'. When +base_url+ already has a
# query string, the parameters are appended with '&'.
#
# @param base_url [String] URL the request is sent to
# @param request_params [Hash] options hash whose :query entry holds the parameters
# @return [String] a new string with the encoded query appended
def full_request_url(base_url, request_params)
  query = request_params[:query]
  return base_url.dup if query.nil? || query.empty?

  separator = base_url.include?('?') ? '&' : '?'
  "#{base_url}#{separator}#{URI.encode_www_form(query)}"
end
#load_resource(collected_attributes_hash, resource_klass) ⇒ Object
69 70 71 72 73 74 75 |
# File 'lib/creepin/resource_creeper.rb', line 69

# Produces a resource from the collected attributes.
#
# Without a load strategy, +resource_klass+ is instantiated directly with the
# attributes; otherwise the strategy is called with both arguments and its
# result is returned.
#
# @param collected_attributes_hash [Hash] attributes gathered for the resource
# @param resource_klass [Class] class to instantiate when no strategy exists
# @return [Object] the new instance, or whatever the load strategy returns
def load_resource(collected_attributes_hash, resource_klass)
  return resource_klass.new(collected_attributes_hash) unless resource_load_strategy?

  resource_load_strategy.call(collected_attributes_hash, resource_klass)
end
#load_response_resource ⇒ Object
50 51 52 |
# File 'lib/creepin/resource_creeper.rb', line 50

# Looks up the first node matching `selector` in the parsed response and
# remembers it in @response_resource.
#
# @return [Object, nil] the result of `at_css` (also stored in @response_resource)
def load_response_resource
  parsed_document = @response_html.document
  @response_resource = parsed_document.at_css(selector)
end
#map_response_resource ⇒ Object
54 55 56 57 58 59 60 61 62 63 |
# File 'lib/creepin/resource_creeper.rb', line 54

# Evaluates every element mapping against @response_resource, stores each
# result in #collected_attributes_hash under the mapping's attribute name,
# and then passes the collected attributes and #loaded_resource to
# #save_resource.
#
# Each mapping block runs via instance_exec, so it executes in the context
# of this creeper and receives @response_resource as its argument.
#
# @return [Object] whatever #save_resource returns
def map_response_resource
  element_mappings.each_pair do |attribute, block|
    collected_attributes_hash[attribute] = instance_exec(@response_resource, &block)
  end

  save_resource(collected_attributes_hash, loaded_resource)
end
#parse_response ⇒ Object
44 45 46 47 48 |
# File 'lib/creepin/resource_creeper.rb', line 44

# Parses the body of @response as HTML, locates the response resource and,
# if one was found, maps it onto the loaded resource.
#
# @return [Object, nil] the result of #map_response_resource, or nil when
#   no response resource was found
def parse_response
  @response_html = Nokogiri::HTML::Document.parse(@response.body)
  load_response_resource
  return unless response_resource?

  map_response_resource
end
#resource_save_strategy? ⇒ Boolean
87 88 89 |
# File 'lib/creepin/resource_creeper.rb', line 87

# Tells whether this creeper exposes a `resource_save_strategy` method.
#
# @return [Boolean] true when the object responds to :resource_save_strategy
def resource_save_strategy?
  respond_to?(:resource_save_strategy)
end
#response_resource? ⇒ Boolean
65 66 67 |
# File 'lib/creepin/resource_creeper.rb', line 65

# Tells whether #load_response_resource found something to map.
#
# `present?` already yields a boolean, so no ternary is needed to coerce it.
#
# @return [Boolean] true when @response_resource is present
def response_resource?
  @response_resource.present?
end
#run_after_crawl_callbacks ⇒ Object
14 15 16 |
# File 'lib/creepin/resource_creeper.rb', line 14

# Callback hook that triggers this creeper's own request by delegating
# to #transmit.
#
# @return [Object] whatever #transmit returns
def run_after_crawl_callbacks
  transmit
end
#save_resource(collected_attributes_hash, resource) ⇒ Object
77 78 79 80 81 82 83 84 85 |
# File 'lib/creepin/resource_creeper.rb', line 77

# Applies the collected attributes to +resource+ and persists it.
#
# When a save strategy is available it takes over completely and its return
# value is passed through. Otherwise each attribute is assigned through the
# matching `name=` writer and `save` is called unless saving is skipped.
#
# @param collected_attributes_hash [Hash] attribute name => value pairs
# @param resource [Object] object receiving the attributes
# @return [Object] the save strategy's result, or +resource+ in the default path
def save_resource(collected_attributes_hash, resource)
  return resource_save_strategy.call(collected_attributes_hash, resource) if resource_save_strategy?

  collected_attributes_hash.each_pair do |attribute, value|
    resource.send("#{attribute}=", value)
  end
  resource.save unless skip_resource_save?
  resource
end
#skip_resource_save? ⇒ Boolean
91 92 93 |
# File 'lib/creepin/resource_creeper.rb', line 91

# Tells whether this creeper exposes a `skip_resource_save` method. Only the
# method's existence is checked; it is never called here.
#
# @return [Boolean] true when the object responds to :skip_resource_save
def skip_resource_save?
  respond_to?(:skip_resource_save)
end
#transmit ⇒ Object
18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/creepin/resource_creeper.rb', line 18

# Performs the HTTP GET for this creeper and then calls `crawl_finished`.
#
# When the creeper responds to `url_attribute`, the URL is read from the
# loaded resource through that attribute. Otherwise `base_url` is requested
# with @request_params, which default to `default_params` merged with the
# params given to #initialize unless #build_request_params already set them.
#
# Sets @response and @requested_url.
#
# @return [Object] whatever `crawl_finished` returns
def transmit
  if url_attribute?
    # Read the URL once so the request and the recorded URL cannot diverge.
    resource_url = loaded_resource.send(url_attribute)
    @response = HTTParty.get(resource_url)
    @requested_url = resource_url
  else
    # Non-destructive merge: merge! would write this request's params into the
    # hash returned by default_params, leaking them into later requests if that
    # hash is shared (default_params is defined outside this view).
    @request_params ||= { :query => default_params.merge(@params) }
    @response = HTTParty.get(base_url, @request_params)
    @requested_url = full_request_url(base_url, @request_params)
  end

  crawl_finished
end
#url_attribute? ⇒ Boolean
31 32 33 |
# File 'lib/creepin/resource_creeper.rb', line 31

# Tells whether this creeper exposes a `url_attribute` method, which #transmit
# uses to decide where the request URL comes from.
#
# @return [Boolean] true when the object responds to :url_attribute
def url_attribute?
  respond_to?(:url_attribute)
end