Class: Creepin::ResourceCreeper

Inherits:
Object
  • Object
show all
Defined in:
lib/creepin/resource_creeper.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(loaded_resource, params = {}) ⇒ ResourceCreeper

Returns a new instance of ResourceCreeper.



7
8
9
10
11
12
# File 'lib/creepin/resource_creeper.rb', line 7

# Builds a creeper around an already-loaded resource.
#
# @param loaded_resource [Object] stored unchanged in @loaded_resource
# @param params [Hash] request parameters; adopted only when they are
#   `present?`, otherwise @params keeps its current value or becomes {}
def initialize(loaded_resource, params = {})
  @loaded_resource = loaded_resource
  # A blank params argument never replaces what @params already holds.
  @params = params.present? ? params : (@params || {})
  @collected_attributes_hash = {}
end

Instance Attribute Details

#collected_attributes_hash ⇒ Object

Returns the value of attribute collected_attributes_hash.



5
6
7
# File 'lib/creepin/resource_creeper.rb', line 5

# Reader for @collected_attributes_hash.
#
# Returns the stored object itself (no copy), so callers that write into it
# change this instance's state. #initialize sets it to an empty Hash.
#
# @return [Hash] the value of @collected_attributes_hash
def collected_attributes_hash
  @collected_attributes_hash
end

#loaded_resource ⇒ Object

Returns the value of attribute loaded_resource.



5
6
7
# File 'lib/creepin/resource_creeper.rb', line 5

# Reader for @loaded_resource, the object handed to #initialize.
#
# @return [Object] the value of @loaded_resource
def loaded_resource
  @loaded_resource
end

#requested_url ⇒ Object

Returns the value of attribute requested_url.



5
6
7
# File 'lib/creepin/resource_creeper.rb', line 5

# Reader for @requested_url.
#
# The variable is assigned by #transmit; #initialize does not set it, so it
# reads as nil until a request has been made.
#
# @return [Object, nil] the value of @requested_url
def requested_url
  @requested_url
end

Instance Method Details

#build_request_params(param_string) ⇒ Object



35
36
37
38
# File 'lib/creepin/resource_creeper.rb', line 35

# Turns a query string into the request options stored in @request_params.
#
# Only the text after the last '?' is parsed; a string without '?' is parsed
# whole. When parsing yields nothing, @request_params is left untouched.
#
# @param param_string [String] a query string, optionally preceded by a URL
# @return [Hash, nil] the new @request_params ({ :query => ... }), or nil
#   when no parameters were found
def build_request_params(param_string)
  query_string = param_string.split('?').last
  parsed = Rack::Utils.parse_query(query_string)
  return unless parsed.present?

  @request_params = { :query => parsed.with_indifferent_access }
end

#full_request_url(base_url, request_params) ⇒ Object



40
41
42
# File 'lib/creepin/resource_creeper.rb', line 40

# Builds the URL string for a request: base_url followed by '?' and the
# form-encoded :query parameters.
#
# Keys and values are percent-encoded (spaces become '+'), so characters such
# as '&', '=' or spaces inside a value can no longer corrupt the query
# string. Array values are emitted as repeated keys. When there are no query
# parameters, base_url is returned without a trailing '?'.
#
# @param base_url [String] the URL up to, but not including, the '?'
# @param request_params [Hash] options hash; only its :query entry is read
# @return [String] the URL including the encoded query string
def full_request_url(base_url, request_params)
  # Loaded here so the method is self-contained; a no-op once uri is loaded.
  require 'uri'

  query = request_params[:query]
  return base_url if query.nil? || query.empty?

  base_url + '?' + URI.encode_www_form(query)
end

#load_resource(collected_attributes_hash, resource_klass) ⇒ Object



69
70
71
72
73
74
75
# File 'lib/creepin/resource_creeper.rb', line 69

# Produces a resource object from the collected attributes.
#
# When resource_load_strategy? is truthy, the callable returned by
# resource_load_strategy receives both arguments and its result is returned;
# otherwise resource_klass is instantiated with the attributes.
#
# @param collected_attributes_hash [Hash] attributes for the resource
# @param resource_klass [Class] class to instantiate or hand to the strategy
# @return [Object] the strategy's result or a new resource_klass instance
def load_resource(collected_attributes_hash, resource_klass)
  return resource_klass.new(collected_attributes_hash) unless resource_load_strategy?

  resource_load_strategy.call(collected_attributes_hash, resource_klass)
end

#load_response_resource ⇒ Object



50
51
52
# File 'lib/creepin/resource_creeper.rb', line 50

# Looks up the first node matching `selector` in the parsed response and
# stores it in @response_resource.
#
# Reads @response_html, which #parse_response assigns before calling this.
#
# @return [Object, nil] whatever at_css returned (also kept in
#   @response_resource)
def load_response_resource
  document = @response_html.document
  @response_resource = document.at_css(selector)
end

#map_response_resource ⇒ Object



54
55
56
57
58
59
60
61
62
63
# File 'lib/creepin/resource_creeper.rb', line 54

# Extracts every mapped attribute from @response_resource and saves the
# result onto the loaded resource.
#
# Each entry of element_mappings pairs an attribute name with a block; the
# block runs in the context of this object (instance_exec) with
# @response_resource as its argument, and its result is stored in
# collected_attributes_hash under the attribute name.
#
# @return [Object] whatever #save_resource returns
def map_response_resource
  element_mappings.each_pair do |attribute, block|
    collected_attributes_hash[attribute] = instance_exec(@response_resource, &block)
  end

  save_resource(collected_attributes_hash, loaded_resource)
end

#parse_response ⇒ Object



44
45
46
47
48
# File 'lib/creepin/resource_creeper.rb', line 44

# Parses the body of @response as HTML, locates the target node and, when
# one was found, maps it onto the resource.
#
# @return [Object, nil] the result of #map_response_resource, or nil when
#   response_resource? is false
def parse_response
  @response_html = Nokogiri::HTML::Document.parse(@response.body)
  load_response_resource
  return unless response_resource?

  map_response_resource
end

#resource_save_strategy? ⇒ Boolean

Returns:

  • (Boolean)


87
88
89
# File 'lib/creepin/resource_creeper.rb', line 87

# Tells whether this object has a resource_save_strategy method.
#
# Only the presence of the method is checked (respond_to? with its default
# arguments, so private methods do not count); the strategy itself is not
# called here. Where the method gets defined is not visible from this
# method — presumably by per-creeper configuration; verify against the DSL.
#
# @return [Boolean] true when resource_save_strategy can be called publicly
def resource_save_strategy?
  respond_to?(:resource_save_strategy)
end

#response_resource? ⇒ Boolean

Returns:

  • (Boolean)


65
66
67
# File 'lib/creepin/resource_creeper.rb', line 65

# Tells whether a usable node was stored in @response_resource.
#
# `present?` already answers with true or false, so no further conversion
# is needed.
#
# @return [Boolean] true when @response_resource is present
def response_resource?
  @response_resource.present?
end

#run_after_crawl_callbacks ⇒ Object



14
15
16
# File 'lib/creepin/resource_creeper.rb', line 14

# Hook that performs this creeper's request by delegating to #transmit.
#
# NOTE(review): the name suggests it is invoked once some earlier crawl has
# finished; the caller is not visible here — confirm.
#
# @return [Object] whatever #transmit returns
def run_after_crawl_callbacks
  transmit
end

#save_resource(collected_attributes_hash, resource) ⇒ Object



77
78
79
80
81
82
83
84
85
# File 'lib/creepin/resource_creeper.rb', line 77

# Writes the collected attributes onto a resource and persists it.
#
# When resource_save_strategy? is truthy the callable returned by
# resource_save_strategy takes over completely and its result is returned.
# Otherwise each attribute is assigned through the matching "name=" writer
# and the resource is saved unless skip_resource_save? is truthy.
#
# @param collected_attributes_hash [Hash] attribute name => value pairs
# @param resource [Object] object exposing a writer per attribute and #save
# @return [Object] the strategy's result, or the resource itself
def save_resource(collected_attributes_hash, resource)
  return resource_save_strategy.call(collected_attributes_hash, resource) if resource_save_strategy?

  collected_attributes_hash.each_pair do |attribute, value|
    resource.send("#{attribute}=", value)
  end
  resource.save unless skip_resource_save?
  resource
end

#skip_resource_save? ⇒ Boolean

Returns:

  • (Boolean)


91
92
93
# File 'lib/creepin/resource_creeper.rb', line 91

# Tells whether saving should be skipped, judged solely by whether this
# object has a skip_resource_save method.
#
# The method's return value is never consulted: merely defining
# skip_resource_save (publicly) makes this true, even if it would return
# false.
#
# @return [Boolean] true when skip_resource_save can be called publicly
def skip_resource_save?
  respond_to?(:skip_resource_save)
end

#transmit ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/creepin/resource_creeper.rb', line 18

# Performs the HTTP GET for this creeper, records the response and the URL
# that was requested, then signals completion via crawl_finished.
#
# With a url_attribute, the URL is read from the loaded resource by calling
# the method that url_attribute names. Otherwise base_url is requested with
# @request_params, which default to default_params combined with @params.
#
# @return [Object] whatever crawl_finished returns
def transmit
  if url_attribute?
    # Read the URL once so the request and the recorded URL cannot differ.
    url = loaded_resource.send(url_attribute)
    @response = HTTParty.get(url)
    @requested_url = url
  else
    # merge, not merge!: the hash returned by default_params may be shared,
    # and must not accumulate this instance's params.
    @request_params ||= { :query => default_params.merge(@params) }
    @response = HTTParty.get(base_url, @request_params)
    @requested_url = full_request_url(base_url, @request_params)
  end

  crawl_finished
end

#url_attribute? ⇒ Boolean

Returns:

  • (Boolean)


31
32
33
# File 'lib/creepin/resource_creeper.rb', line 31

# Tells whether this object has a url_attribute method.
#
# #transmit uses the answer to choose between fetching the URL stored on the
# loaded resource and requesting base_url with query parameters. Only the
# presence of the method is checked (respond_to? with its default arguments,
# so private methods do not count).
#
# @return [Boolean] true when url_attribute can be called publicly
def url_attribute?
  respond_to?(:url_attribute)
end