Class: RailsSpider::Resource
- Inherits:
- Object
  - Object
    - RailsSpider::Resource
- Defined in:
- lib/rails_spider/resource.rb
Constant Summary collapse
- DEFAULT_EXP =
"([^\/.?]+)"
- SYMBOL_EXP =
/:\w+/
Instance Attribute Summary collapse
-
#fetcher ⇒ Object
readonly
Returns the value of attribute fetcher.
-
#host ⇒ Object
readonly
Returns the value of attribute host.
-
#item_path ⇒ Object
readonly
Returns the value of attribute item_path.
-
#list_path ⇒ Object
readonly
Returns the value of attribute list_path.
-
#page ⇒ Object
Returns the value of attribute page.
-
#page_params ⇒ Object
readonly
Returns the value of attribute page_params.
-
#work ⇒ Object
readonly
Returns the value of attribute work.
Instance Method Summary collapse
- #get_items ⇒ Object
-
#initialize(work, **options) ⇒ Resource
constructor
A new instance of Resource.
- #item_exp ⇒ Object
- #list_url ⇒ Object
- #run ⇒ Object
- #save(url) ⇒ Object
Constructor Details
#initialize(work, **options) ⇒ Resource
Returns a new instance of Resource.
10 11 12 13 14 15 16 17 18 |
# File 'lib/rails_spider/resource.rb', line 10
# Builds a resource crawler around +work+.
#
# Copies the crawl configuration (host, list path, item path and page
# parameter name) off +work+, starts paging at 1, and creates a
# RailsSpider::Mechanize fetcher unless @fetcher already holds a value.
#
# @param work [Object] must respond to #host, #list_path, #item_path and
#   #page_params
# Any keyword arguments are accepted and ignored.
def initialize(work, **)
  @work = work
  @host, @list_path, @item_path, @page_params =
    work.host, work.list_path, work.item_path, work.page_params
  @page = 1
  @fetcher ||= RailsSpider::Mechanize.new
end
Instance Attribute Details
#fetcher ⇒ Object (readonly)
Returns the value of attribute fetcher.
5 6 7 |
# File 'lib/rails_spider/resource.rb', line 5 def fetcher @fetcher end |
#host ⇒ Object (readonly)
Returns the value of attribute host.
5 6 7 |
# File 'lib/rails_spider/resource.rb', line 5 def host @host end |
#item_path ⇒ Object (readonly)
Returns the value of attribute item_path.
5 6 7 |
# File 'lib/rails_spider/resource.rb', line 5 def item_path @item_path end |
#list_path ⇒ Object (readonly)
Returns the value of attribute list_path.
5 6 7 |
# File 'lib/rails_spider/resource.rb', line 5 def list_path @list_path end |
#page ⇒ Object
Returns the value of attribute page.
6 7 8 |
# File 'lib/rails_spider/resource.rb', line 6 def page @page end |
#page_params ⇒ Object (readonly)
Returns the value of attribute page_params.
5 6 7 |
# File 'lib/rails_spider/resource.rb', line 5 def page_params @page_params end |
#work ⇒ Object (readonly)
Returns the value of attribute work.
5 6 7 |
# File 'lib/rails_spider/resource.rb', line 5 def work @work end |
Instance Method Details
#get_items ⇒ Object
32 33 34 |
# File 'lib/rails_spider/resource.rb', line 32
# Collects the links on the current list page that look like item pages.
#
# Asks the fetcher for every link found at #list_url and keeps the ones that
# match #item_exp.
#
# @return the result of calling #select on whatever fetcher.links returns
#   (presumably an Array of link strings — confirm against the fetcher)
def get_items
  # Resolve the pattern once per call; calling #item_exp inside the block
  # would rebuild it for every single link.
  pattern = item_exp
  fetcher.links(list_url).select { |link| pattern.match?(link) }
end
#item_exp ⇒ Object
53 54 55 |
# File 'lib/rails_spider/resource.rb', line 53
# Regexp used to recognise item links.
#
# Every +:name+ placeholder in #item_path (anything matching SYMBOL_EXP) is
# replaced with DEFAULT_EXP, and the result is compiled as a Regexp. The
# compiled pattern is memoized because #item_path has no writer, so the
# pattern cannot change for the lifetime of the instance.
#
# NOTE(review): the non-placeholder parts of item_path are used as regexp
# source verbatim (not escaped) and the pattern is unanchored — confirm that
# is intended before tightening it.
#
# @return [Regexp]
def item_exp
  @item_exp ||= Regexp.new(item_path.gsub(SYMBOL_EXP, DEFAULT_EXP))
end
#list_url ⇒ Object
43 44 45 46 47 48 49 50 51 |
# File 'lib/rails_spider/resource.rb', line 43
# URL of the list page for the current #page.
#
# Joins #list_path onto #host. When #page converts to a positive integer,
# a +page_params=page+ pair is added to the query string. Any query string
# already present in #list_path is kept and the page pair is appended to it
# (previously it was overwritten, dropping filters such as "?cat=1").
#
# @return [URI] the list page URL
def list_url
  url = URI.join(host, list_path)
  return url unless page.to_i > 0

  page_query = URI.encode_www_form(page_params => page)
  existing = url.query
  url.query =
    if existing.nil? || existing.empty?
      page_query
    else
      "#{existing}&#{page_query}"
    end
  url
end
#run ⇒ Object
20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/rails_spider/resource.rb', line 20
# Crawls list pages one after another, saving every item found.
#
# Fetches the items for the current page, saves each of them, advances
# #page by one and repeats until a page yields no items.
#
# @return [nil]
def run
  until (batch = get_items).size.zero?
    batch.each { |item| save(item) }
    self.page += 1
  end
end
#save(url) ⇒ Object
36 37 38 39 40 41 |
# File 'lib/rails_spider/resource.rb', line 36
# Fetches +url+ and stores its body in a Local record.
#
# The body is downloaded first; the Local record keyed by +url+ and the
# current work's id is then looked up (or initialised), given the body, and
# saved.
#
# @param url the item URL to fetch and store
# @return the result of Local#save
def save(url)
  content = fetcher.body(url)
  Local
    .find_or_initialize_by(url: url, work_id: work.id)
    .tap { |record| record.body = content }
    .save
end