Class: Datahen::Client::Scraper

Inherits:
Base
  • Object
show all
Defined in:
lib/datahen/client/scraper.rb

Constant Summary

Constants inherited from Base

Base::CHECK_EMPTY_BODY, Base::CHECK_NIL, Base::DEFAULT_RETRY_LIMIT

Instance Method Summary collapse

Methods inherited from Base

#auth_token, #auth_token=, #default_retry_limit, #env_api_url, env_auth_token, env_ignore_ssl, #ignore_ssl, #initialize, #left_merge, random_delay, #retry

Constructor Details

This class inherits a constructor from Datahen::Client::Base

Instance Method Details

#all(opts = {}) ⇒ Object



9
10
11
12
# File 'lib/datahen/client/scraper.rb', line 9

# Issues a GET request to "/scrapers".
#
# The request parameters are the client's @options with +opts+ merged on
# top, so a key given in +opts+ overrides the same key in @options.
#
# @param opts [Hash] extra request parameters for this call
# @return whatever +self.class.get+ returns
def all(opts={})
  self.class.get("/scrapers", @options.merge(opts))
end

#create(scraper_name, git_repository, opts = {}) ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/datahen/client/scraper.rb', line 14

# Creates a scraper by POSTing a JSON body to "/scrapers".
#
# +name+ and +git_repository+ are always sent. Every other field is optional
# and is copied from +opts+ under one of two rules:
#
# * truthy  - sent only when the option value is truthy. Several option
#             names may feed one field; the first truthy one wins.
# * present - sent whenever the option key exists, even when the value is
#             +false+ or +nil+. The key may be a Symbol or a String, and the
#             value is read from whichever key is actually present (the
#             Symbol key wins when both exist).
#
# NOTE(review): boolean flags handled by the truthy rule (+force_fetch+,
# +disable_scheduler+, +cancel_current_job+, +multiple_jobs+) are omitted
# when +false+; #update sends them explicitly. Confirm that is intended.
#
# @param scraper_name value sent as the +name+ field
# @param git_repository value sent as the +git_repository+ field
# @param opts [Hash] optional scraper settings
# @return whatever +self.class.post+ returns
def create(scraper_name, git_repository, opts={})
  body = {}
  body[:name] = scraper_name
  body[:git_repository] = git_repository

  # Copies the first truthy value found under +keys+ into body[field].
  truthy = lambda do |field, *keys|
    value = keys.map { |key| opts[key] }.find { |candidate| candidate }
    body[field] = value if value
  end

  # Copies the option into body[field] whenever its key exists, reading the
  # value from the key that is really there so string-keyed hashes do not
  # end up sending nil.
  present = lambda do |field|
    if opts.has_key?(field)
      body[field] = opts[field]
    elsif opts.has_key?(field.to_s)
      body[field] = opts[field.to_s]
    end
  end

  # Calls are kept in the original field order so the JSON key order is stable.
  truthy.call(:git_branch, :branch, :git_branch)
  truthy.call(:freshness_type, :freshness_type)
  truthy.call(:force_fetch, :force_fetch)
  truthy.call(:parser_worker_count, :parsers, :parser_worker_count)
  truthy.call(:fetcher_worker_count, :fetchers, :fetcher_worker_count)
  truthy.call(:browser_worker_count, :browsers, :browser_worker_count)
  truthy.call(:proxy_type, :proxy_type)
  truthy.call(:disable_scheduler, :disable_scheduler)
  truthy.call(:cancel_current_job, :cancel_current_job)
  truthy.call(:schedule, :schedule)
  truthy.call(:timezone, :timezone)
  truthy.call(:profile, :profile)
  truthy.call(:multiple_jobs, :multiple_jobs)
  truthy.call(:max_job_count, :max_job_count)
  truthy.call(:max_page_size, :max_page_size)
  present.call(:enable_global_cache)
  truthy.call(:retry_interval, :retry_interval)
  truthy.call(:soft_fetching_try_limit, :soft_fetching_try_limit)
  truthy.call(:soft_refetch_limit, :soft_refetch_limit)
  truthy.call(:parsing_try_limit, :parsing_try_limit)
  present.call(:prevent_kb_autoscaler)

  params = @options.merge({body: body.to_json})
  self.class.post("/scrapers", params)
end

#delete(scraper_name, opts = {}) ⇒ Object



73
74
75
76
# File 'lib/datahen/client/scraper.rb', line 73

# Issues a DELETE request to "/scrapers/<scraper_name>".
#
# The request parameters are @options with +opts+ merged on top, so a key
# given in +opts+ overrides the same key in @options.
#
# @param scraper_name value interpolated into the request path
# @param opts [Hash] extra request parameters for this call
# @return whatever +self.class.delete+ returns
def delete(scraper_name, opts={})
  endpoint = "/scrapers/#{scraper_name}"
  self.class.delete(endpoint, @options.merge(opts))
end

#find(scraper_name) ⇒ Object



5
6
7
# File 'lib/datahen/client/scraper.rb', line 5

# Issues a GET request to "/scrapers/<scraper_name>", passing the client's
# @options through unchanged as the request parameters.
#
# @param scraper_name value interpolated into the request path
# @return whatever +self.class.get+ returns
def find(scraper_name)
  endpoint = "/scrapers/#{scraper_name}"
  self.class.get(endpoint, @options)
end

#profile(scraper_name, opts = {}) ⇒ Object



78
79
80
81
82
# File 'lib/datahen/client/scraper.rb', line 78

# Issues a GET request to "/scrapers/<scraper_name>/profile".
#
# The request parameters are @options with +opts+ merged on top, so a key
# given in +opts+ overrides the same key in @options.
#
# @param scraper_name value interpolated into the request path
# @param opts [Hash] extra request parameters for this call
# @return whatever +self.class.get+ returns
def profile(scraper_name, opts={})
  endpoint = "/scrapers/#{scraper_name}/profile"
  self.class.get(endpoint, @options.merge(opts))
end

#update(scraper_name, opts = {}) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/datahen/client/scraper.rb', line 43

# Updates a scraper by PUTting a JSON body to "/scrapers/<scraper_name>".
#
# Only the fields supplied in +opts+ are sent. Each field is copied under
# one of two rules:
#
# * truthy  - sent only when the option value is truthy. Several option
#             names may feed one field; the first truthy one wins.
# * present - sent whenever the option key exists, even when the value is
#             +false+ or +nil+. The key may be a Symbol or a String, and the
#             value is read from whichever key is actually present (the
#             Symbol key wins when both exist).
#
# @param scraper_name value interpolated into the request path
# @param opts [Hash] scraper settings to change
# @return whatever +self.class.put+ returns
def update(scraper_name, opts={})
  body = {}

  # Copies the first truthy value found under +keys+ into body[field].
  truthy = lambda do |field, *keys|
    value = keys.map { |key| opts[key] }.find { |candidate| candidate }
    body[field] = value if value
  end

  # Copies the option into body[field] whenever its key exists, reading the
  # value from the key that is really there so string-keyed hashes do not
  # end up sending nil.
  present = lambda do |field|
    if opts.has_key?(field)
      body[field] = opts[field]
    elsif opts.has_key?(field.to_s)
      body[field] = opts[field.to_s]
    end
  end

  # Calls are kept in the original field order so the JSON key order is stable.
  truthy.call(:name, :name)
  truthy.call(:git_repository, :repo, :git_repository)
  truthy.call(:git_branch, :branch, :git_branch)
  truthy.call(:freshness_type, :freshness_type)
  present.call(:force_fetch)
  truthy.call(:parser_worker_count, :parsers, :parser_worker_count)
  truthy.call(:fetcher_worker_count, :fetchers, :fetcher_worker_count)
  truthy.call(:browser_worker_count, :browsers, :browser_worker_count)
  truthy.call(:proxy_type, :proxy_type)
  present.call(:disable_scheduler)
  present.call(:cancel_current_job)
  truthy.call(:schedule, :schedule)
  truthy.call(:timezone, :timezone)
  truthy.call(:profile, :profile)
  present.call(:multiple_jobs)
  present.call(:max_job_count)
  present.call(:max_page_size)
  present.call(:enable_global_cache)
  truthy.call(:retry_interval, :retry_interval)
  truthy.call(:soft_fetching_try_limit, :soft_fetching_try_limit)
  truthy.call(:soft_refetch_limit, :soft_refetch_limit)
  truthy.call(:parsing_try_limit, :parsing_try_limit)
  present.call(:prevent_kb_autoscaler)

  params = @options.merge({body: body.to_json})

  self.class.put("/scrapers/#{scraper_name}", params)
end