Class: Datahen::Client::JobPage

Inherits:
Base
  • Object
show all
Defined in:
lib/datahen/client/job_page.rb

Constant Summary

Constants inherited from Base

Base::CHECK_EMPTY_BODY, Base::CHECK_NIL, Base::DEFAULT_RETRY_LIMIT

Instance Method Summary collapse

Methods inherited from Base

#auth_token, #auth_token=, #default_retry_limit, #env_api_url, env_auth_token, env_ignore_ssl, #ignore_ssl, #initialize, #left_merge, random_delay, #retry

Constructor Details

This class inherits a constructor from Datahen::Client::Base

Instance Method Details

#all(job_id, opts = {}) ⇒ Object



8
9
10
11
# File 'lib/datahen/client/job_page.rb', line 8

# Requests the page collection of a job.
#
# Delegates to the class-level +get+ with the path "/jobs/<job_id>/pages".
# The request options are the client's stored @options overlaid with +opts+
# (on a key collision the value from +opts+ wins).
#
# @param job_id [Object] job identifier interpolated into the request path
# @param opts [Hash] per-call options merged over @options
# @return [Object] whatever the class-level +get+ returns
def all(job_id, opts = {})
  request_opts = @options.merge(opts)
  endpoint = "/jobs/#{job_id}/pages"
  self.class.get(endpoint, request_opts)
end

#dequeue(job_id, limit, page_types, parse_fetching_failed, opts = {}) ⇒ Object



44
45
46
47
48
49
50
51
52
# File 'lib/datahen/client/job_page.rb', line 44

# Asks the API to dequeue pages of a job for parsing.
#
# Delegates to the class-level +put+ with the path
# "/jobs/<job_id>/pages/parse_dequeue". The request options are @options
# overlaid with +opts+, and then with a :body entry holding the JSON encoding
# of limit, page_types and parse_fetching_failed (so any :body supplied through
# +opts+ is replaced).
#
# @param job_id [Object] job identifier interpolated into the request path
# @param limit [Object] sent as "limit" in the JSON body
# @param page_types [Object] sent as "page_types" in the JSON body
# @param parse_fetching_failed [Object] sent as "parse_fetching_failed" in the JSON body
# @param opts [Hash] per-call options merged over @options
# @return [Object] whatever the class-level +put+ returns
def dequeue(job_id, limit, page_types, parse_fetching_failed, opts = {})
  payload = {
    limit: limit,
    page_types: page_types,
    parse_fetching_failed: parse_fetching_failed
  }
  request_opts = @options.merge(opts).merge(body: payload.to_json)
  endpoint = "/jobs/#{job_id}/pages/parse_dequeue"
  self.class.put(endpoint, request_opts)
end

#enqueue(job_id, page, opts = {}) ⇒ Object



30
31
32
33
34
35
# File 'lib/datahen/client/job_page.rb', line 30

# Submits a page to a job.
#
# Delegates to the class-level +post+ with the path "/jobs/<job_id>/pages".
# The request options are @options overlaid with +opts+, and then with a :body
# entry holding +page+ serialized through +to_json+.
#
# @param job_id [Object] job identifier interpolated into the request path
# @param page [#to_json] page description sent as the request body
# @param opts [Hash] per-call options merged over @options
# @return [Object] whatever the class-level +post+ returns
def enqueue(job_id, page, opts = {})
  request_opts = @options.merge(opts).merge(body: page.to_json)
  endpoint = "/jobs/#{job_id}/pages"
  self.class.post(endpoint, request_opts)
end

#find(job_id, gid) ⇒ Object



4
5
6
# File 'lib/datahen/client/job_page.rb', line 4

# Looks up a single page of a job by its GID.
#
# Delegates to the class-level +get+ with the path
# "/jobs/<job_id>/pages/<gid>" and the client's stored @options.
#
# @param job_id [Object] job identifier interpolated into the request path
# @param gid [Object] page GID interpolated into the request path
# @return [Object] whatever the class-level +get+ returns
def find(job_id, gid)
  endpoint = "/jobs/#{job_id}/pages/#{gid}"
  self.class.get(endpoint, @options)
end

#find_content(job_id, gid) ⇒ Object



75
76
77
# File 'lib/datahen/client/job_page.rb', line 75

# Requests the content of a single page.
#
# Delegates to the class-level +get+ with the path
# "/jobs/<job_id>/pages/<gid>/content" and the client's stored @options.
#
# @param job_id [Object] job identifier interpolated into the request path
# @param gid [Object] page GID interpolated into the request path
# @return [Object] whatever the class-level +get+ returns
def find_content(job_id, gid)
  endpoint = "/jobs/#{job_id}/pages/#{gid}/content"
  self.class.get(endpoint, @options)
end

#find_failed_content(job_id, gid) ⇒ Object



79
80
81
# File 'lib/datahen/client/job_page.rb', line 79

# Requests the failed content of a single page.
#
# Delegates to the class-level +get+ with the path
# "/jobs/<job_id>/pages/<gid>/failed_content" and the client's stored @options.
#
# @param job_id [Object] job identifier interpolated into the request path
# @param gid [Object] page GID interpolated into the request path
# @return [Object] whatever the class-level +get+ returns
def find_failed_content(job_id, gid)
  endpoint = "/jobs/#{job_id}/pages/#{gid}/failed_content"
  self.class.get(endpoint, @options)
end

#get_gid(job_id, page, opts = {}) ⇒ Object



37
38
39
40
41
42
# File 'lib/datahen/client/job_page.rb', line 37

# Asks the API to generate the GID for a page description.
#
# Delegates to the class-level +post+ with the path
# "/jobs/<job_id>/generate_gid". The request options are @options overlaid
# with +opts+, and then with a :body entry holding +page+ serialized through
# +to_json+.
#
# @param job_id [Object] job identifier interpolated into the request path
# @param page [#to_json] page description sent as the request body
# @param opts [Hash] per-call options merged over @options
# @return [Object] whatever the class-level +post+ returns
def get_gid(job_id, page, opts = {})
  request_opts = @options.merge(opts).merge(body: page.to_json)
  endpoint = "/jobs/#{job_id}/generate_gid"
  self.class.post(endpoint, request_opts)
end

#limbo(job_id, opts = {}) ⇒ Object



93
94
95
96
# File 'lib/datahen/client/job_page.rb', line 93

# Calls the job's "limbo" pages endpoint.
#
# Delegates to the class-level +put+ with the path
# "/jobs/<job_id>/pages/limbo". The request options are @options overlaid
# with +opts+ (on a key collision the value from +opts+ wins).
#
# @param job_id [Object] job identifier interpolated into the request path
# @param opts [Hash] per-call options merged over @options
# @return [Object] whatever the class-level +put+ returns
def limbo(job_id, opts = {})
  request_opts = @options.merge(opts)
  endpoint = "/jobs/#{job_id}/pages/limbo"
  self.class.put(endpoint, request_opts)
end

#parsing_update(job_id, gid, opts = {}) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/datahen/client/job_page.rb', line 54

# Sends the outcome of parsing a page to the API.
#
# Builds a JSON body from +opts+ and issues it through the class-level +put+
# to "/jobs/<job_id>/pages/<gid>/parsing_update", inside the inherited +retry+
# helper. Only @options (plus the generated :body) are used as request
# options; +opts+ itself is not merged into them.
#
# Recognized keys in +opts+:
# * :outputs           - sent as "outputs"; [] when the key is absent
# * :pages             - sent as "pages"; [] when the key is absent
# * :parsing_status    - sent as "parsing_status"; nil when the key is absent
# * :log_error         - sent only when truthy
# * :keep_outputs      - sent (coerced to true/false) only when the key is present
# * :parsing_try_limit - sent only when truthy
# * :retry_limit       - retry budget handed to +retry+; when the key is absent,
#                        default_retry_limit[:parser] is used instead
#
# @param job_id [Object] job identifier interpolated into the request path
# @param gid [Object] page GID interpolated into the request path
# @param opts [Hash] see the key list above
# @return [Object] whatever the +retry+ helper returns; the block itself
#   yields the response of the class-level +put+
# @raise [Error::CustomRetryError] raised inside the retry block when the
#   response code is 422 and the body matches /pq:\s*deadlock/i
def parsing_update(job_id, gid, opts = {})
  payload = {
    outputs: opts.fetch(:outputs, []),
    pages: opts.fetch(:pages, []),
    parsing_status: opts.fetch(:parsing_status, nil)
  }

  error_text = opts[:log_error]
  payload[:log_error] = error_text if error_text

  # Presence of the key (not its truthiness) decides whether the flag is sent.
  payload[:keep_outputs] = (opts[:keep_outputs] ? true : false) if opts.key?(:keep_outputs)

  try_limit = opts.fetch(:parsing_try_limit, nil)
  payload[:parsing_try_limit] = try_limit if try_limit

  request_opts = @options.merge(body: payload.to_json)

  # An explicit :retry_limit (even nil) takes precedence over the parser default.
  attempts = opts.fetch(:retry_limit) { default_retry_limit[:parser] }

  # NOTE(review): the meaning of the `5` and `false` arguments is defined by
  # the inherited retry helper, which is not visible here - confirm there.
  self.retry(attempts, 5, "Error while updating the parser.", false, CHECK_EMPTY_BODY) do
    response = self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update", request_opts)

    # A 422 whose body reports a "pq: deadlock" is surfaced as a
    # CustomRetryError carrying a random delay; presumably the retry helper
    # waits that long before the next attempt - confirm in the helper.
    deadlocked = response.code == 422 && response.body.to_s.match?(/pq:\s*deadlock/i)
    raise Error::CustomRetryError.new(self.class.random_delay(5), response.body.to_s) if deadlocked

    response
  end
end

#refetch(job_id, opts = {}) ⇒ Object



88
89
90
91
# File 'lib/datahen/client/job_page.rb', line 88

# Calls the job's "refetch" pages endpoint.
#
# Delegates to the class-level +put+ with the path
# "/jobs/<job_id>/pages/refetch". The request options are @options overlaid
# with +opts+ (on a key collision the value from +opts+ wins).
#
# @param job_id [Object] job identifier interpolated into the request path
# @param opts [Hash] per-call options merged over @options
# @return [Object] whatever the class-level +put+ returns
def refetch(job_id, opts = {})
  request_opts = @options.merge(opts)
  endpoint = "/jobs/#{job_id}/pages/refetch"
  self.class.put(endpoint, request_opts)
end

#reparse(job_id, opts = {}) ⇒ Object



83
84
85
86
# File 'lib/datahen/client/job_page.rb', line 83

# Calls the job's "reparse" pages endpoint.
#
# Delegates to the class-level +put+ with the path
# "/jobs/<job_id>/pages/reparse". The request options are @options overlaid
# with +opts+ (on a key collision the value from +opts+ wins).
#
# @param job_id [Object] job identifier interpolated into the request path
# @param opts [Hash] per-call options merged over @options
# @return [Object] whatever the class-level +put+ returns
def reparse(job_id, opts = {})
  request_opts = @options.merge(opts)
  endpoint = "/jobs/#{job_id}/pages/reparse"
  self.class.put(endpoint, request_opts)
end

#still_alive(job_id, gid, opts = {}) ⇒ Object



98
99
100
101
# File 'lib/datahen/client/job_page.rb', line 98

# Calls the "still_alive" endpoint of a single page.
#
# Delegates to the class-level +put+ with the path
# "/jobs/<job_id>/pages/<gid>/still_alive". The request options are @options
# overlaid with +opts+ (on a key collision the value from +opts+ wins).
#
# @param job_id [Object] job identifier interpolated into the request path
# @param gid [Object] page GID interpolated into the request path
# @param opts [Hash] per-call options merged over @options
# @return [Object] whatever the class-level +put+ returns
def still_alive(job_id, gid, opts = {})
  request_opts = @options.merge(opts)
  endpoint = "/jobs/#{job_id}/pages/#{gid}/still_alive"
  self.class.put(endpoint, request_opts)
end

#update(job_id, gid, opts = {}) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/datahen/client/job_page.rb', line 13

# Updates attributes of a single page.
#
# Builds a JSON body from the recognized keys of +opts+ and hands it to the
# class-level +put+ with the path "/jobs/<job_id>/pages/<gid>". Only @options
# (plus the generated :body) are used as request options; +opts+ itself is not
# merged into them, and unrecognized keys are ignored.
#
# Recognized keys in +opts+ (each sent only when its value is truthy, except
# where noted):
# * :page_type, :priority, :vars, :max_size
# * :enable_global_cache - sent whenever the key is present, so an explicit
#   +false+ reaches the API. The key may be given as a Symbol or as the String
#   "enable_global_cache"; the Symbol form wins when both are present.
# * :retry_interval, :soft_fetching_try_limit, :soft_refetch_limit,
#   :parsing_try_limit
#
# @param job_id [Object] job identifier interpolated into the request path
# @param gid [Object] page GID interpolated into the request path
# @param opts [Hash] attributes to update, see the key list above
# @return [Object] whatever the class-level +put+ returns
def update(job_id, gid, opts = {})
  body = {}

  %i[page_type priority vars max_size].each do |key|
    body[key] = opts[key] if opts[key]
  end

  # The presence check accepts both key forms, so the value must be read from
  # the form that is actually present; reading only the Symbol key would send
  # nil for callers that passed the String key in a plain Hash.
  if opts.key?(:enable_global_cache)
    body[:enable_global_cache] = opts[:enable_global_cache]
  elsif opts.key?("enable_global_cache")
    body[:enable_global_cache] = opts["enable_global_cache"]
  end

  %i[retry_interval soft_fetching_try_limit soft_refetch_limit parsing_try_limit].each do |key|
    body[key] = opts[key] if opts[key]
  end

  params = @options.merge(body: body.to_json)

  self.class.put("/jobs/#{job_id}/pages/#{gid}", params)
end