Class: Datahen::Scraper::RubyParserExecutor
- Inherits:
-
Executor
- Object
- Executor
- Datahen::Scraper::RubyParserExecutor
show all
- Defined in:
- lib/datahen/scraper/ruby_parser_executor.rb
Constant Summary
collapse
- FIND_OUTPUTS_RETRY_LIMIT =
nil
Constants inherited
from Executor
Executor::MAX_FIND_OUTPUTS_PER_PAGE
Instance Attribute Summary collapse
Attributes inherited from Executor
#filename, #gid, #job_id, #page
Class Method Summary
collapse
Instance Method Summary
collapse
Methods inherited from Executor
#clean_backtrace, #eval_with_context, #find_output, #find_outputs, #finish, #finisher_update, #get_content, #get_failed_content, #get_job_id, #init_global_page, #init_job_page, #init_page, #parsing_update, #remove_old_dups!, #remove_old_output_dups!, #remove_old_page_dups!, #save_outputs, #save_pages, #save_pages_and_outputs, #seeding_update
#create_context, #expose_to, #exposed_env, #exposed_methods, #isolated_binding, #var_or_proc
Constructor Details
Returns a new instance of RubyParserExecutor.
19
20
21
22
23
24
25
26
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 19
# Builds a parser executor from an options hash.
#
# @param options [Hash] executor settings
# @option options [Object] :filename parser script filename (required)
# @option options [Hash, nil] :page page hash; its 'gid' entry, when truthy, becomes the GID
# @option options [Object] :gid GID, only consulted when the page provides none
# @option options [Object] :job_id job ID (required)
# @option options [Hash] :vars page vars (defaults to an empty hash)
# @option options [Object] :keep_outputs coerced to true/false (defaults to false)
# @raise [RuntimeError] when :filename is missing, or when neither the page nor :gid yields a GID
# @raise [KeyError] when :job_id is missing
def initialize(options={})
  @filename = options.fetch(:filename) { raise "Filename is required" }
  @page = options.fetch(:page, nil)

  # A GID carried by the page takes precedence over an explicit :gid option.
  gid_from_page = (self.page || {})['gid']
  @gid = gid_from_page || options.fetch(:gid) { raise "GID or a page with a GID is required" }

  @job_id = options.fetch(:job_id)
  @page_vars = options.fetch(:vars, {})
  @keep_outputs = options.fetch(:keep_outputs, false) ? true : false
end
|
Instance Attribute Details
#limbo_self ⇒ Boolean
15
16
17
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 15
# Limbo self page flag.
#
# @return [Boolean, nil] the stored @limbo_self value (nil until it is assigned)
def limbo_self
  @limbo_self
end
|
#refetch_self ⇒ Boolean
Note:
It is stronger than #reparse_self flag.
Refetch self page flag.
8
9
10
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 8
# Refetch self page flag.
#
# @note It is stronger than the #reparse_self flag.
# @return [Boolean, nil] the stored @refetch_self value (nil until it is assigned)
def refetch_self
  @refetch_self
end
|
#reparse_self ⇒ Boolean
Note:
It is stronger than #limbo_self flag.
Reparse self page flag.
12
13
14
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 12
# Reparse self page flag.
#
# @note It is stronger than the #limbo_self flag.
# @return [Boolean, nil] the stored @reparse_self value (nil until it is assigned)
def reparse_self
  @reparse_self
end
|
#save ⇒ Object
Returns the value of attribute save.
4
5
6
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 4
# Returns the value of attribute save.
#
# @return [Object, nil] the stored @save value (nil until it is assigned)
def save
  @save
end
|
Class Method Details
.exposed_methods ⇒ Object
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 28
# Method names this executor class exposes.
#
# @return [Array<Symbol>] frozen list of method names
def self.exposed_methods
  %i[
    get_content
    get_failed_content
    content
    failed_content
    outputs
    pages
    save_pages
    save_outputs
    find_output
    find_outputs
    refetch
    reparse
    limbo
    finish
    still_alive
  ].freeze
end
|
Instance Method Details
#content ⇒ Object
235
236
237
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 235
# Page content for this executor's job and GID, fetched through #get_content
# and memoized in @content while the cached value is truthy.
#
# @return [Object] whatever #get_content returned
def content
  return @content if @content

  @content = get_content(job_id, gid)
end
|
#eval_parser_script(save = false) ⇒ Object
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 185
# Runs the parser script against the current page and records the outcome.
#
# Steps, in order:
# 1. report the "starting" status (#update_parsing_starting_status);
# 2. build the page via #init_page / #init_page_vars and reset the
#    refetch/reparse/limbo self flags;
# 3. evaluate +filename+ in an isolated binding exposing +outputs+, +pages+
#    and +page+;
# 4. save collected pages and outputs unless a self-refetch was requested;
# 5. update the page status: refetch wins over reparse, which wins over limbo,
#    otherwise the page is marked done.
#
# Error::SafeTerminateError raised by the script is swallowed on purpose; any
# SyntaxError or StandardError is reported through #handle_error (only when
# +save+ is truthy) and then re-raised.
#
# @param save [Boolean] whether failures are reported via #handle_error
# @return [Object] the result of the final #update_parsing_status call
def eval_parser_script(save=false)
  update_parsing_starting_status

  page = init_page_vars(init_page)
  outputs = []
  pages = []

  self.refetch_self = false
  self.reparse_self = false
  self.limbo_self = false

  begin
    exposed = { outputs: outputs, pages: pages, page: page }
    eval_with_context filename, isolated_binding(exposed)
  rescue Error::SafeTerminateError
    # The script asked to stop early (e.g. a self refetch/reparse/limbo); not a failure.
  rescue SyntaxError, StandardError => e
    handle_error(e) if save
    raise e
  end

  puts "=========== Parsing Executed ==========="

  begin
    save_pages_and_outputs(pages, outputs, :parsing) unless refetch_self
  rescue => e
    handle_error(e) if save
    raise e
  end

  final_status =
    if refetch_self
      :to_refetch
    elsif reparse_self
      :to_reparse
    elsif limbo_self
      :limbo
    else
      :done
    end
  update_parsing_status gid, final_status
end
|
#exec_parser(save = false) ⇒ Object
48
49
50
51
52
53
54
55
56
57
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 48
# Entry point: records the +save+ flag, prints a banner telling whether the
# parser is executed for real or only tried, then runs #eval_parser_script.
#
# @param save [Boolean] stored in @save and forwarded to #eval_parser_script
# @return [Object] the result of #eval_parser_script
def exec_parser(save=false)
  @save = save
  puts(save ? "Executing parser script" : "Trying parser script")
  eval_parser_script(save)
end
|
#failed_content ⇒ Object
239
240
241
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 239
# Failed content for this executor's job and GID, fetched through
# #get_failed_content and memoized in @failed_content while the cached value
# is truthy.
#
# @return [Object] whatever #get_failed_content returned
def failed_content
  return @failed_content if @failed_content

  @failed_content = get_failed_content(job_id, gid)
end
|
#handle_error(e) ⇒ Object
249
250
251
252
253
254
255
256
257
258
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 249
# Reports a parsing failure through #parsing_update.
#
# The logged error is a headline (exception class, message, job ID and GID)
# followed, on a new line, by the cleaned backtrace. The status is set to
# :failed and the page's 'parsing_try_limit' (nil when there is no page) is
# passed along.
#
# @param e [Exception] the error to report
# @return [Object] the result of #parsing_update
def handle_error(e)
  headline = "Parsing #{e.class}: #{e.to_s} (Job:#{job_id} GID:#{gid})"
  trace = clean_backtrace(e.backtrace)
  log_text = [headline, trace].join("\n")
  try_limit = (page || {})['parsing_try_limit']

  parsing_update(
    job_id: job_id,
    gid: gid,
    parsing_status: :failed,
    log_error: log_text,
    parsing_try_limit: try_limit
  )
end
|
#init_page_vars(page) ⇒ Object
59
60
61
62
63
64
65
66
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 59
# Chooses the page to parse and attaches the configured vars to it.
#
# When the executor already holds a page (#page is not nil) that page is
# returned untouched. Otherwise the given +page+ gets its 'vars' entry set to
# @page_vars, provided @page_vars is neither nil nor empty.
#
# @param page [Hash] freshly initialized page
# @return [Hash] the executor's own page, or the given +page+
def init_page_vars(page)
  preset_page = self.page
  return preset_page unless preset_page.nil?

  page['vars'] = @page_vars unless @page_vars.nil? || @page_vars.empty?
  page
end
|
#limbo(page_gid) ⇒ Object
176
177
178
179
180
181
182
183
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 176
# Sends a page to limbo.
#
# For the page being parsed (+page_gid+ equals #gid) it sets the limbo-self
# flag and raises Error::SafeTerminateError; for any other page it delegates
# to #limbo_page.
#
# @param page_gid [String] GID of the page to send to limbo
# @return [Object] the result of #limbo_page (other pages only)
# @raise [ArgumentError] when +page_gid+ is not a String
# @raise [Error::SafeTerminateError] when +page_gid+ is the current page
def limbo(page_gid)
  unless page_gid.is_a?(String)
    raise ArgumentError.new("page_gid needs to be a String.")
  end
  return limbo_page(page_gid) unless page_gid == gid

  self.limbo_self = true
  raise Error::SafeTerminateError
end
|
#limbo_page(page_gid) ⇒ Object
167
168
169
170
171
172
173
174
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 167
# Marks another page as :limbo when saving is enabled; otherwise only prints
# what would have happened.
#
# @param page_gid [String] GID of the page to update
# @return [nil]
def limbo_page(page_gid)
  unless save
    puts "Would have limbo page #{page_gid}"
    return
  end

  update_parsing_status page_gid, :limbo
  puts "Limbo page #{page_gid}"
end
|
#refetch(page_gid) ⇒ Object
140
141
142
143
144
145
146
147
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 140
# Requests a refetch of a page.
#
# For the page being parsed (+page_gid+ equals #gid) it sets the refetch-self
# flag and raises Error::SafeTerminateError; for any other page it delegates
# to #refetch_page.
#
# @param page_gid [String] GID of the page to refetch
# @return [Object] the result of #refetch_page (other pages only)
# @raise [ArgumentError] when +page_gid+ is not a String
# @raise [Error::SafeTerminateError] when +page_gid+ is the current page
def refetch(page_gid)
  unless page_gid.is_a?(String)
    raise ArgumentError.new("page_gid needs to be a String.")
  end
  return refetch_page(page_gid) unless page_gid == gid

  self.refetch_self = true
  raise Error::SafeTerminateError
end
|
#refetch_page(page_gid) ⇒ Object
131
132
133
134
135
136
137
138
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 131
# Marks another page as :to_refetch when saving is enabled; otherwise only
# prints what would have happened.
#
# @param page_gid [String] GID of the page to update
# @return [nil]
def refetch_page(page_gid)
  unless save
    puts "Would have refetch page #{page_gid}"
    return
  end

  update_parsing_status page_gid, :to_refetch
  puts "Refetch page #{page_gid}"
end
|
#reparse(page_gid) ⇒ Object
158
159
160
161
162
163
164
165
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 158
# Requests a reparse of a page.
#
# For the page being parsed (+page_gid+ equals #gid) it sets the reparse-self
# flag and raises Error::SafeTerminateError; for any other page it delegates
# to #reparse_page.
#
# @param page_gid [String] GID of the page to reparse
# @return [Object] the result of #reparse_page (other pages only)
# @raise [ArgumentError] when +page_gid+ is not a String
# @raise [Error::SafeTerminateError] when +page_gid+ is the current page
def reparse(page_gid)
  unless page_gid.is_a?(String)
    raise ArgumentError.new("page_gid needs to be a String.")
  end
  return reparse_page(page_gid) unless page_gid == gid

  self.reparse_self = true
  raise Error::SafeTerminateError
end
|
#reparse_page(page_gid) ⇒ Object
149
150
151
152
153
154
155
156
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 149
# Marks another page as :to_reparse when saving is enabled; otherwise only
# prints what would have happened.
#
# @param page_gid [String] GID of the page to update
# @return [nil]
def reparse_page(page_gid)
  unless save
    puts "Would have reparse page #{page_gid}"
    return
  end

  update_parsing_status page_gid, :to_reparse
  puts "Reparse page #{page_gid}"
end
|
#save_type ⇒ Object
127
128
129
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 127
# Save type identifier for this executor.
#
# @return [Symbol] always :parsing
def save_type
  :parsing
end
|
#still_alive(page_gid = nil) ⇒ Object
243
244
245
246
247
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 243
# Calls Client::JobPage#still_alive for a page of this job.
#
# @param page_gid [String, nil] GID of the page; nil means the current #gid
# @return [Object] the result of Client::JobPage#still_alive
def still_alive(page_gid = nil)
  target_gid = page_gid.nil? ? gid : page_gid
  Client::JobPage.new.still_alive(job_id, target_gid)
end
|
#update_parsing_done_status ⇒ Object
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 95
# Marks the current page as :done on the server. Does nothing unless #save is
# truthy.
#
# @return [nil]
# @raise [RuntimeError] when the server response code is not 200
def update_parsing_done_status
  return unless save

  response = parsing_update(job_id: job_id, gid: gid, parsing_status: :done)
  unless response.code == 200
    failure = "Unable to save Page Parsing Done Status to server: #{response.body}"
    puts "Error: #{failure}"
    raise failure
  end

  puts "Page Parsing Done."
end
|
#update_parsing_starting_status ⇒ Object
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 77
# Marks the current page as :starting on the server, forwarding the
# keep-outputs setting. Does nothing unless #save is truthy.
#
# @return [nil]
# @raise [RuntimeError] when the server response code is not 200
def update_parsing_starting_status
  return unless save

  response = parsing_update(
    job_id: job_id,
    gid: gid,
    parsing_status: :starting,
    keep_outputs: @keep_outputs
  )
  unless response.code == 200
    failure = "Unable to save Page Parsing Status to server: #{response.body}"
    puts "Error: #{failure}"
    raise failure
  end

  puts "Page Parsing Status Updated."
end
|
#update_parsing_status(page_gid, status) ⇒ Object
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 111
# Changes the parsing status of a page on the server. Does nothing unless
# #save is truthy.
#
# The failure message reads "status to <status>: <response body>"; the target
# status comes first so the server response is not mistaken for part of the
# status name.
#
# @param page_gid [String] GID of the page to update
# @param status [Symbol] new parsing status (e.g. :done, :limbo, :to_refetch)
# @return [nil]
# @raise [RuntimeError] when the server response code is not 200
def update_parsing_status(page_gid, status)
  return unless save

  response = parsing_update(job_id: job_id, gid: page_gid, parsing_status: status)
  if response.code == 200
    puts "Page #{page_gid} status changed to #{status}."
  else
    failure = "Unable to change page #{page_gid} status to #{status}: #{response.body}"
    puts "Error: #{failure}"
    raise failure
  end
end
|
#update_to_server(opts = {}) ⇒ Object
68
69
70
71
72
73
74
75
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 68
# Forwards the given job ID, GID, pages, outputs and status to
# #parsing_update. Keys missing from +opts+ are sent as nil.
#
# @param opts [Hash] values under :job_id, :gid, :pages, :outputs and :status
# @return [Object] the result of #parsing_update
def update_to_server(opts = {})
  job, page_gid, new_pages, new_outputs, status =
    opts.values_at(:job_id, :gid, :pages, :outputs, :status)

  parsing_update(
    job_id: job,
    gid: page_gid,
    pages: new_pages,
    outputs: new_outputs,
    parsing_status: status
  )
end
|