Class: AnswersEngine::Scraper::RubyParserExecutor
- Inherits:
-
Executor
- Object
- Executor
- AnswersEngine::Scraper::RubyParserExecutor
show all
- Defined in:
- lib/answersengine/scraper/ruby_parser_executor.rb
Constant Summary
Constants inherited
from Executor
Executor::MAX_FIND_OUTPUTS_PER_PAGE
Instance Attribute Summary collapse
Attributes inherited from Executor
#filename, #gid, #job_id
Class Method Summary
collapse
Instance Method Summary
collapse
Methods inherited from Executor
#clean_backtrace, #eval_with_context, #find_output, #find_outputs, #finisher_update, #get_content, #get_failed_content, #get_job_id, #init_global_page, #init_job_page, #init_page, #parsing_update, #remove_old_dups!, #remove_old_output_dups!, #remove_old_page_dups!, #save_outputs, #save_pages, #save_pages_and_outputs, #seeding_update
#create_context, #expose_to, #exposed_env, #exposed_methods, #isolated_binding, #var_or_proc
Constructor Details
Returns a new instance of RubyParserExecutor.
13
14
15
16
17
18
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 13
# Returns a new instance of RubyParserExecutor configured from +options+.
#
# @param options [Hash] executor settings
# @option options [Object] :filename stored as @filename; required
# @option options [Object] :gid stored as @gid; required
# @option options [Object] :job_id stored as @job_id; required
# @option options [Hash] :vars stored as @page_vars; defaults to an empty hash
# @raise [RuntimeError] when :filename or :gid is absent
# @raise [KeyError] when :job_id is absent
def initialize(options = {})
  raise "Filename is required" unless options.key?(:filename)
  @filename = options[:filename]

  raise "GID is required" unless options.key?(:gid)
  @gid = options[:gid]

  # No custom message here: a missing :job_id surfaces as a plain KeyError.
  @job_id = options.fetch(:job_id)
  @page_vars = options.fetch(:vars, {})
end
|
Instance Attribute Details
#refetch_self ⇒ Boolean
Note:
It takes precedence over the #reparse_self flag.
Refetch self page flag.
8
9
10
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 8
# Refetch-self flag.
#
# eval_parser_script resets it to false before evaluating the script and
# checks it before #reparse_self once pages and outputs have been saved.
#
# @return [Boolean, nil] current value of @refetch_self
def refetch_self
  @refetch_self
end
|
#reparse_self ⇒ Boolean
11
12
13
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 11
# Reparse-self flag.
#
# eval_parser_script resets it to false before evaluating the script and
# only consults it when #refetch_self is not set.
#
# @return [Boolean, nil] current value of @reparse_self
def reparse_self
  @reparse_self
end
|
#save ⇒ Object
Returns the value of attribute save.
4
5
6
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 4
# Save flag recorded by exec_parser.
#
# When falsy, the refetch/reparse helpers only print what they would have
# done, and the parsing-status updates return early.
#
# @return [Object, nil] current value of @save
def save
  @save
end
|
Class Method Details
.exposed_methods ⇒ Object
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 20
# Names of the executor methods listed for exposure.
#
# @return [Array<Symbol>] frozen list of method names
def self.exposed_methods
  %i[
    content
    failed_content
    outputs
    pages
    page
    save_pages
    save_outputs
    find_output
    find_outputs
    refetch
    reparse
  ].freeze
end
|
Instance Method Details
#content ⇒ Object
174
175
176
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 174
# Content for this executor's gid, obtained through get_content.
#
# The result is cached in @content; a nil or false result is not cached,
# so get_content is called again on the next access.
#
# @return [Object] whatever get_content(gid) returned
def content
  return @content if @content

  @content = get_content(gid)
end
|
#eval_parser_script(save = false) ⇒ Object
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 135
# Evaluates the parser script and persists what it produced.
#
# Steps, in order:
# 1. reports the "starting" status (update_parsing_starting_status);
# 2. builds the page (init_page + init_page_vars) and empty outputs/pages
#    collections, and clears both self flags;
# 3. evaluates +filename+ through eval_with_context with a binding from
#    isolated_binding that was given :outputs, :pages and :page;
# 4. saves pages and outputs, then refetches, reparses or marks the page
#    done, with refetch taking precedence over reparse.
#
# @param save [Boolean] when truthy, a script failure is reported through
#   handle_error before being re-raised
# @return [Object] result of refetch_page, reparse_page or
#   update_parsing_done_status, whichever ran last
# @raise [SyntaxError, StandardError] any error raised while evaluating the
#   script is re-raised to the caller
def eval_parser_script(save = false)
  update_parsing_starting_status

  current_page = init_page
  collected_outputs = []
  collected_pages = []
  current_page = init_page_vars(current_page)

  # Start from a clean slate so flags from a previous run cannot leak in.
  self.refetch_self = false
  self.reparse_self = false

  begin
    script_binding = isolated_binding({
      outputs: collected_outputs,
      pages: collected_pages,
      page: current_page
    })
    eval_with_context filename, script_binding
  rescue SyntaxError, StandardError => e
    # SyntaxError is not a StandardError, so it has to be named explicitly.
    handle_error(e) if save
    raise e
  end

  puts "=========== Parsing Executed ==========="
  save_pages_and_outputs(collected_pages, collected_outputs, :parsing)

  return refetch_page(gid) if refetch_self
  return reparse_page(gid) if reparse_self

  update_parsing_done_status
end
|
#exec_parser(save = false) ⇒ Object
36
37
38
39
40
41
42
43
44
45
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 36
# Entry point: records the save flag, announces the mode and evaluates the
# parser script.
#
# @param save [Boolean] truthy to execute for real, falsy for a trial run;
#   stored in @save and forwarded to eval_parser_script
# @return [Object] whatever eval_parser_script returns
def exec_parser(save = false)
  @save = save
  puts(save ? "Executing parser script" : "Trying parser script")
  eval_parser_script(save)
end
|
#failed_content ⇒ Object
178
179
180
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 178
# Failed content for this executor's gid, obtained through
# get_failed_content.
#
# The result is cached in @failed_content; a nil or false result is not
# cached, so get_failed_content is called again on the next access.
#
# @return [Object] whatever get_failed_content(gid) returned
def failed_content
  return @failed_content if @failed_content

  @failed_content = get_failed_content(gid)
end
|
#handle_error(e) ⇒ Object
182
183
184
185
186
187
188
189
190
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 182
# Reports a parsing failure through parsing_update.
#
# The log text is a headline (error class, message, job id and gid)
# followed by the cleaned backtrace, joined with newlines.
#
# @param e [Exception] the error raised while parsing
# @return [Object] whatever parsing_update returns
def handle_error(e)
  headline = "Parsing #{e.class}: #{e} (Job:#{job_id} GID:#{gid})"
  # Array#join is kept so a backtrace array is flattened line by line.
  log_text = [headline, clean_backtrace(e.backtrace)].join("\n")

  parsing_update(
    job_id: job_id,
    gid: gid,
    parsing_status: :failed,
    log_error: log_text
  )
end
|
#init_page_vars(page) ⇒ Object
47
48
49
50
51
52
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 47
# Copies the executor's page vars onto the page when there are any.
#
# @param page [Hash] page to update in place under the 'vars' key
# @return [Hash] the same +page+ object
def init_page_vars(page)
  vars = @page_vars
  return page if vars.nil? || vars.empty?

  page['vars'] = vars
  page
end
|
#refetch(page_gid) ⇒ Object
108
109
110
111
112
113
114
115
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 108
# Requests a refetch of the page identified by +page_gid+.
#
# When +page_gid+ is this executor's own gid, only the refetch_self flag is
# set; any other gid goes straight to refetch_page.
#
# @param page_gid [String] gid of the page to refetch
# @return [nil, Object] nil for the own page, otherwise the refetch_page result
# @raise [ArgumentError] when +page_gid+ is not a String
def refetch(page_gid)
  unless page_gid.is_a?(String)
    raise ArgumentError, "page_gid needs to be a String."
  end

  return refetch_page(page_gid) unless page_gid == gid

  self.refetch_self = true
  nil
end
|
#refetch_page(gid) ⇒ Object
99
100
101
102
103
104
105
106
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 99
# Refetches the page with the given gid for the current job, or only prints
# what would have happened when the save flag is not set.
#
# @param gid [Object] gid of the page to refetch (shadows the gid reader)
# @return [nil]
def refetch_page(gid)
  unless save
    puts "Would have refetch page #{gid}"
    return
  end

  Client::ScraperJobPage.new({gid: gid}).refetch_by_job(job_id)
  puts "Refetch page #{gid}"
end
|
#reparse(page_gid) ⇒ Object
126
127
128
129
130
131
132
133
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 126
# Requests a reparse of the page identified by +page_gid+.
#
# When +page_gid+ is this executor's own gid, only the reparse_self flag is
# set; any other gid goes straight to reparse_page.
#
# @param page_gid [String] gid of the page to reparse
# @return [nil, Object] nil for the own page, otherwise the reparse_page result
# @raise [ArgumentError] when +page_gid+ is not a String
def reparse(page_gid)
  unless page_gid.is_a?(String)
    raise ArgumentError, "page_gid needs to be a String."
  end

  return reparse_page(page_gid) unless page_gid == gid

  self.reparse_self = true
  nil
end
|
#reparse_page(gid) ⇒ Object
117
118
119
120
121
122
123
124
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 117
# Reparses the page with the given gid for the current job, or only prints
# what would have happened when the save flag is not set.
#
# @param gid [Object] gid of the page to reparse (shadows the gid reader)
# @return [nil]
def reparse_page(gid)
  unless save
    puts "Would have reparse page #{gid}"
    return
  end

  Client::ScraperJobPage.new({gid: gid}).reparse_by_job(job_id)
  puts "Reparse page #{gid}"
end
|
#save_type ⇒ Object
95
96
97
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 95
# Save type reported by this executor.
#
# @return [Symbol] always :parsing
def save_type
  :parsing
end
|
#update_parsing_done_status ⇒ Object
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 79
# Marks the page's parsing as done through parsing_update.
#
# Does nothing when the save flag is not set.
#
# @return [nil]
# @raise [RuntimeError] when the response code is not 200
def update_parsing_done_status
  return unless save

  response = parsing_update(job_id: job_id, gid: gid, parsing_status: :done)

  unless response.code == 200
    puts "Error: Unable to save Page Parsing Done Status to server: #{response.body}"
    raise "Unable to save Page Parsing Done Status to server: #{response.body}"
  end

  puts "Page Parsing Done."
end
|
#update_parsing_starting_status ⇒ Object
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 63
# Marks the page's parsing as starting through parsing_update.
#
# Does nothing when the save flag is not set.
#
# @return [nil]
# @raise [RuntimeError] when the response code is not 200
def update_parsing_starting_status
  return unless save

  response = parsing_update(job_id: job_id, gid: gid, parsing_status: :starting)

  unless response.code == 200
    puts "Error: Unable to save Page Parsing Status to server: #{response.body}"
    raise "Unable to save Page Parsing Status to server: #{response.body}"
  end

  puts "Page Parsing Status Updated."
end
|
#update_to_server(opts = {}) ⇒ Object
54
55
56
57
58
59
60
61
|
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 54
# Forwards a generic update to parsing_update, renaming :status to
# :parsing_status.
#
# @param opts [Hash] update payload
# @option opts [Object] :job_id forwarded as job_id
# @option opts [Object] :gid forwarded as gid
# @option opts [Object] :pages forwarded as pages
# @option opts [Object] :outputs forwarded as outputs
# @option opts [Object] :status forwarded as parsing_status
# @return [Object] whatever parsing_update returns
def update_to_server(opts = {})
  target_job, target_gid, new_pages, new_outputs, status =
    opts.values_at(:job_id, :gid, :pages, :outputs, :status)

  parsing_update(
    job_id: target_job,
    gid: target_gid,
    pages: new_pages,
    outputs: new_outputs,
    parsing_status: status
  )
end
|