Class: AnswersEngine::Scraper::RubyParserExecutor

Inherits:
Executor
  • Object
show all
Defined in:
lib/answersengine/scraper/ruby_parser_executor.rb

Constant Summary

Constants inherited from Executor

Executor::MAX_FIND_OUTPUTS_PER_PAGE

Instance Attribute Summary collapse

Attributes inherited from Executor

#filename, #gid, #job_id

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Executor

#clean_backtrace, #eval_with_context, #find_output, #find_outputs, #finisher_update, #get_content, #get_failed_content, #get_job_id, #init_global_page, #init_job_page, #init_page, #parsing_update, #remove_old_dups!, #remove_old_output_dups!, #remove_old_page_dups!, #save_outputs, #save_pages, #save_pages_and_outputs, #seeding_update

Methods included from Plugin::ContextExposer

#create_context, #expose_to, #exposed_env, #exposed_methods, #isolated_binding, #var_or_proc

Constructor Details

#initialize(options = {}) ⇒ RubyParserExecutor

Returns a new instance of RubyParserExecutor.



13
14
15
16
17
18
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 13

# Builds the executor from an options hash.
#
# @param options [Hash] executor settings:
#   +:filename+ (required), +:gid+ (required), +:job_id+ (key must be
#   present) and +:vars+ (optional, defaults to an empty hash).
# @raise [RuntimeError] when +:filename+ or +:gid+ is missing.
# @raise [KeyError] when +:job_id+ is missing.
def initialize(options = {})
  @filename = options.fetch(:filename) do
    raise "Filename is required"
  end
  @gid = options.fetch(:gid) do
    raise "GID is required"
  end
  # No fallback here: a missing :job_id surfaces as Hash#fetch's KeyError.
  @job_id = options.fetch(:job_id)
  @page_vars = options.fetch(:vars, {})
end

Instance Attribute Details

#refetch_self ⇒ Boolean

Note:

This flag takes precedence over the #reparse_self flag: when both are set, the page is refetched rather than reparsed.

Refetch self page flag.

Returns:

  • (Boolean)


8
9
10
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 8

# Refetch self page flag. #refetch sets it when asked to refetch the page
# being parsed, and #eval_parser_script checks it before #reparse_self, so
# it wins when both flags are set.
#
# @return [Boolean, nil] current value of +@refetch_self+.
def refetch_self
  @refetch_self
end

#reparse_self ⇒ Boolean

Reparse self page flag.

Returns:

  • (Boolean)


11
12
13
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 11

# Reparse self page flag. #reparse sets it when asked to reparse the page
# being parsed; #eval_parser_script only acts on it when #refetch_self is
# not set.
#
# @return [Boolean, nil] current value of +@reparse_self+.
def reparse_self
  @reparse_self
end

#save ⇒ Object

Returns the value of attribute save.



4
5
6
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 4

# Save flag stored by #exec_parser. The status updates and the
# refetch/reparse helpers consult it: when it is falsy they skip the server
# call (the helpers print a "Would have ..." message instead).
#
# @return [Object] current value of +@save+.
def save
  @save
end

Class Method Details

.exposed_methods ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 20

# Names of the methods this executor exposes.
# NOTE(review): presumably consumed by Plugin::ContextExposer when building
# the script context - confirm against that module.
#
# @return [Array<Symbol>] frozen list of method names.
def self.exposed_methods
  %i[
    content failed_content
    outputs pages page
    save_pages save_outputs
    find_output find_outputs
    refetch reparse
  ].freeze
end

Instance Method Details

#content ⇒ Object



174
175
176
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 174

# Content of the current page, loaded on first use through +get_content+
# and cached in +@content+ afterwards.
#
# @return [Object] whatever +get_content(gid)+ returned.
def content
  return @content if @content

  @content = get_content(gid)
end

#eval_parser_script(save = false) ⇒ Object



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 135

# Runs the parser script for the current page and stores what it produced.
#
# In order: reports the starting status, builds the page (with page vars),
# resets both self flags, evaluates the script in an isolated binding that
# exposes +outputs+, +pages+ and +page+, saves the collected pages and
# outputs, and finally refetches the page, reparses it, or reports the done
# status. Refetch is checked first, so it wins over reparse.
#
# NOTE(review): the status updates and refetch/reparse helpers read the
# #save attribute rather than this argument; #exec_parser sets both.
#
# @param save [Boolean] when truthy, a script error is passed to
#   #handle_error before being re-raised.
# @return [Object] result of the final refetch, reparse or done-status step.
# @raise [SyntaxError, StandardError] any error raised while evaluating the
#   script is re-raised.
def eval_parser_script(save = false)
  update_parsing_starting_status

  collected_outputs = []
  collected_pages = []
  current_page = init_page_vars(init_page)
  self.refetch_self = false
  self.reparse_self = false

  begin
    script_context = isolated_binding({
      outputs: collected_outputs,
      pages: collected_pages,
      page: current_page
    })
    eval_with_context filename, script_context
  rescue SyntaxError, StandardError => e
    # SyntaxError is not a StandardError, so it has to be listed explicitly.
    handle_error(e) if save
    raise e
  end

  puts "=========== Parsing Executed ==========="
  save_pages_and_outputs(collected_pages, collected_outputs, :parsing)

  return refetch_page(gid) if refetch_self
  return reparse_page(gid) if reparse_self

  update_parsing_done_status
end

#exec_parser(save = false) ⇒ Object



36
37
38
39
40
41
42
43
44
45
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 36

# Entry point for running the parser script: stores the save flag,
# announces the mode on stdout and delegates to #eval_parser_script.
#
# @param save [Boolean] truthy to execute and persist, falsy for a trial run.
# @return [Object] whatever #eval_parser_script returns.
def exec_parser(save = false)
  @save = save
  puts(save ? "Executing parser script" : "Trying parser script")

  eval_parser_script(save)
end

#failed_content ⇒ Object



178
179
180
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 178

# Failed content of the current page, loaded on first use through
# +get_failed_content+ and cached in +@failed_content+ afterwards.
#
# @return [Object] whatever +get_failed_content(gid)+ returned.
def failed_content
  return @failed_content if @failed_content

  @failed_content = get_failed_content(gid)
end

#handle_error(e) ⇒ Object



182
183
184
185
186
187
188
189
190
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 182

# Reports a parsing failure: builds a log message from the error and its
# cleaned backtrace and sends it with the +:failed+ parsing status.
#
# @param e [Exception] the error raised while parsing.
# @return [Object] whatever +parsing_update+ returns.
def handle_error(e)
  headline = "Parsing #{e.class}: #{e} (Job:#{job_id} GID:#{gid})"
  trace = clean_backtrace(e.backtrace)
  # Array#join flattens, so a backtrace array becomes one line per frame.
  message = [headline, trace].join("\n")

  parsing_update(job_id: job_id, gid: gid, parsing_status: :failed, log_error: message)
end

#init_page_vars(page) ⇒ Object



47
48
49
50
51
52
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 47

# Stores the executor's page vars under the page's +'vars'+ key, unless
# there are none (nil or empty), in which case the page is left untouched.
#
# @param page [Hash] page to update in place.
# @return [Hash] the same page object.
def init_page_vars(page)
  return page if @page_vars.nil? || @page_vars.empty?

  page['vars'] = @page_vars
  page
end

#refetch(page_gid) ⇒ Object

Raises:

  • (ArgumentError)


108
109
110
111
112
113
114
115
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 108

# Requests a refetch of a page. For the page being parsed this only sets
# the refetch-self flag; any other page goes straight to #refetch_page.
#
# @param page_gid [String] GID of the page to refetch.
# @return [nil, Object] nil for the current page, otherwise the result of
#   #refetch_page.
# @raise [ArgumentError] when +page_gid+ is not a String.
def refetch(page_gid)
  raise ArgumentError, "page_gid needs to be a String." unless page_gid.is_a?(String)
  return refetch_page(page_gid) unless page_gid == gid

  self.refetch_self = true
  nil
end

#refetch_page(gid) ⇒ Object



99
100
101
102
103
104
105
106
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 99

# Asks the server to refetch a page for the current job, or - when the save
# flag is off - only prints what would have been done.
#
# @param gid [String] GID of the page to refetch (shadows the executor's
#   own +gid+ inside this method).
# @return [nil]
def refetch_page(gid)
  unless save
    puts "Would have refetch page #{gid}"
    return
  end

  Client::ScraperJobPage.new({gid: gid}).refetch_by_job(job_id)
  puts "Refetch page #{gid}"
end

#reparse(page_gid) ⇒ Object

Raises:

  • (ArgumentError)


126
127
128
129
130
131
132
133
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 126

# Requests a reparse of a page. For the page being parsed this only sets
# the reparse-self flag; any other page goes straight to #reparse_page.
#
# @param page_gid [String] GID of the page to reparse.
# @return [nil, Object] nil for the current page, otherwise the result of
#   #reparse_page.
# @raise [ArgumentError] when +page_gid+ is not a String.
def reparse(page_gid)
  raise ArgumentError, "page_gid needs to be a String." unless page_gid.is_a?(String)
  return reparse_page(page_gid) unless page_gid == gid

  self.reparse_self = true
  nil
end

#reparse_page(gid) ⇒ Object



117
118
119
120
121
122
123
124
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 117

# Asks the server to reparse a page for the current job, or - when the save
# flag is off - only prints what would have been done.
#
# @param gid [String] GID of the page to reparse (shadows the executor's
#   own +gid+ inside this method).
# @return [nil]
def reparse_page(gid)
  unless save
    puts "Would have reparse page #{gid}"
    return
  end

  Client::ScraperJobPage.new({gid: gid}).reparse_by_job(job_id)
  puts "Reparse page #{gid}"
end

#save_type ⇒ Object



95
96
97
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 95

# Save type identifying this executor.
# NOTE(review): presumably read by the inherited Executor save helpers -
# confirm against Executor.
#
# @return [Symbol] always +:parsing+.
def save_type
  :parsing
end

#update_parsing_done_status ⇒ Object



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 79

# Reports the +:done+ parsing status for the current page to the server.
# Does nothing when the save flag is off.
#
# @return [nil]
# @raise [RuntimeError] when the server response code is not 200.
def update_parsing_done_status
  return unless save

  response = parsing_update(job_id: job_id, gid: gid, parsing_status: :done)

  unless response.code == 200
    puts "Error: Unable to save Page Parsing Done Status to server: #{response.body}"
    raise "Unable to save Page Parsing Done Status to server: #{response.body}"
  end

  puts "Page Parsing Done."
end

#update_parsing_starting_status ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 63

# Reports the +:starting+ parsing status for the current page to the
# server. Does nothing when the save flag is off.
#
# @return [nil]
# @raise [RuntimeError] when the server response code is not 200.
def update_parsing_starting_status
  return unless save

  response = parsing_update(job_id: job_id, gid: gid, parsing_status: :starting)

  unless response.code == 200
    puts "Error: Unable to save Page Parsing Status to server: #{response.body}"
    raise "Unable to save Page Parsing Status to server: #{response.body}"
  end

  puts "Page Parsing Status Updated."
end

#update_to_server(opts = {}) ⇒ Object



54
55
56
57
58
59
60
61
# File 'lib/answersengine/scraper/ruby_parser_executor.rb', line 54

# Sends pages, outputs and a parsing status to the server through
# +parsing_update+, renaming the +:status+ option to +:parsing_status+.
#
# @param opts [Hash] values to send; reads +:job_id+, +:gid+, +:pages+,
#   +:outputs+ and +:status+ (missing keys are sent as nil).
# @return [Object] whatever +parsing_update+ returns.
def update_to_server(opts = {})
  job, page_gid, new_pages, new_outputs, status =
    opts.values_at(:job_id, :gid, :pages, :outputs, :status)

  parsing_update(
    job_id: job,
    gid: page_gid,
    pages: new_pages,
    outputs: new_outputs,
    parsing_status: status
  )
end