Module: DhEasy::Core::Mock::FakeExecutor

Includes:
Datahen::Plugin::ContextExposer
Included in:
FakeFinisher, FakeParser, FakeSeeder
Defined in:
lib/dh_easy/core/mock/fake_executor.rb

Overview

Fake executor that emulates the `Datahen` executor.

Constant Summary collapse

MAX_FIND_OUTPUTS_PER_PAGE =

Maximum allowed page size when querying outputs (see #find_outputs).

500

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#contentString?

Page content.

Returns:

  • (String, nil)


13
14
15
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 13

# Reader for the page content held by this executor.
#
# @return [String, nil] the stored content, or nil when none has been set.
def content
  @content
end

#failed_contentString?

Failed page content.

Returns:

  • (String, nil)


16
17
18
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 16

# Reader for the failed page content held by this executor.
#
# @return [String, nil] the stored failed content, or nil when none has been set.
def failed_content
  @failed_content
end

Instance Method Details

#dbObject

Fake database that represents what has been saved.



75
76
77
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 75

# Fake database backing this executor; built on first access and reused
# on every later call.
#
# @return [DhEasy::Core::Mock::FakeDb]
def db
  return @db if @db
  @db = DhEasy::Core::Mock::FakeDb.new
end

#execute_script(file_path, vars = {}) ⇒ Object

Execute a script file as an executor.

Parameters:

  • file_path (String)

    Script file path to execute.



341
342
343
344
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 341

# Run a script file in an isolated binding, then flush all drafts.
#
# NOTE(review): the file is evaluated with +eval+, so only trusted script
# files should be passed in.
#
# @param file_path [String] Script file path to execute.
# @param vars [Hash] ({}) Passed as-is to +isolated_binding+.
#
# @return [Object] whatever #flush returns.
def execute_script file_path, vars = {}
  source = File.read(file_path)
  context = isolated_binding(vars)
  # file_path is handed to eval so backtraces point at the script file
  eval(source, context, file_path)
  flush
end

#find_output(collection = 'default', query = {}, opts = {}) ⇒ Hash?

Note:

The `:job_id` option in opts takes priority over `:scraper_name` when both exist. If neither is provided, or both are nil, the current job is queried instead; this is the default behavior.

Find the first output matching the collection and query.

Examples:

find_output
find_output 'my_collection'
find_output 'my_collection', {}

Find from another scraper by name

find_output 'my_collection', {}, scraper_name: 'my_scraper'

Find from another scraper by job_id

find_output 'my_collection', {}, job_id: 123

Parameters:

  • collection (String) (defaults to: 'default')

    (‘default’) Collection name.

  • query (Hash) (defaults to: {})

    ({}) Filters to query.

  • opts (Hash) (defaults to: {})

    ({}) Configuration options.

Options Hash (opts):

  • :scraper_name (String, nil) — default: nil

    Scraper name to query from.

  • :job_id (Integer, nil) — default: nil

    Job’s id to query from.

Returns:

  • (Hash, nil)

Raises:

  • (ArgumentError)

    collection is not String.

  • (ArgumentError)

    query is not a Hash.



333
334
335
336
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 333

# Fetch a single output: asks #find_outputs for page 1 with a page size of 1
# and returns the first element of the result.
#
# @param collection [String] ('default') Collection name.
# @param query [Hash] ({}) Filters to query.
# @param opts [Hash] ({}) Options forwarded untouched to #find_outputs.
#
# @return [Hash, nil] nil when #find_outputs returns nil or an empty list.
def find_output collection = 'default', query = {}, opts = {}
  matches = find_outputs(collection, query, 1, 1, opts)
  return nil if matches.nil?
  matches.first
end

#find_outputs(collection = 'default', query = {}, page = 1, per_page = 30, opts = {}) ⇒ Array

Note:

The `:job_id` option in opts takes priority over `:scraper_name` when both exist. If neither is provided, or both are nil, the current job is queried instead; this is the default behavior.

Find outputs by collection and query with pagination.

Examples:

find_outputs
find_outputs 'my_collection'
find_outputs 'my_collection', {}
find_outputs 'my_collection', {}, 1
find_outputs 'my_collection', {}, 1, 30

Find from another scraper by name

find_outputs 'my_collection', {}, 1, 30, scraper_name: 'my_scraper'

Find from another scraper by job_id

find_outputs 'my_collection', {}, 1, 30, job_id: 123

Parameters:

  • collection (String) (defaults to: 'default')

    (‘default’) Collection name.

  • query (Hash) (defaults to: {})

    ({}) Filters to query.

  • page (Integer) (defaults to: 1)

    (1) Page number.

  • per_page (Integer) (defaults to: 30)

    (30) Page size.

  • opts (Hash) (defaults to: {})

    ({}) Configuration options.

Options Hash (opts):

  • :scraper_name (String, nil) — default: nil

    Scraper name to query from.

  • :job_id (Integer, nil) — default: nil

    Job’s id to query from.

Returns:

  • (Array)

Raises:

  • (ArgumentError)

    collection is not String.

  • (ArgumentError)

    query is not a Hash.

  • (ArgumentError)

    page is not an Integer greater than 0.

  • (ArgumentError)

    per_page is not an Integer between 1 and 500.



286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 286

# Query saved outputs by collection and filters, one page at a time.
#
# The job to query is chosen in this order: +opts[:job_id]+, then the latest
# job found for +opts[:scraper_name]+, then the executor's own job_id.
#
# @param collection [String] ('default') Collection name.
# @param query [Hash] ({}) Filters to query.
# @param page [Integer] (1) Page number, starting at 1.
# @param per_page [Integer] (30) Page size, 1..MAX_FIND_OUTPUTS_PER_PAGE.
# @param opts [Hash] ({}) Configuration options.
# @option opts [String,nil] :scraper_name (nil) Scraper name to query from.
# @option opts [Integer,nil] :job_id (nil) Job's id to query from.
#
# @return [Array] whatever +db.query+ returns for the :outputs collection.
#
# @raise [ArgumentError] collection is not a String, query is not a Hash,
#   page is not an Integer > 0, or per_page is out of range.
def find_outputs collection = 'default', query = {}, page = 1, per_page = 30, opts = {}
  raise ArgumentError, "collection needs to be a String." unless collection.is_a?(String)
  raise ArgumentError, "query needs to be a Hash." unless query.is_a?(Hash)

  page_ok = page.is_a?(Integer) && page > 0
  raise ArgumentError, "page needs to be an Integer greater than 0." unless page_ok

  size_ok = per_page.is_a?(Integer) && per_page >= 1 && per_page <= MAX_FIND_OUTPUTS_PER_PAGE
  unless size_ok
    raise ArgumentError, "per_page needs to be an Integer between 1 and #{MAX_FIND_OUTPUTS_PER_PAGE}."
  end

  # Resolve which job to read from; an explicit :job_id always wins.
  other_job = latest_job_by(opts[:scraper_name])
  target_job_id = opts[:job_id] || (other_job.nil? ? job_id : other_job['job_id'])
  scoped_query = query.merge('_collection' => collection, '_job_id' => target_job_id)

  # Pages are 1-based, so page 1 starts at offset 0.
  db.query :outputs, scoped_query, (page - 1) * per_page, per_page
end

#flushObject

Save all drafts into db and clear draft queues.



231
232
233
234
235
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 231

# Flush everything that is pending, in this order: draft pages, draft
# outputs, then the self actions of the current page.
#
# @return [Object] whatever #flush_self_actions returns.
def flush
  flush_pages
  flush_outputs
  flush_self_actions
end

#flush_outputsObject

Save draft outputs into db and clear draft queue.



225
226
227
228
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 225

# Persist the draft outputs through #save_outputs, then reset the draft
# queue via +clear_draft_outputs+.
#
# @return [Object] whatever +clear_draft_outputs+ returns.
def flush_outputs
  drafts = outputs
  save_outputs(drafts)
  clear_draft_outputs
end

#flush_pagesObject

Save draft pages into db and clear draft queue.



219
220
221
222
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 219

# Persist the draft pages through #save_pages, then reset the draft queue
# via +clear_draft_pages+.
#
# @return [Object] whatever +clear_draft_pages+ returns.
def flush_pages
  drafts = pages
  save_pages(drafts)
  clear_draft_pages
end

#flush_self_actionsObject

Execute any pending actions applied to the current page.



206
207
208
209
210
211
212
213
214
215
216
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 206

# Execute the pending self actions (refetch and/or reparse) of the current
# page.
#
# When either flag is set, the current page is saved first. Page gid
# override is switched on for that save only if it was not already allowed,
# and it is switched back off afterwards even when the save raises, so a
# failed save cannot leave the db with the override enabled.
#
# @return [Object, nil] result of the last db action executed, or nil when
#   no flag is set.
def flush_self_actions
  # Save the current page before refetch/reparse
  if refetch_self || reparse_self
    temp_page_gid_override = !db.allow_page_gid_override?
    db.enable_page_gid_override if temp_page_gid_override
    begin
      save_pages [page]
    ensure
      # Restore the previous setting no matter how the save ended.
      db.disable_page_gid_override if temp_page_gid_override
    end
  end
  db.refetch(page['job_id'], page['gid']) if refetch_self
  db.reparse(page['job_id'], page['gid']) if reparse_self
end

#initialize(opts = {}) ⇒ Object

Initialize object.

Parameters:

  • opts (Hash) (defaults to: {})

    ({}) Configuration options.

Options Hash (opts):

  • :pages (Array) — default: nil

    Array to initialize pages, can be nil for empty.

  • :outputs (Array) — default: nil

    Array to initialize outputs, can be nil for empty.

  • :job_id (Integer) — default: nil

    A number to represent the job_id.

  • :page (Hash) — default: nil

    Current page.

  • :scraper_name (String) — default: nil

    Scraper name used by the executor.

Raises:

  • (ArgumentError)

    When pages or outputs are not Array.



88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 88

# Build a fake executor from an options hash.
#
# @param opts [Hash] ({}) Configuration options.
# @option opts [Array,nil] :pages (nil) Initial draft pages.
# @option opts [Array,nil] :outputs (nil) Initial draft outputs.
# @option opts [Integer,nil] :job_id (nil) Fake job ID.
# @option opts [String,nil] :scraper_name (nil) Fake scraper name.
# @option opts [Hash,nil] :page (nil) Current page.
#
# @raise [ArgumentError] When pages or outputs are neither nil nor Array.
def initialize opts = {}
  draft_pages = opts[:pages]
  if !draft_pages.nil? && !draft_pages.is_a?(Array)
    raise ArgumentError, "Pages must be an array."
  end
  @pages = draft_pages

  draft_outputs = opts[:outputs]
  if !draft_outputs.nil? && !draft_outputs.is_a?(Array)
    raise ArgumentError, "Outputs must be an array."
  end
  @outputs = draft_outputs

  # Setter order matters: page= runs last, after job_id has been set.
  self.job_id = opts[:job_id]
  self.scraper_name = opts[:scraper_name]
  self.page = opts[:page]
end

#job_idInteger?

Fake job ID used by executor.

Returns:

  • (Integer, nil)


115
116
117
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 115

# Fake job ID used by the executor; the value is read from the fake db.
#
# @return [Integer, nil]
def job_id
  db.job_id
end

#job_id=(value) ⇒ Object

Set fake job ID value.



120
121
122
123
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 120

# Set the fake job ID on the fake db and copy it onto the current page's
# 'job_id' key.
#
# @param value [Integer, nil] Job ID to use.
def job_id= value
  db.job_id = value
  # Goes through #page, so a page is built here if none exists yet.
  page['job_id'] = value
end

#latest_job_by(scraper_name, filter = {}) ⇒ Hash?

Get latest job by scraper_name.

Parameters:

  • scraper_name (String)

    Scraper name.

  • filter (Hash) (defaults to: {})

    ({}) Additional_filters.

Returns:

  • (Hash, nil)

    Returns nil if no job is found for scraper_name or if scraper_name is nil.



244
245
246
247
248
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 244

# Look up the most recent job of a scraper, judged by 'created_at'.
#
# @param scraper_name [String, nil] Scraper name.
# @param filter [Hash] ({}) Additional filters merged into the jobs query.
#
# @return [Hash, nil] nil when scraper_name is nil (no query is made) or
#   when the query result has no maximum.
def latest_job_by scraper_name, filter = {}
  if scraper_name.nil?
    nil
  else
    criteria = filter.merge('scraper_name' => scraper_name)
    candidates = db.query(:jobs, criteria)
    candidates.max { |left, right| left['created_at'] <=> right['created_at'] }
  end
end

#outputsArray

Draft outputs, usually get saved after execution.

Returns:

  • (Array)


58
59
60
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 58

# Draft outputs queue; an empty Array is created on first access when no
# list has been set.
#
# @return [Array]
def outputs
  @outputs = [] unless @outputs
  @outputs
end

#pageHash?

Current page used by executor.

Returns:

  • (Hash, nil)


127
128
129
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 127

# Current page used by the executor. When none is set, a fake page is built
# for the current job_id and kept for later calls.
#
# @return [Hash, nil]
def page
  return @page if @page
  @page = DhEasy::Core::Mock::FakeDb.build_fake_page(job_id: job_id)
end

#page=(value) ⇒ Object

Set current page.



132
133
134
135
136
137
138
139
140
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 132

# Set the current page.
#
# A non-nil value is first normalized through +FakeDb.build_page+. If the
# built page carries a 'job_id', it becomes the executor's job_id; otherwise
# the page receives the executor's job_id. A 'gid' on the page is copied to
# +db.page_gid+. Passing nil just clears the current page.
#
# @param value [Hash, nil] Page to use.
def page= value
  return @page = nil if value.nil?

  built = DhEasy::Core::Mock::FakeDb.build_page(value)
  own_job_id = built['job_id']
  self.job_id = own_job_id unless own_job_id.nil?
  built['job_id'] ||= job_id
  gid = built['gid']
  db.page_gid = gid unless gid.nil?
  @page = built
end

#pagesArray

Draft pages, usually get saved after execution.

Returns:

  • (Array)


52
53
54
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 52

# Draft pages queue; an empty Array is created on first access when no list
# has been set.
#
# @return [Array]
def pages
  @pages = [] unless @pages
  @pages
end

#refetch(gid) ⇒ Object

Refetch a page by gid.

Parameters:

  • gid (String)

    Page’s gid to refetch.

Raises:

  • (ArgumentError)


349
350
351
352
353
354
355
356
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 349

# Refetch a page by gid.
#
# When the gid belongs to the current page, only the refetch_self flag is
# raised (the action itself is deferred); any other gid is forwarded to the
# db right away.
#
# @param gid [String] Page's gid to refetch.
#
# @return [Object, nil] nil for the current page, otherwise the result of
#   +db.refetch+.
#
# @raise [ArgumentError] gid is not a String.
def refetch gid
  raise ArgumentError, "gid needs to be a String." unless gid.is_a?(String)
  return db.refetch(job_id, gid) unless page['gid'] == gid

  self.refetch_self = true
  nil
end

#refetch_self ⇒ Boolean

Note:

It is stronger than #reparse_self flag.

Refetch self page flag.

Returns:

  • (Boolean)


145
146
147
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 145

# Refetch self page flag; normalized to false when unset.
#
# @return [Boolean]
def refetch_self
  @refetch_self = false unless @refetch_self
  @refetch_self
end

#refetch_self=(value) ⇒ Object

Set refetch self page flag.



150
151
152
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 150

# Set the refetch self page flag.
#
# @param value [Boolean] New flag value, stored as given.
def refetch_self= value
  @refetch_self = value
end

#reparse(page_gid) ⇒ Object

Reparse a page by gid.

Parameters:

  • page_gid (String)

    Page’s gid to reparse.

Raises:

  • (ArgumentError)


361
362
363
364
365
366
367
368
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 361

# Reparse a page by gid.
#
# When the gid belongs to the current page, only the reparse_self flag is
# raised (the action itself is deferred); any other gid is forwarded to the
# db right away.
#
# @param page_gid [String] Page's gid to reparse.
#
# @return [Object, nil] nil for the current page, otherwise the result of
#   +db.reparse+.
#
# @raise [ArgumentError] page_gid is not a String.
def reparse page_gid
  raise ArgumentError, "page_gid needs to be a String." unless page_gid.is_a?(String)
  return db.reparse(job_id, page_gid) unless page['gid'] == page_gid

  self.reparse_self = true
  nil
end

#reparse_self ⇒ Boolean

Reparse self page flag.

Returns:

  • (Boolean)


156
157
158
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 156

# Reparse self page flag; normalized to false when unset.
#
# @return [Boolean]
def reparse_self
  @reparse_self = false unless @reparse_self
  @reparse_self
end

#reparse_self=(value) ⇒ Object

Set reparse self page flag.



161
162
163
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 161

# Set the reparse self page flag.
#
# @param value [Boolean] New flag value, stored as given.
def reparse_self= value
  @reparse_self = value
end

#save_jobs(list) ⇒ Object

Save a job collection on db and remove all elements from list.

Parameters:

  • list (Array)

    Collection of jobs to save.



183
184
185
186
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 183

# Append every job in +list+ to the db jobs collection, then empty +list+
# in place.
#
# @param list [Array] Collection of jobs to save.
#
# @return [Array] the same +list+ object, now empty.
def save_jobs list
  list.each do |entry|
    db.jobs << entry
  end
  list.clear
end

#save_outputs(list) ⇒ Object

Save an output collection on db and remove all elements from list.

Parameters:

  • list (Array)

    Collection of outputs to save.



200
201
202
203
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 200

# Append every output in +list+ to the db outputs collection, then empty
# +list+ in place.
#
# @param list [Array] Collection of outputs to save.
#
# @return [Array] the same +list+ object, now empty.
def save_outputs list
  list.each do |entry|
    db.outputs << entry
  end
  list.clear
end

#save_pages(list) ⇒ Object

Save a page collection on db and remove all elements from list.

Parameters:

  • list (Array)

    Collection of pages to save.



191
192
193
194
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 191

# Append every page in +list+ to the db pages collection, then empty +list+
# in place.
#
# @param list [Array] Collection of pages to save.
#
# @return [Array] the same +list+ object, now empty.
def save_pages list
  list.each do |entry|
    db.pages << entry
  end
  list.clear
end

#saved_jobsObject

Retrieve a list of saved jobs.



166
167
168
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 166

# Jobs currently stored in the fake db.
#
# @return [Object] the db's jobs collection itself, not a copy.
def saved_jobs
  db.jobs
end

#saved_outputsObject

Retrieve a list of saved outputs.



176
177
178
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 176

# Outputs currently stored in the fake db.
#
# @return [Object] the db's outputs collection itself, not a copy.
def saved_outputs
  db.outputs
end

#saved_pagesObject

Retrieve a list of saved pages. Drafted pages can be included.



171
172
173
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 171

# Pages currently stored in the fake db.
#
# @return [Object] the db's pages collection itself, not a copy.
def saved_pages
  db.pages
end

#scraper_name ⇒ String?

Fake scraper name used by executor.

Returns:

  • (String, nil)


104
105
106
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 104

# Fake scraper name used by the executor; the value is read from the fake db.
#
# @return [String, nil] presumably a String, matching the :scraper_name
#   option type; confirm against FakeDb.
def scraper_name
  db.scraper_name
end

#scraper_name=(value) ⇒ Object

Set fake scraper name value.



109
110
111
# File 'lib/dh_easy/core/mock/fake_executor.rb', line 109

# Set the fake scraper name; the value is stored on the fake db.
#
# @param value [String, nil] Scraper name to use.
def scraper_name= value
  db.scraper_name = value
end