Class: DhEasy::Test::RecordTask

Inherits:
Object
  • Object
show all
Defined in:
lib/dh_easy/test/rake.rb

Overview

Record rake task generator. It allows snapshots of Datahen pages to be
recorded, providing an easy way to perform integration tests.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize {|task| ... } ⇒ RecordTask

Initialize record task. Use block to configure record task.

Yield Parameters:



328
329
330
331
332
# File 'lib/dh_easy/test/rake.rb', line 328

# Initialize the record task and register its rake task.
#
# The optional block receives the new instance so the caller can configure
# it before the rake task is created.
#
# @yieldparam task [RecordTask] The task being configured.
def initialize
  # Leave the flag unset so that `verbose?` applies its own default. The
  # previous code assigned a throwaway local (`verbose = nil`) that had no
  # effect on the instance.
  @verbose = nil
  yield self if block_given?
  create_task
end

Instance Attribute Details

#scraper_nameString?

Scraper name to be used to get job_id.

Returns:

  • (String, nil)


11
12
13
# File 'lib/dh_easy/test/rake.rb', line 11

# Scraper name used to look up the job id.
#
# @return [String, nil] The stored scraper name, or `nil` when none is set.
def scraper_name
  instance_variable_get(:@scraper_name)
end

Instance Method Details

#create_taskObject

Create the record rake task



316
317
318
319
320
321
322
323
# File 'lib/dh_easy/test/rake.rb', line 316

# Define the `dh_easy:record_pages` rake task.
#
# When invoked, the task records every entry of `input_map` through
# `record_pages`.
def create_task
  description = "Generates input files by gid into the configured directories, use these on context loading."
  namespace('dh_easy') do
    desc description
    task(:record_pages) { record_pages input_map }
  end
end

#disable_verboseObject

Disable verbose.



30
31
32
# File 'lib/dh_easy/test/rake.rb', line 30

# Turn verbose logging off.
#
# @return [Boolean] The new flag value (`false`).
def disable_verbose
  @verbose = false
  @verbose
end

#enable_verboseObject

Enable verbose.



25
26
27
# File 'lib/dh_easy/test/rake.rb', line 25

# Turn verbose logging on.
#
# @return [Boolean] The new flag value (`true`).
def enable_verbose
  @verbose = true
  @verbose
end

#ensure_job_idInteger?

Ensures that job_id exists. If #scraper_name is present and no #job_id

was specified, then it will get the latest `job_id` for the
`scraper_name` provided.

Returns:

  • (Integer, nil)

    Job id.



102
103
104
105
106
107
108
109
# File 'lib/dh_easy/test/rake.rb', line 102

# Ensure that a job id is available.
#
# When no job id has been set but a scraper name is present, the job id is
# requested from the executor using the stripped scraper name and stored
# through the `job_id=` writer.
#
# @return [Integer, nil] Job id, or `nil` when none could be determined.
def ensure_job_id
  if job_id.nil? && !scraper_name.nil?
    log "Retriving \"job_id\" from scraper \"#{scraper_name}\""
    # Assign through the writer: a bare `job_id = ...` would only create a
    # local variable that shadows the reader for the rest of this method.
    # Use the `executor` accessor so the executor is built on first use.
    self.job_id = executor.get_job_id scraper_name.strip
  end
  log(job_id.nil? ? 'No "job_id" was specified.' : "Using \"job_id\" #{job_id}.")
  job_id
end

#executorDatahen::Scraper::Executor

Datahen executor used to get the data to be recorded.

Returns:

  • (Datahen::Scraper::Executor)


93
94
95
# File 'lib/dh_easy/test/rake.rb', line 93

# Datahen executor used to fetch the data to be recorded.
#
# The instance is created on first access and reused afterwards.
#
# @return [Datahen::Scraper::Executor]
def executor
  return @executor if @executor

  @executor = Datahen::Scraper::Executor.new
end

#input_mapArray

An array of input maps to configure what gid record will be saved into

each directory. It uses absolute paths when #root_dir is nil, and
relative paths when it has been assigned.

Returns:

  • (Array)

    Map structure is as follows (see #record_outputs for details about the `input_map[][:filters][:outputs]` options):

    [

    {
      gid:'my-gid-123abc',
      dir:'/path/to/input/directory',
      record_content: true/false, # Default: true
      record_failed_content: true/false, # Default: true,
      record_page: true/false, # Default: true
      record_vars: true/false, # Default: false
      filters: {
        outputs: {
          # Output filters
        }
      }
    }, {
      # ...
    }
    

    ]



86
87
88
# File 'lib/dh_easy/test/rake.rb', line 86

# Collection of input maps describing which gid is recorded into which
# directory.
#
# @return [Array] The stored collection; an empty array is created on first access.
def input_map
  @input_map = [] unless @input_map
  @input_map
end

#job_idInteger?

Job id to be used on page recording.

Returns:

  • (Integer, nil)


37
38
39
# File 'lib/dh_easy/test/rake.rb', line 37

# Job id used when recording pages.
#
# @return [Integer, nil] The stored job id, or `nil` when none is set.
def job_id
  @job_id = nil unless @job_id
  @job_id
end

#job_id=(value) ⇒ Object

Set job id.

Parameters:

  • value (Integer, nil)

    Job id.



44
45
46
# File 'lib/dh_easy/test/rake.rb', line 44

# Set the job id used when recording pages.
#
# @param [Integer, nil] value Job id.
def job_id=(value)
  @job_id = value
end

#log(text) ⇒ Object

Log text into stdout when verbose is enabled (see #verbose?).

Parameters:

  • text (String)

    Message to be logged.



51
52
53
# File 'lib/dh_easy/test/rake.rb', line 51

# Print text to stdout, but only while verbose mode is enabled.
#
# @param [String] text Message to be logged.
def log text
  # The condition was inverted (`unless`), which silenced logs in verbose
  # mode and printed them when verbose was disabled.
  puts text if verbose?
end

#record(map) ⇒ Object

Record a page data into a specific directory.

Parameters:

  • map (Hash)

    ({}) Input map configuration.

Options Hash (map):

  • :gid (String)

    Page ‘gid` to retrieve the data from.

  • :dir (String)

    Directory to save file into.

  • :record_content (Boolean) — default: true

    Record content when ‘true`.

  • :record_failed_content (Boolean) — default: true

    Record failed_content when `true`.

  • :record_page (Boolean) — default: true

    Record page when ‘true`.

  • :record_vars (Boolean) — default: false

    Record vars when ‘true`.

  • :filters (Hash) — default: {outputs:nil}

    Filter hash for outputs recording; outputs are recorded only when a filter is specified.

Raises:

  • (ArgumentError)


267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
# File 'lib/dh_easy/test/rake.rb', line 267

# Record a page's data into a specific directory.
#
# @param [Hash] map Input map configuration, merged over the defaults below.
# @option map [String] :gid Page `gid` to retrieve the data from.
# @option map [String] :dir Directory to save files into. It is joined to
#   `root_dir` when that is set, then expanded to an absolute path.
# @option map [Boolean] :record_content (true) Record content when `true`.
# @option map [Boolean] :record_failed_content (true) Record failed content
#   when `true`.
# @option map [Boolean] :record_page (true) Record page when `true`.
# @option map [Boolean] :record_vars (false) Record vars when `true`.
# @option map [Hash] :filters ({outputs: nil}) Filters for outputs
#   recording; outputs are recorded only when `:outputs` is not `nil`.
#
# @raise [ArgumentError] When `gid` or `dir` is empty, or when the resolved
#   directory does not exist.
def record map
  map = {
    gid: nil,
    dir: nil,
    record_content: true,
    record_failed_content: true,
    record_page: true,
    record_vars: false,
    filters: {
      outputs: nil
    }
  }.merge map

  gid = map[:gid].to_s.strip
  raise ArgumentError, '"gid" can\'t be empty' if gid.empty?
  dir = map[:dir].to_s.strip
  raise ArgumentError, '"dir" can\'t be empty' if dir.empty?
  dir = File.join root_dir, dir unless root_dir.nil? || root_dir.strip == ''
  dir = File.expand_path dir
  unless Dir.exist? dir
    raise ArgumentError, "\"#{dir}\" don't exists or is not a directory."
  end
  log "Recording on \"#{dir}\" directory..."

  # Read the flags with the same `record_*` keys that the defaults define;
  # the shorter keys used before never matched, so nothing was recorded.
  record_content gid, dir if map[:record_content]
  record_failed_content gid, dir if map[:record_failed_content]
  record_page gid, dir if map[:record_page]
  record_vars gid, dir if map[:record_vars]

  filters = map[:filters]
  unless filters.nil?
    # NOTE(review): record_outputs builds its file path from a directory
    # that is not handed over by this call - confirm how `dir` should
    # reach it.
    record_outputs filters[:outputs] unless filters[:outputs].nil?
  end
  log "Finish recording \"#{dir}\" directory."
end

#record_content(gid, dir) ⇒ Object

Record a page raw content (HTML, XML, excel, zip, etc.) into ‘content`

file within the provided directory.

Parameters:

  • gid (String)

    Page ‘gid` to retrieve the data from.

  • dir (String)

    Directory to save file into.



141
142
143
144
145
# File 'lib/dh_easy/test/rake.rb', line 141

# Save a page's raw content into the `content` file inside `dir`.
#
# @param [String] gid Page `gid` to retrieve the data from.
# @param [String] dir Directory to save the file into.
def record_content gid, dir
  raw = executor.get_content(gid)
  record_file File.join(dir, 'content'), raw
end

#record_failed_content(gid, dir) ⇒ Object

Record a page raw failed content (HTML, XML, excel, zip, etc.) into

`failed_content` file within the provided directory.

Parameters:

  • gid (String)

    Page ‘gid` to retrieve the data from.

  • dir (String)

    Directory to save file into.



152
153
154
155
156
# File 'lib/dh_easy/test/rake.rb', line 152

# Save a page's raw failed content into the `failed_content` file inside
# `dir`.
#
# @param [String] gid Page `gid` to retrieve the data from.
# @param [String] dir Directory to save the file into.
def record_failed_content gid, dir
  raw = executor.get_failed_content(gid)
  record_file File.join(dir, 'failed_content'), raw
end

#record_file(path, content) {|file| ... } ⇒ Object

Record a content into a file only when the content is not null. It will

delete the existing file regardless if a new file will be created or
not.

Parameters:

  • path (String)

    File path to override.

  • content (String, nil)

    Content to be saved on the file.

Yield Parameters:

  • file (File)

    File to save the data into.



118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/dh_easy/test/rake.rb', line 118

# Write content into a file, replacing any previous file at the same path.
#
# An existing file at `path` is always deleted first. A new file is created
# only when there is content to write or a block was given; otherwise the
# method logs that the file is skipped and returns.
#
# @param [String] path File path to override.
# @param [String, nil] content Content to be saved on the file.
#
# @yieldparam file [File] Open file handle, for writing additional data
#   after `content`.
def record_file path, content, &block
  # `File.exists?` was removed in Ruby 3.2; `File.exist?` works everywhere.
  if File.exist? path
    log "Deleting old \"#{path}\" file..."
    File.delete path
    log "Done."
  end
  if content.nil? && block.nil?
    log 'Null content detected, skip file.'
    return
  end
  log "Creating \"#{path}\" file..."
  # Open for writing (the default mode is read-only and the file was just
  # removed). Binary mode keeps raw payloads byte-for-byte.
  File.open(path, 'wb') do |file|
    file.write content unless content.nil?
    block.call file unless block.nil?
  end
  log "Done."
end

#record_outputs(filter = nil) ⇒ Object

Note:

Will skip when ‘nil` is provided as filters.

Record a collection of outputs (JSON) into ‘outputs.json` file within

the provided directory using filters on Datahen executor
`find_outputs` method to retrieve all matching outputs regardless of
pagination.

Parameters:

  • filter (Hash, nil) (defaults to: nil)

    (nil) Filters to retrieve ‘outputs`.

Options Hash (filter):

  • :collection (String) — default: 'default'

    Output collection.

  • :query (Hash) — default: {}

    Query that outputs should match.

  • :opts (Hash) — default: {}

    ‘find_outputs` configuration options (see Datahen::Scraper::Executor#find_outputs for details).



212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# File 'lib/dh_easy/test/rake.rb', line 212

# Record a collection of outputs (JSON) into the `outputs.json` file inside
# `dir`. Every page of matching outputs is requested from the executor's
# `find_outputs` method, 100 outputs at a time, until an empty or `nil`
# page is returned.
#
# @note Will skip when `nil` is provided as filter.
#
# @param [Hash, nil] filter (nil) Filters to retrieve `outputs`.
# @option filter [String] :collection ('default') Output collection.
# @option filter [Hash] :query ({}) Query that outputs should match.
# @option filter [Hash] :opts ({}) `find_outputs` configuration options.
# @param [String, nil] dir (nil) Directory to save the file into. Required
#   whenever a filter is given.
#
# @raise [ArgumentError] When a filter is given but `dir` is empty.
def record_outputs filter = nil, dir = nil
  if filter.nil?
    log 'Skip outputs, no filter detected.'
    return
  end
  # The directory used to be read from an undefined local; it is now an
  # explicit optional argument that is validated before use.
  dir = dir.to_s.strip
  raise ArgumentError, '"dir" can\'t be empty' if dir.empty?
  path = File.join(dir, 'outputs.json')
  filter = {
    collection: 'default',
    query: {},
    opts: {}
  }.merge filter

  record_file path, nil do |file|
    count = 0
    page = 1

    file.write '['
    loop do
      # Go through the accessor so the executor exists even when nothing
      # else has used it yet.
      outputs = executor.find_outputs(
        filter[:collection],
        filter[:query],
        page,
        100,
        filter[:opts]
      )
      break if outputs.nil? || outputs.count.zero?

      outputs.each do |output|
        # Separate entries with commas so the file is a valid JSON array.
        file.write ',' if count > 0
        count += 1
        file.write JSON.pretty_generate(output)
      end
      page += 1
    end
    file.write ']'
  end
end

#record_page(gid, dir) ⇒ Object

Note:

It will prefer job page definition over global page unless no ‘job_id` (see #job_id) or `scraper_name` (see #scraper_name) is defined.

Record a page’s global or job definition (JSON) into ‘page.json` file

within the provided directory.

Parameters:

  • gid (String)

    Page ‘gid` to retrieve the data from.

  • dir (String)

    Directory to save file into.



167
168
169
170
171
172
173
174
175
176
177
# File 'lib/dh_easy/test/rake.rb', line 167

# Record a page's definition (JSON) into the `page.json` file inside `dir`.
#
# The executor is given the `gid` and the current job id before the page is
# initialized. A warning is logged when no job id is available.
#
# @param [String] gid Page `gid` to retrieve the data from.
# @param [String] dir Directory to save the file into.
def record_page gid, dir
  if job_id.nil?
    log 'Warning: No "scraper_name" or "job_id" was specified, will use global page instead job page.'
  end
  # Use the accessor rather than the ivar: the executor is created on first
  # access, so the ivar can still be nil when this is the first method to
  # need it.
  executor.gid = gid
  executor.job_id = job_id
  page = executor.init_page
  content = JSON.pretty_generate page
  path = File.join(dir, 'page.json')
  record_file path, content
end

#record_pages(input_map) ⇒ Object

Record pages from an input map collection.

Parameters:

  • input_map (Array)

    Input map collection (see #input_map for structure).



307
308
309
310
311
312
313
# File 'lib/dh_easy/test/rake.rb', line 307

# Record pages from an input map collection.
#
# The job id is resolved once up front, then every map is handed to
# `record`.
#
# @param [Array] input_map Input map collection.
def record_pages input_map
  ensure_job_id
  # Each entry is a full map (gid, dir, flags, filters), which is what
  # `record` takes. The previous body called `record_page` with three
  # undefined locals.
  input_map.each do |map|
    record map
  end
end

#record_vars(gid, dir) ⇒ Object

Note:

It will skip it if no ‘job_id` (see #job_id) or `scraper_name` (see #scraper_name) is defined.

Record a page’s vars from job page definition (JSON) into ‘vars.json`

file within the provided directory.

Parameters:

  • gid (String)

    Page ‘gid` to retrieve the data from.

  • dir (String)

    Directory to save file into.



187
188
189
190
191
192
193
194
195
196
197
198
# File 'lib/dh_easy/test/rake.rb', line 187

# Record a page's vars (JSON) into the `vars.json` file inside `dir`.
#
# The vars are read from the `'vars'` key of the page initialized by the
# executor for the given `gid` and the current job id. Nothing is recorded
# when no job id is available; a warning is logged instead.
#
# @param [String] gid Page `gid` to retrieve the data from.
# @param [String] dir Directory to save the file into.
def record_vars gid, dir
  if job_id.nil?
    log 'Warning: No "scraper_name" or "job_id" was specified, will skip vars.'
    return
  end
  # Use the accessor rather than the ivar: the executor is created on first
  # access, so the ivar can still be nil when this is the first method to
  # need it.
  executor.gid = gid
  executor.job_id = job_id
  page = executor.init_page
  content = JSON.pretty_generate page['vars']
  path = File.join(dir, 'vars.json')
  record_file path, content
end

#root_dirString?

Root directory to record pages. Useful to reduce input map fingerprint.

Returns:

  • (String, nil)


58
59
60
# File 'lib/dh_easy/test/rake.rb', line 58

# Root directory used as a base for the input map directories.
#
# NOTE(review): the value is read from `@root`, while the other readers use
# an ivar named after the method - confirm the writer stores into `@root`.
#
# @return [String, nil] The stored root directory, or `nil` when none is set.
def root_dir
  @root = nil unless @root
  @root
end

#verbose?Boolean

Note:

Default value is ‘true`.

Will show logs on stdout when enabled (see #enable_verbose and

#disable_verbose)

Returns:

  • (Boolean)

    ‘true` when enabled, else `false`.



19
20
21
22
# File 'lib/dh_easy/test/rake.rb', line 19

# Whether logs are printed to stdout.
#
# @note The flag defaults to `true` the first time it is read while unset.
#
# @return [Boolean] `true` when enabled, else `false`.
def verbose?
  @verbose.nil? ? (@verbose = true) : @verbose
end