Class: SiteDiff::Config

Inherits:
Object
  • Object
show all
Defined in:
lib/sitediff/config.rb,
lib/sitediff/config/preset.rb,
lib/sitediff/config/creator.rb

Overview

SiteDiff Configuration.

Defined Under Namespace

Classes: ConfigNotFound, Creator, InvalidConfig, Preset

Constant Summary collapse

DEFAULT_FILENAME =

Default config file.

'sitediff.yaml'
DEFAULT_PATHS_FILENAME =

Default paths file.

'paths.txt'
DEFAULT_CONFIG =

Default SiteDiff config.

{
  'settings' => {
    'depth' => 3,
    'interval' => 0,
    'include' => '',
    'exclude' => '',
    'concurrency' => 3,
    'preset' => nil
  },
  'before' => {},
  'after' => {},
  'paths' => []
}.freeze
ALLOWED_CONFIG_KEYS =

Keys allowed in config files. TODO: Deprecate repeated params before_url and after_url. TODO: Create a method self.supports TODO: Deprecate in favor of self.supports key, subkey, subkey…

Sanitizer::TOOLS.values.flatten(1) + %w[
  includes
  settings
  before
  after
  before_url
  after_url
  ignore_whitespace
  export
  output
  report
]
ALLOWED_SETTINGS_KEYS =

Keys allowed in the “settings” key. TODO: Create a method self.supports TODO: Deprecate in favor of self.supports key, subkey, subkey…

%w[
  preset
  depth
  include
  exclude
  concurrency
  interval
  curl_opts
].freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(file, directory) ⇒ Config

Creates a SiteDiff Config object.



247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
# File 'lib/sitediff/config.rb', line 247

# Builds a Config from a config file, layering it over DEFAULT_CONFIG.
#
# @param file [String, nil] Path to the config file. When nil, the file
#   named DEFAULT_FILENAME inside +directory+ is used.
# @param directory [String] The SiteDiff working directory.
# @raise [InvalidConfig] If the config file does not exist, or if
#   validation of the loaded configuration fails.
def initialize(file, directory)
  # No explicit file: look for the default config file in the directory.
  file = File.join(directory, DEFAULT_FILENAME) if file.nil?
  unless File.exist?(file)
    raise InvalidConfig, "Missing config file #{File.expand_path(file)}."
  end

  @file = file
  @directory = directory
  @preset_applied = { 'before' => false, 'after' => false }
  @config = Config.merge(DEFAULT_CONFIG, Config.load_conf(file))

  # Fail early if the loaded configuration is unusable.
  validate
end

Instance Attribute Details

#directory ⇒ Object (readonly)

Returns the value of attribute directory.



67
68
69
# File 'lib/sitediff/config.rb', line 67

# Returns the directory this Config was created with (the second
# argument given to the constructor).
def directory
  @directory
end

Class Method Details

.create_regexp(string_param) ⇒ Object

Creates a RegExp from a string.



458
459
460
461
462
463
464
465
466
467
468
469
# File 'lib/sitediff/config.rb', line 458

# Creates a RegExp from a string.
#
# An invalid pattern is logged as an error and yields nil instead of
# raising, so callers can treat "no pattern" and "bad pattern" alike.
#
# @param string_param [String, nil] Pattern source.
# @return [Regexp, nil] The compiled expression, or nil when the input is
#   nil, empty or not a valid regular expression.
def self.create_regexp(string_param)
  return nil if string_param.nil? || string_param == ''

  Regexp.new(string_param)
rescue RegexpError => e
  # Regexp.new signals a bad pattern with RegexpError.
  SiteDiff.log "Invalid RegExp: #{string_param}", :error
  SiteDiff.log e.message, :error
  # TODO: Use SiteDiff.log type :debug
  # SiteDiff.log e.backtrace, :error if options[:verbose]
  nil
end

.merge(first, second) ⇒ Object

Merges two normalized Hashes according to the following rules: (1) paths are merged as arrays; (2) for each of the before and after subhashes H (e.g. first['before'] and second['before']):

a)  if first[H] and second[H] are expected to be arrays, their values
    are merged as such,
b)  if first[H] and second[H] are expected to be scalars, the value for
    second[H] is kept if and only if first[H] is nil.

For example, merge(h1, h2) results in h3:

(h1) before: foo, sanitization: [pattern: foo]
(h2) before: bar, sanitization: [pattern: bar]
(h3) before: foo, sanitization: [pattern: foo, pattern: bar]



133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/sitediff/config.rb', line 133

# Merges two normalized config Hashes into a new Hash.
#
# Neither argument is modified, so shared or frozen hashes can be passed
# safely as either argument.
#
# Rules applied:
# - Top-level sanitization keys (every key listed in Sanitizer::TOOLS):
#   the value from +second+ wins, falling back to +first+.
# - 'before' / 'after' sections, for keys present in both hashes:
#   a) array-type sanitization keys are concatenated (first, then second),
#   b) 'settings' takes the value from +second+,
#   c) any other key keeps the value from +first+ unless it is nil/false.
# - 'output' arrays are concatenated.
# - 'before_url_report' / 'after_url_report': +first+ wins.
# - 'settings' and 'report' are combined with merge_deep.
#
# Note: 'paths' is not carried over into the result by this method.
#
# @param first [Hash] Base configuration.
# @param second [Hash] Configuration merged on top of +first+.
# @return [Hash] A new, merged configuration Hash.
def self.merge(first, second)
  result = {
    'before' => {},
    'after' => {},
    'output' => [],
    'settings' => {}
  }

  # Merge sanitization rules; keys without a value are left out.
  Sanitizer::TOOLS.values.flatten(1).each do |key|
    value = second[key] || first[key]
    result[key] = value if value
  end

  # Merge the "before" and "after" sections.
  %w[before after].each do |pos|
    # Use local fallbacks and a non-destructive merge so that the caller's
    # hashes are never altered.
    first_section = first[pos] || {}
    second_section = second[pos] || {}

    result[pos] = first_section.merge(second_section) do |key, a, b|
      if Sanitizer::TOOLS[:array].include? key
        (a || []) + (b || []) # Rule 2a.
      elsif key == 'settings'
        b
      else
        a || b # Rule 2b.
      end
    end
  end

  # Merge output array.
  result['output'] += (first['output'] || []) + (second['output'] || [])

  # Merge url_report keys.
  %w[before_url_report after_url_report].each do |pos|
    result[pos] = first[pos] || second[pos]
  end

  # Merge settings.
  result['settings'] = merge_deep(
    first['settings'] || {},
    second['settings'] || {}
  )

  # Merge report labels.
  result['report'] = merge_deep(
    first['report'] || {},
    second['report'] || {}
  )

  result
end

.merge_deep(first, second) ⇒ Object

Merges 2 iterable objects deeply.



195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/sitediff/config.rb', line 195

# Merges 2 Hashes deeply.
#
# For keys present in both hashes:
# - Hash values are merged recursively,
# - Array values are concatenated (first, then second),
# - anything else takes the value from +second+.
# A nil value in +second+ leaves a Hash or Array from +first+ untouched.
# If the two values have incompatible types, +second+ wins.
#
# @param first [Hash] Base hash.
# @param second [Hash] Hash merged on top of +first+.
# @return [Hash] A new merged Hash; the arguments are not modified.
def self.merge_deep(first, second)
  first.merge(second) do |_key, val1, val2|
    # Match on the value itself: `case val1.class` would compare a Class
    # object against Hash/Array and never match.
    if val1.is_a?(Hash) && (val2.nil? || val2.is_a?(Hash))
      merge_deep(val1, val2 || {})
    elsif val1.is_a?(Array) && (val2.nil? || val2.is_a?(Array))
      val1 + (val2 || [])
    else
      val2
    end
  end
end

.normalize(conf) ⇒ Object

Takes a Hash and normalizes it to the following form by merging globals into before and after. A normalized config Hash looks like this:

paths:
- /about

before:
  url: http://before
  selector: body
  ## Note: use either `selector` or `regions`, but not both
  regions:
    - name: title
      selector: .field-name-title h2
    - name: body
      selector: .field-name-field-news-description .field-item
  dom_transform:
  - type: remove
    selector: script

after:
  url: http://after
  selector: body

## Note: use `output` only with `regions`
output:
  - title
  - author
  - source
  - body


99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/sitediff/config.rb', line 99

# Normalizes a config Hash by folding global values into the 'before'
# and 'after' sections.
#
# For each of 'before' and 'after':
# - array-type sanitization keys get the global entries appended,
# - scalar sanitization keys fall back to the global value,
# - 'url' falls back to the top-level 'before_url' / 'after_url' key,
# - 'curl_opts' is overwritten with the top-level 'curl_opts' value.
#
# Note that +conf+ itself is modified in the process.
#
# @param conf [Hash] Raw configuration.
# @return [Hash] A new Hash containing only keys in ALLOWED_CONFIG_KEYS.
def self.normalize(conf)
  tools = Sanitizer::TOOLS

  # Merge globals
  %w[before after].each do |pos|
    conf[pos] ||= {}
    tools[:array].each do |key|
      conf[pos][key] ||= []
      conf[pos][key] += conf[key] if conf[key]
    end
    tools[:scalar].each { |key| conf[pos][key] ||= conf[key] }

    # Honor the top-level "before_url" / "after_url" shortcuts, without
    # overriding a URL given inside the section itself.
    url_key = "#{pos}_url"
    conf[pos]['url'] ||= conf[url_key] if conf[url_key]
    conf[pos]['curl_opts'] = conf['curl_opts']
  end

  # Normalize paths.
  conf['paths'] = Config.normalize_paths(conf['paths'])

  conf.select { |k, _v| ALLOWED_CONFIG_KEYS.include? k }
end

.remove_defaults(data) ⇒ Object

Removes default parameters from a config hash.

I know this is weird, but it’ll be fixed. The config management needs to be streamlined further.



223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# File 'lib/sitediff/config.rb', line 223

# Removes default parameters from a config hash.
#
# The hash is modified IN PLACE and also returned; pass a copy if the
# original must be preserved.
#
# @param data [Hash] Config data; a missing 'settings' key is treated as
#   an empty settings hash.
# @return [Hash] The same +data+ object, without default values.
def self.remove_defaults(data)
  result = data
  settings = (result['settings'] ||= {})

  # Exclude default settings, as well as nil/false values.
  settings.delete_if do |key, value|
    value == DEFAULT_CONFIG['settings'][key] || !value
  end

  # Exclude default curl opts.
  settings['curl_opts'] ||= {}
  settings['curl_opts'].delete_if do |key, value|
    value == UriWrapper::DEFAULT_CURL_OPTS[key.to_sym]
  end

  # Delete curl opts if empty.
  settings.delete('curl_opts') unless settings['curl_opts'].length.positive?

  result
end

.stringify_keys(object) ⇒ Object

Returns object clone with stringified keys. TODO: Make this method available globally, if required.



441
442
443
444
445
446
447
448
449
450
451
452
453
454
# File 'lib/sitediff/config.rb', line 441

# Returns a copy of the given object in which Symbol keys are replaced by
# their String form, recursing into nested values.
#
# Anything that does not respond to +each_key+ (scalars, arrays, nil) is
# returned untouched. Keys that are not Symbols are kept as they are.
#
# @param object [Object] Usually a Hash.
# @return [Object] A new Hash with stringified keys, or +object+ itself.
def self.stringify_keys(object)
  return object unless object.respond_to?('each_key')

  object.to_enum(:each_key).each_with_object({}) do |key, converted|
    name = key.is_a?(Symbol) ? key.to_s : key
    converted[name] = stringify_keys(object[key])
  end
end

Instance Method Details

#after(apply_preset: false) ⇒ Object

Get “after” site configuration.



275
276
277
# File 'lib/sitediff/config.rb', line 275

# Returns the "after" site configuration by delegating to
# section(:after, ...).
#
# @param apply_preset [Boolean] Forwarded to +section+ as +with_preset+.
def after(apply_preset: false)
  section(:after, with_preset: apply_preset)
end

#after_time=(time) ⇒ Object

Set crawl time for ‘after’



340
341
342
# File 'lib/sitediff/config.rb', line 340

# Stores the crawl time for 'after' under the 'after_time' key of the
# 'report' section.
# NOTE(review): assumes @config['report'] is already a Hash — confirm.
def after_time=(time)
  @config['report']['after_time'] = time
end

#after_url ⇒ Object

Get “after” site URL.



280
281
282
283
# File 'lib/sitediff/config.rb', line 280

# Returns the 'url' entry of the "after" site configuration, or nil when
# there is no "after" configuration.
def after_url
  site = after
  site ? site['url'] : nil
end

#all ⇒ Hash

Gets all loaded configuration except defaults.

Returns:

  • (Hash)

    Config data.



213
214
215
216
# File 'lib/sitediff/config.rb', line 213

# Returns the loaded configuration with default values stripped.
#
# A deep copy (Marshal round-trip) is handed to remove_defaults, so the
# configuration held by this object is not affected.
#
# @return [Hash] Config data.
def all
  self.class.remove_defaults(Marshal.load(Marshal.dump(@config)))
end

#before(apply_preset: false) ⇒ Object

Get “before” site configuration.



264
265
266
# File 'lib/sitediff/config.rb', line 264

# Returns the "before" site configuration by delegating to
# section(:before, ...).
#
# @param apply_preset [Boolean] Forwarded to +section+ as +with_preset+.
def before(apply_preset: false)
  section(:before, with_preset: apply_preset)
end

#before_time=(time) ⇒ Object

Set crawl time for ‘before’



335
336
337
# File 'lib/sitediff/config.rb', line 335

# Stores the crawl time for 'before' under the 'before_time' key of the
# 'report' section.
# NOTE(review): assumes @config['report'] is already a Hash — confirm.
def before_time=(time)
  @config['report']['before_time'] = time
end

#before_url ⇒ Object

Get “before” site URL.



269
270
271
272
# File 'lib/sitediff/config.rb', line 269

# Returns the 'url' entry of the "before" site configuration, or nil when
# there is no "before" configuration.
def before_url
  site = before
  site ? site['url'] : nil
end

#curl_opts ⇒ Object

Return merged CURL options.



473
474
475
476
477
478
479
480
481
# File 'lib/sitediff/config.rb', line 473

# Returns the default CURL options overlaid with the 'curl_opts' setting.
#
# The String values 'true' and 'false' are converted to real booleans;
# every other value is passed through unchanged.
#
# @return [Hash] A new Hash of CURL options.
def curl_opts
  # We do want string keys here
  booleans = { 'true' => true, 'false' => false }
  overrides = settings['curl_opts'] || {}
  merged = UriWrapper::DEFAULT_CURL_OPTS.merge(overrides)

  merged.each_with_object({}) do |(name, value), resolved|
    resolved[name] = booleans.fetch(value, value)
  end
end

#export ⇒ Object

Get export option



308
309
310
# File 'lib/sitediff/config.rb', line 308

# Returns the value stored under the 'export' key of the configuration.
def export
  @config['export']
end

#export=(export) ⇒ Object

Set export option



313
314
315
# File 'lib/sitediff/config.rb', line 313

# Stores the given value under the 'export' key of the configuration.
# No validation is performed on the value.
def export=(export)
  @config['export'] = export
end

#ignore_whitespace ⇒ Object

Get ignore_whitespace option



298
299
300
# File 'lib/sitediff/config.rb', line 298

# Returns the value stored under the 'ignore_whitespace' key of the
# configuration.
def ignore_whitespace
  @config['ignore_whitespace']
end

#ignore_whitespace=(ignore_whitespace) ⇒ Object

Set ignore_whitespace option



303
304
305
# File 'lib/sitediff/config.rb', line 303

# Stores the given value under the 'ignore_whitespace' key of the
# configuration. No validation is performed on the value.
def ignore_whitespace=(ignore_whitespace)
  @config['ignore_whitespace'] = ignore_whitespace
end

#output ⇒ Object

Get output option



318
319
320
# File 'lib/sitediff/config.rb', line 318

# Returns the value stored under the 'output' key of the configuration.
def output
  @config['output']
end

#output=(output) ⇒ Object

Set output option



323
324
325
326
327
# File 'lib/sitediff/config.rb', line 323

# Stores the given Array under the 'output' key of the configuration.
#
# @param output [Array] The new output value.
# @raise [RuntimeError] If +output+ is not an Array.
def output=(output)
  raise 'Output must be an Array' unless output.is_a? Array

  @config['output'] = output
end

#paths ⇒ Object

Get paths.



286
287
288
# File 'lib/sitediff/config.rb', line 286

# Returns the value stored under the 'paths' key of the configuration.
def paths
  @config['paths']
end

#paths=(paths) ⇒ Object

Set paths.



291
292
293
294
295
# File 'lib/sitediff/config.rb', line 291

# Stores the given paths under the 'paths' key of the configuration,
# after passing them through Config.normalize_paths.
#
# @param paths [Array] The new paths.
# @raise [RuntimeError] If +paths+ is not an Array.
def paths=(paths)
  raise 'Paths must be an Array' unless paths.is_a? Array

  @config['paths'] = Config.normalize_paths(paths)
end

#paths_file_read(file = nil) ⇒ Integer

Reads a collection of paths from a file.

Parameters:

  • file (String) (defaults to: nil)

    A file containing one path per line.

Returns:

  • (Integer)

    Number of paths read.



368
369
370
371
372
373
374
375
376
377
378
379
# File 'lib/sitediff/config.rb', line 368

# Loads paths from a file (one path per line) and assigns them through
# the +paths=+ writer.
#
# @param file [String, nil] File to read. Defaults to the file named
#   DEFAULT_PATHS_FILENAME inside the config directory.
# @return [Integer] Number of paths held after the assignment.
# @raise [Config::InvalidConfig] If the file does not exist.
def paths_file_read(file = nil)
  file ||= File.join(@directory, DEFAULT_PATHS_FILENAME)
  raise Config::InvalidConfig, "File not found: #{file}" unless File.exist? file

  self.paths = File.readlines(file)

  # Report how many paths are now configured.
  paths.length
end

#paths_file_write(paths, file = nil) ⇒ Object

Writes an array of paths to a file.

Parameters:

  • paths (Array)

    An array of paths.

  • file (String) (defaults to: nil)

    Optional path to a file.



351
352
353
354
355
356
357
358
# File 'lib/sitediff/config.rb', line 351

# Writes the given paths to a file, one path per line, replacing any
# previous content.
#
# @param paths [Array] A non-empty array of paths.
# @param file [String, nil] Target file. Defaults to the file named
#   DEFAULT_PATHS_FILENAME inside the config directory.
# @raise [SiteDiffException] If +paths+ is not a non-empty Array.
def paths_file_write(paths, file = nil)
  writable = paths.is_a?(Array) && paths.length.positive?
  raise SiteDiffException, 'Write failed. Invalid paths.' unless writable

  file ||= File.join(@directory, DEFAULT_PATHS_FILENAME)
  File.open(file, 'w+') do |handle|
    handle.puts(paths)
  end
end

#report ⇒ Object

Return report display settings.



330
331
332
# File 'lib/sitediff/config.rb', line 330

# Returns the value stored under the 'report' key of the configuration.
def report
  @config['report']
end

#roots ⇒ Object

Get roots.

Example: If the config has a “before” and “after” sections, then roots will be [“before”, “after”].



386
387
388
389
390
# File 'lib/sitediff/config.rb', line 386

# Builds a Hash mapping site names to their base URLs.
#
# 'after' is always present; 'before' is added only when a "before"
# configuration exists. The Hash is rebuilt (and stored in @roots) on
# every call.
#
# @return [Hash] e.g. { 'after' => url, 'before' => url }
def roots
  @roots = { 'after' => after_url }
  return @roots unless before

  @roots['before'] = before_url
  @roots
end

#setting(key) ⇒ *

Gets a setting.

Parameters:

  • key (String)

    A key.

Returns:

  • (*)

    A value, if exists.



400
401
402
403
# File 'lib/sitediff/config.rb', line 400

# Looks up a single entry of the 'settings' section.
#
# @param key [String, Symbol] Setting name; Symbols are converted to
#   Strings before the lookup.
# @return [Object, nil] The stored value, or nil when the key is absent.
def setting(key)
  name = key.is_a?(Symbol) ? key.to_s : key
  known = @config['settings']
  known.key?(name) ? known[name] : nil
end

#settings ⇒ Hash

Gets all settings.

TODO: Make sure the settings are not writable.

Returns:

  • (Hash)

    All settings.



412
413
414
# File 'lib/sitediff/config.rb', line 412

# Returns the Hash stored under the 'settings' key of the configuration.
# The stored object itself is returned, not a copy.
def settings
  @config['settings']
end

#validate(opts = {}) ⇒ Object

Checks if the configuration is usable for diff-ing. TODO: Do we actually need the opts argument?

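Raises:

  • (InvalidConfig)

    If a required base URL is undefined, or if an interval is set while concurrency is not 1.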



418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
# File 'lib/sitediff/config.rb', line 418

# Checks that the configuration can be used for diff-ing.
#
# @param opts [Hash] Options. +:need_before+ (default: true) controls
#   whether a 'before' base URL is required.
# @raise [InvalidConfig] If a required base URL is undefined, or if an
#   interval is set while concurrency is not 1.
def validate(opts = {})
  need_before = { need_before: true }.merge(opts)[:need_before]

  before_missing = need_before && !before['url']
  raise InvalidConfig, "Undefined 'before' base URL." if before_missing
  raise InvalidConfig, "Undefined 'after' base URL." unless after['url']

  # A crawl interval only makes sense without parallel requests.
  interval = setting(:interval)
  concurrency = setting(:concurrency)
  unless interval.to_i.zero? || concurrency == 1
    raise InvalidConfig, 'Concurrency must be 1 when an interval is set.'
  end

  # Make sure the requested preset exists.
  preset = setting(:preset)
  Preset.exist?(preset, exception: true) if preset
end