Class: Linkage::Configuration

Inherits:
Object
  • Object
show all
Defined in:
lib/linkage/configuration.rb

Defined Under Namespace

Classes: DSL

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(dataset_1, dataset_2) ⇒ Configuration

Returns a new instance of Configuration.



307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
# File 'lib/linkage/configuration.rb', line 307

# Sets up a configuration for linking two datasets.
#
# dataset_1, dataset_2 - the datasets to link; they are stored as given.
#
# The linkage type is :self when the two arguments compare equal with ==
# and :dual otherwise. Every other setting starts at its default value.
def initialize(dataset_1, dataset_2)
  @dataset_1, @dataset_2 = dataset_1, dataset_2
  @linkage_type =
    if dataset_1 == dataset_2
      :self
    else
      :dual
    end

  # collections that are filled in later
  @simple_expectations, @exhaustive_expectations, @visual_comparisons = [], [], []
  @results_uri_options = {}

  # scalar defaults
  @decollation_needed = false
  @record_cache_size = 10_000

  # default names of the result tables
  @groups_table_name = :groups
  @original_groups_table_name = :original_groups
  @scores_table_name = :scores
  @matches_table_name = :matches
end

Instance Attribute Details

#dataset_1 ⇒ Object (readonly)

Returns the value of attribute dataset_1.



301
302
303
# File 'lib/linkage/configuration.rb', line 301

# Returns @dataset_1, which initialize stores from its first argument.
def dataset_1
  @dataset_1
end

#dataset_2 ⇒ Object (readonly)

Returns the value of attribute dataset_2.



301
302
303
# File 'lib/linkage/configuration.rb', line 301

# Returns @dataset_2, which initialize stores from its second argument.
def dataset_2
  @dataset_2
end

#exhaustive_expectations ⇒ Object (readonly)

Returns the value of attribute exhaustive_expectations.



301
302
303
# File 'lib/linkage/configuration.rb', line 301

# Returns @exhaustive_expectations: the collection that starts as an empty
# array in initialize and that add_exhaustive_expectation appends to.
# The stored object itself is returned, not a copy.
def exhaustive_expectations
  @exhaustive_expectations
end

#groups_table_name ⇒ Object

Returns the value of attribute groups_table_name.



303
304
305
# File 'lib/linkage/configuration.rb', line 303

# Returns @groups_table_name; initialize sets it to :groups.
def groups_table_name
  @groups_table_name
end

#linkage_type ⇒ Object

Returns the value of attribute linkage_type.



303
304
305
# File 'lib/linkage/configuration.rb', line 303

# Returns @linkage_type. initialize sets it to :self when the two datasets
# compare equal with == and to :dual otherwise.
def linkage_type
  @linkage_type
end

#matches_table_name ⇒ Object

Returns the value of attribute matches_table_name.



303
304
305
# File 'lib/linkage/configuration.rb', line 303

# Returns @matches_table_name; initialize sets it to :matches.
def matches_table_name
  @matches_table_name
end

#original_groups_table_name ⇒ Object

Returns the value of attribute original_groups_table_name.



303
304
305
# File 'lib/linkage/configuration.rb', line 303

# Returns @original_groups_table_name; initialize sets it to
# :original_groups.
def original_groups_table_name
  @original_groups_table_name
end

#record_cache_size ⇒ Object

Returns the value of attribute record_cache_size.



303
304
305
# File 'lib/linkage/configuration.rb', line 303

# Returns @record_cache_size; initialize sets it to 10_000.
def record_cache_size
  @record_cache_size
end

#results_uri ⇒ Object

Returns the value of attribute results_uri.



303
304
305
# File 'lib/linkage/configuration.rb', line 303

# Returns @results_uri. initialize does not assign this variable, so it
# reads as nil until code outside this listing sets it.
def results_uri
  @results_uri
end

#results_uri_options ⇒ Object

Returns the value of attribute results_uri_options.



303
304
305
# File 'lib/linkage/configuration.rb', line 303

# Returns @results_uri_options; initialize sets it to an empty hash.
def results_uri_options
  @results_uri_options
end

#scores_table_name ⇒ Object

Returns the value of attribute scores_table_name.



303
304
305
# File 'lib/linkage/configuration.rb', line 303

# Returns @scores_table_name; initialize sets it to :scores.
def scores_table_name
  @scores_table_name
end

#simple_expectations ⇒ Object (readonly)

Returns the value of attribute simple_expectations.



301
302
303
# File 'lib/linkage/configuration.rb', line 301

# Returns @simple_expectations: the collection that starts as an empty
# array in initialize and that add_simple_expectation appends to.
# The stored object itself is returned, not a copy.
def simple_expectations
  @simple_expectations
end

#visual_comparisons ⇒ Object (readonly)

Returns the value of attribute visual_comparisons.



301
302
303
# File 'lib/linkage/configuration.rb', line 301

# Returns @visual_comparisons; initialize sets it to an empty array.
# The stored object itself is returned, not a copy.
def visual_comparisons
  @visual_comparisons
end

Instance Method Details

#add_exhaustive_expectation(expectation) ⇒ Object



426
427
428
429
# File 'lib/linkage/configuration.rb', line 426

# Appends the expectation to @exhaustive_expectations.
#
# Returns the expectation that was passed in.
def add_exhaustive_expectation(expectation)
  expectation.tap { |added| @exhaustive_expectations << added }
end

#add_simple_expectation(expectation) ⇒ Object



420
421
422
423
424
# File 'lib/linkage/configuration.rb', line 420

# Appends the expectation to @simple_expectations and updates the
# decollation flag.
#
# decollation_needed_for_simple_expectation? is only consulted while
# @decollation_needed is still falsy; once the flag is truthy it is kept.
#
# Returns the expectation that was passed in.
def add_simple_expectation(expectation)
  @simple_expectations << expectation
  unless @decollation_needed
    @decollation_needed = decollation_needed_for_simple_expectation?(expectation)
  end
  expectation
end

#apply_exhaustive_expectations(dataset_1, dataset_2) ⇒ Object



449
450
451
452
453
454
455
456
457
# File 'lib/linkage/configuration.rb', line 449

# Restricts each dataset to its primary key expression and then runs every
# exhaustive expectation over the pair.
#
# dataset_1 - receives each expectation with side :lhs.
# dataset_2 - receives each expectation with side :rhs.
#
# Returns a two-element array: [resulting dataset_1, resulting dataset_2].
def apply_exhaustive_expectations(dataset_1, dataset_2)
  lhs = dataset_1.select(dataset_1.field_set.primary_key.to_expr)
  rhs = dataset_2.select(dataset_2.field_set.primary_key.to_expr)

  @exhaustive_expectations.inject([lhs, rhs]) do |(left, right), exp|
    [exp.apply_to(left, :lhs), exp.apply_to(right, :rhs)]
  end
end

#configure(&block) ⇒ Object



323
324
325
# File 'lib/linkage/configuration.rb', line 323

# Creates a DSL object for this configuration, passing self and the given
# block on to DSL.new. What DSL does with the block is not visible in this
# listing.
#
# Returns whatever DSL.new returns.
def configure(&block)
  DSL.new(self, &block)
end

#datasets_with_applied_exhaustive_expectations ⇒ Object



445
446
447
# File 'lib/linkage/configuration.rb', line 445

# Runs apply_exhaustive_expectations on the configuration's own
# @dataset_1 and @dataset_2 and returns its result.
def datasets_with_applied_exhaustive_expectations
  apply_exhaustive_expectations(@dataset_1, @dataset_2)
end

#datasets_with_applied_simple_expectations ⇒ Object



435
436
437
438
439
440
441
442
443
# File 'lib/linkage/configuration.rb', line 435

# Runs every simple expectation over the configured datasets.
#
# For a :self linkage only @dataset_1 is processed (as :lhs) and the same
# resulting object fills both slots of the returned pair. Otherwise each
# expectation is applied to @dataset_1 as :lhs and to @dataset_2 as :rhs.
#
# Returns a two-element array of the resulting datasets.
def datasets_with_applied_simple_expectations
  if @linkage_type == :self
    narrowed = @simple_expectations.inject(@dataset_1) do |dataset, exp|
      exp.apply_to(dataset, :lhs)
    end
    return [narrowed, narrowed]
  end

  @simple_expectations.inject([@dataset_1, @dataset_2]) do |(lhs, rhs), exp|
    [exp.apply_to(lhs, :lhs), exp.apply_to(rhs, :rhs)]
  end
end

#decollation_needed? ⇒ Boolean

Returns:

  • (Boolean)


340
341
342
# File 'lib/linkage/configuration.rb', line 340

# Returns the @decollation_needed flag. initialize sets it to false;
# add_simple_expectation replaces it with the result of
# decollation_needed_for_simple_expectation? while it is still falsy.
def decollation_needed?
  @decollation_needed
end

#groups_table_needed? ⇒ Boolean

Returns:

  • (Boolean)


459
460
461
# File 'lib/linkage/configuration.rb', line 459

# Returns the result of has_simple_expectations?, i.e. true when at least
# one simple expectation has been added.
def groups_table_needed?
  has_simple_expectations?
end

#groups_table_schema ⇒ Object



344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
# File 'lib/linkage/configuration.rb', line 344

# Builds the column list for the groups table.
#
# The list starts with an integer :id primary key and gets one column per
# simple expectation whose kind is not :filter. Each column is named after
# the expectation's merged field and typed from that field's ruby_type
# hash (:type, optional :opts).
#
# When the merged field's database type differs from the result
# database's type, the :collate option is dropped from the column options.
#
# Returns an array of [name, type, options] triples.
def groups_table_schema
  schema = [[:id, Integer, {:primary_key => true}]]

  # The result database type is the same for every expectation, so it is
  # looked up at most once, and only when a column is actually needed.
  result_db_type = nil
  result_db_type_known = false

  @simple_expectations.each do |exp|
    next if exp.kind == :filter

    merged_field = exp.merged_field
    merged_type = merged_field.ruby_type

    unless result_db_type_known
      result_set.database { |db| result_db_type = db.database_type }
      result_db_type_known = true
    end

    # A missing or nil :opts entry means there is nothing to strip.
    opts = merged_type[:opts]
    if opts && merged_field.database_type != result_db_type
      opts = opts.reject { |key, _| key == :collate }
    end

    schema << [merged_field.name, merged_type[:type], opts || {}]
  end

  schema
end

#has_exhaustive_expectations? ⇒ Boolean

Returns:

  • (Boolean)


471
472
473
# File 'lib/linkage/configuration.rb', line 471

# Returns true when @exhaustive_expectations is not empty, false otherwise.
def has_exhaustive_expectations?
  !@exhaustive_expectations.empty?
end

#has_simple_expectations? ⇒ Boolean

Returns:

  • (Boolean)


467
468
469
# File 'lib/linkage/configuration.rb', line 467

# Returns true when @simple_expectations is not empty, false otherwise.
def has_simple_expectations?
  !@simple_expectations.empty?
end

#matches_table_schema ⇒ Object



399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
# File 'lib/linkage/configuration.rb', line 399

# Builds the column list for the matches table.
#
# Columns, in order: an integer :id primary key, :record_1_id and
# :record_2_id typed after the primary keys of dataset_1 and dataset_2,
# and an integer :total_score.
#
# Returns an array of [name, type, options] triples.
def matches_table_schema
  # column definition mirroring the type of a dataset's primary key
  record_id_column = lambda do |column_name, dataset|
    key_type = dataset.field_set.primary_key.ruby_type
    [column_name, key_type[:type], key_type[:opts] || {}]
  end

  columns = [[:id, Integer, {:primary_key => true}]]
  columns << record_id_column.call(:record_1_id, dataset_1)
  columns << record_id_column.call(:record_2_id, dataset_2)
  columns << [:total_score, Integer, {}]
  columns
end

#result_set ⇒ Object



431
432
433
# File 'lib/linkage/configuration.rb', line 431

# Returns the ResultSet for this configuration. It is created with
# ResultSet.new(self) on first use and kept in @result_set, so later calls
# return the same object.
def result_set
  return @result_set if @result_set

  @result_set = ResultSet.new(self)
end

#scores_table_needed? ⇒ Boolean

Returns:

  • (Boolean)


463
464
465
# File 'lib/linkage/configuration.rb', line 463

# Returns the result of has_exhaustive_expectations?, i.e. true when at
# least one exhaustive expectation has been added.
def scores_table_needed?
  has_exhaustive_expectations?
end

#scores_table_schema ⇒ Object



375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
# File 'lib/linkage/configuration.rb', line 375

# Builds the column list for the scores table.
#
# Columns, in order: an integer :id primary key, an integer
# :comparator_id, :record_1_id and :record_2_id typed after the primary
# keys of dataset_1 and dataset_2, and an integer :score.
#
# Returns an array of [name, type, options] triples.
def scores_table_schema
  # column definition mirroring the type of a dataset's primary key
  record_id_column = lambda do |column_name, dataset|
    key_type = dataset.field_set.primary_key.ruby_type
    [column_name, key_type[:type], key_type[:opts] || {}]
  end

  columns = [
    [:id, Integer, {:primary_key => true}],
    [:comparator_id, Integer, {}]
  ]
  columns << record_id_column.call(:record_1_id, dataset_1)
  columns << record_id_column.call(:record_2_id, dataset_2)
  columns << [:score, Integer, {}]
  columns
end