Class: Linkage::Configuration
- Inherits:
-
Object
- Object
- Linkage::Configuration
- Defined in:
- lib/linkage/configuration.rb
Defined Under Namespace
Classes: DSL
Instance Attribute Summary collapse
-
#dataset_1 ⇒ Object
readonly
Returns the value of attribute dataset_1.
-
#dataset_2 ⇒ Object
readonly
Returns the value of attribute dataset_2.
-
#exhaustive_expectations ⇒ Object
readonly
Returns the value of attribute exhaustive_expectations.
-
#groups_table_name ⇒ Object
Returns the value of attribute groups_table_name.
-
#linkage_type ⇒ Object
Returns the value of attribute linkage_type.
-
#matches_table_name ⇒ Object
Returns the value of attribute matches_table_name.
-
#original_groups_table_name ⇒ Object
Returns the value of attribute original_groups_table_name.
-
#record_cache_size ⇒ Object
Returns the value of attribute record_cache_size.
-
#results_uri ⇒ Object
Returns the value of attribute results_uri.
-
#results_uri_options ⇒ Object
Returns the value of attribute results_uri_options.
-
#scores_table_name ⇒ Object
Returns the value of attribute scores_table_name.
-
#simple_expectations ⇒ Object
readonly
Returns the value of attribute simple_expectations.
-
#visual_comparisons ⇒ Object
readonly
Returns the value of attribute visual_comparisons.
Instance Method Summary collapse
- #add_exhaustive_expectation(expectation) ⇒ Object
- #add_simple_expectation(expectation) ⇒ Object
- #apply_exhaustive_expectations(dataset_1, dataset_2) ⇒ Object
- #configure(&block) ⇒ Object
- #datasets_with_applied_exhaustive_expectations ⇒ Object
- #datasets_with_applied_simple_expectations ⇒ Object
- #decollation_needed? ⇒ Boolean
- #groups_table_needed? ⇒ Boolean
- #groups_table_schema ⇒ Object
- #has_exhaustive_expectations? ⇒ Boolean
- #has_simple_expectations? ⇒ Boolean
-
#initialize(dataset_1, dataset_2) ⇒ Configuration
constructor
A new instance of Configuration.
- #matches_table_schema ⇒ Object
- #result_set ⇒ Object
- #scores_table_needed? ⇒ Boolean
- #scores_table_schema ⇒ Object
Constructor Details
#initialize(dataset_1, dataset_2) ⇒ Configuration
Returns a new instance of Configuration.
307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 |
# File 'lib/linkage/configuration.rb', line 307

# Creates a configuration for linking two datasets.
#
# The linkage type is :self when the two arguments compare equal with ==
# and :dual otherwise. The expectation and visual-comparison lists start
# empty, the results URI options start as an empty hash, decollation is
# initially flagged as not needed, the record cache size starts at 10_000,
# and the result tables get the names :groups, :original_groups, :scores
# and :matches.
#
# @param dataset_1 [Object] the first dataset
# @param dataset_2 [Object] the second dataset
def initialize(dataset_1, dataset_2)
  @dataset_1, @dataset_2 = dataset_1, dataset_2
  @linkage_type = (dataset_1 == dataset_2) ? :self : :dual

  # Collections that are filled in after construction.
  @simple_expectations = []
  @exhaustive_expectations = []
  @visual_comparisons = []

  # Result storage settings.
  @results_uri_options = {}
  @groups_table_name = :groups
  @original_groups_table_name = :original_groups
  @scores_table_name = :scores
  @matches_table_name = :matches

  @decollation_needed = false
  @record_cache_size = 10_000
end
Instance Attribute Details
#dataset_1 ⇒ Object (readonly)
Returns the value of attribute dataset_1.
301 302 303 |
# File 'lib/linkage/configuration.rb', line 301

# Read-only accessor for the first dataset (the constructor's first argument).
#
# @return [Object] the value of @dataset_1
def dataset_1
  @dataset_1
end
#dataset_2 ⇒ Object (readonly)
Returns the value of attribute dataset_2.
301 302 303 |
# File 'lib/linkage/configuration.rb', line 301

# Read-only accessor for the second dataset (the constructor's second argument).
#
# @return [Object] the value of @dataset_2
def dataset_2
  @dataset_2
end
#exhaustive_expectations ⇒ Object (readonly)
Returns the value of attribute exhaustive_expectations.
301 302 303 |
# File 'lib/linkage/configuration.rb', line 301

# Read-only accessor for the list of exhaustive expectations. The constructor
# starts it as an empty array; #add_exhaustive_expectation appends to it.
#
# @return [Array] the value of @exhaustive_expectations
def exhaustive_expectations
  @exhaustive_expectations
end
#groups_table_name ⇒ Object
Returns the value of attribute groups_table_name.
303 304 305 |
# File 'lib/linkage/configuration.rb', line 303

# Returns the configured name of the groups table. The constructor sets it
# to :groups.
#
# @return [Object] the value of @groups_table_name
def groups_table_name
  @groups_table_name
end
#linkage_type ⇒ Object
Returns the value of attribute linkage_type.
303 304 305 |
# File 'lib/linkage/configuration.rb', line 303

# Returns the linkage type. The constructor sets it to :self when both
# datasets compare equal and to :dual otherwise.
#
# @return [Object] the value of @linkage_type
def linkage_type
  @linkage_type
end
#matches_table_name ⇒ Object
Returns the value of attribute matches_table_name.
303 304 305 |
# File 'lib/linkage/configuration.rb', line 303

# Returns the configured name of the matches table. The constructor sets it
# to :matches.
#
# @return [Object] the value of @matches_table_name
def matches_table_name
  @matches_table_name
end
#original_groups_table_name ⇒ Object
Returns the value of attribute original_groups_table_name.
303 304 305 |
# File 'lib/linkage/configuration.rb', line 303

# Returns the configured name of the original-groups table. The constructor
# sets it to :original_groups.
#
# @return [Object] the value of @original_groups_table_name
def original_groups_table_name
  @original_groups_table_name
end
#record_cache_size ⇒ Object
Returns the value of attribute record_cache_size.
303 304 305 |
# File 'lib/linkage/configuration.rb', line 303

# Returns the configured record cache size. The constructor sets it to 10_000.
#
# @return [Object] the value of @record_cache_size
def record_cache_size
  @record_cache_size
end
#results_uri ⇒ Object
Returns the value of attribute results_uri.
303 304 305 |
# File 'lib/linkage/configuration.rb', line 303

# Returns the results URI. The constructor does not assign @results_uri, so
# this is nil until a value has been set.
#
# @return [Object, nil] the value of @results_uri
def results_uri
  @results_uri
end
#results_uri_options ⇒ Object
Returns the value of attribute results_uri_options.
303 304 305 |
# File 'lib/linkage/configuration.rb', line 303

# Returns the options associated with the results URI. The constructor
# starts them as an empty hash.
#
# @return [Object] the value of @results_uri_options
def results_uri_options
  @results_uri_options
end
#scores_table_name ⇒ Object
Returns the value of attribute scores_table_name.
303 304 305 |
# File 'lib/linkage/configuration.rb', line 303

# Returns the configured name of the scores table. The constructor sets it
# to :scores.
#
# @return [Object] the value of @scores_table_name
def scores_table_name
  @scores_table_name
end
#simple_expectations ⇒ Object (readonly)
Returns the value of attribute simple_expectations.
301 302 303 |
# File 'lib/linkage/configuration.rb', line 301

# Read-only accessor for the list of simple expectations. The constructor
# starts it as an empty array; #add_simple_expectation appends to it.
#
# @return [Array] the value of @simple_expectations
def simple_expectations
  @simple_expectations
end
#visual_comparisons ⇒ Object (readonly)
Returns the value of attribute visual_comparisons.
301 302 303 |
# File 'lib/linkage/configuration.rb', line 301

# Read-only accessor for the list of visual comparisons. The constructor
# starts it as an empty array.
#
# @return [Array] the value of @visual_comparisons
def visual_comparisons
  @visual_comparisons
end
Instance Method Details
#add_exhaustive_expectation(expectation) ⇒ Object
426 427 428 429 |
# File 'lib/linkage/configuration.rb', line 426

# Appends an expectation to the list of exhaustive expectations.
#
# @param expectation [Object] the expectation to register
# @return [Object] the same expectation that was passed in
def add_exhaustive_expectation(expectation)
  @exhaustive_expectations.push(expectation)
  expectation
end
#add_simple_expectation(expectation) ⇒ Object
420 421 422 423 424 |
# File 'lib/linkage/configuration.rb', line 420

# Appends an expectation to the list of simple expectations and updates the
# decollation flag.
#
# Once the flag is truthy it stays that way; until then it takes the result
# of decollation_needed_for_simple_expectation? for the new expectation.
#
# @param expectation [Object] the expectation to register
# @return [Object] the same expectation that was passed in
def add_simple_expectation(expectation)
  @simple_expectations.push(expectation)
  unless @decollation_needed
    @decollation_needed = decollation_needed_for_simple_expectation?(expectation)
  end
  expectation
end
#apply_exhaustive_expectations(dataset_1, dataset_2) ⇒ Object
449 450 451 452 453 454 455 456 457 |
# File 'lib/linkage/configuration.rb', line 449

# Narrows each dataset to its primary key expression and then runs every
# exhaustive expectation over the pair.
#
# Each expectation is applied to the first dataset with side :lhs and then
# to the second dataset with side :rhs, in registration order; the result
# of one application feeds the next.
#
# @param dataset_1 [Object] dataset used for the :lhs side
# @param dataset_2 [Object] dataset used for the :rhs side
# @return [Array] two-element array of the resulting datasets
def apply_exhaustive_expectations(dataset_1, dataset_2)
  lhs = dataset_1.select(dataset_1.field_set.primary_key.to_expr)
  rhs = dataset_2.select(dataset_2.field_set.primary_key.to_expr)

  @exhaustive_expectations.inject([lhs, rhs]) do |(left, right), exp|
    [exp.apply_to(left, :lhs), exp.apply_to(right, :rhs)]
  end
end
#configure(&block) ⇒ Object
323 324 325 |
# File 'lib/linkage/configuration.rb', line 323

# Builds a DSL object for this configuration, handing it the given block.
#
# @yield the block is forwarded unchanged to DSL.new
# @return [DSL] the newly created DSL instance
def configure(&block)
  DSL.new(self, &block)
end
#datasets_with_applied_exhaustive_expectations ⇒ Object
445 446 447 |
# File 'lib/linkage/configuration.rb', line 445

# Applies the exhaustive expectations to this configuration's own datasets
# by delegating to #apply_exhaustive_expectations.
#
# @return [Object] whatever #apply_exhaustive_expectations returns for
#   @dataset_1 and @dataset_2
def datasets_with_applied_exhaustive_expectations
  apply_exhaustive_expectations(@dataset_1, @dataset_2)
end
#datasets_with_applied_simple_expectations ⇒ Object
435 436 437 438 439 440 441 442 443 |
# File 'lib/linkage/configuration.rb', line 435

# Runs every simple expectation over the configured datasets.
#
# Each expectation is applied to the first dataset with side :lhs. Unless
# the linkage type is :self, it is also applied to the second dataset with
# side :rhs. For a :self linkage the resulting first dataset is returned in
# both positions.
#
# @return [Array] two-element array of the resulting datasets
def datasets_with_applied_simple_expectations
  lhs = @dataset_1
  rhs = @dataset_2

  @simple_expectations.each do |exp|
    lhs = exp.apply_to(lhs, :lhs)
    next if @linkage_type == :self

    rhs = exp.apply_to(rhs, :rhs)
  end

  return [lhs, lhs] if @linkage_type == :self

  [lhs, rhs]
end
#decollation_needed? ⇒ Boolean
340 341 342 |
# File 'lib/linkage/configuration.rb', line 340

# Reports the decollation flag. The constructor starts it as false and
# #add_simple_expectation may switch it on.
#
# @return [Boolean] the value of @decollation_needed
def decollation_needed?
  @decollation_needed
end
#groups_table_needed? ⇒ Boolean
459 460 461 |
# File 'lib/linkage/configuration.rb', line 459

# Reports whether a groups table is needed, which is exactly when
# #has_simple_expectations? is true.
#
# @return [Boolean] the result of #has_simple_expectations?
def groups_table_needed?
  has_simple_expectations?
end
#groups_table_schema ⇒ Object
344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 |
# File 'lib/linkage/configuration.rb', line 344

# Builds the column list for the groups table.
#
# The schema starts with an integer primary key :id, followed by one column
# per simple expectation whose kind is not :filter. Each such column is
# named after the expectation's merged field and uses the :type and :opts
# entries of that field's ruby_type (with :opts defaulting to {}).
#
# When the merged field's database type differs from the result database's
# type, the :collate option is removed from the column options.
#
# The result database type is looked up lazily and at most once per call,
# rather than once per expectation; it is not looked up at all when every
# expectation is a filter.
#
# @return [Array<Array>] list of [name, type, options] column definitions
def groups_table_schema
  schema = [[:id, Integer, {:primary_key => true}]]

  result_db_type = nil
  result_db_type_fetched = false

  @simple_expectations.each do |exp|
    next if exp.kind == :filter

    merged_field = exp.merged_field
    merged_type = merged_field.ruby_type

    # The result database does not change while the schema is being built,
    # so ask for its type only the first time it is needed.
    unless result_db_type_fetched
      result_set.database { |db| result_db_type = db.database_type }
      result_db_type_fetched = true
    end

    # Collation information is only kept when both databases are of the
    # same type.
    if merged_field.database_type != result_db_type && merged_type.key?(:opts)
      new_opts = merged_type[:opts].reject { |key, _| key == :collate }
      merged_type = merged_type.merge(:opts => new_opts)
    end

    schema << [merged_field.name, merged_type[:type], merged_type[:opts] || {}]
  end

  schema
end
#has_exhaustive_expectations? ⇒ Boolean
471 472 473 |
# File 'lib/linkage/configuration.rb', line 471

# Reports whether at least one exhaustive expectation has been registered.
#
# @return [Boolean] true when @exhaustive_expectations is not empty
def has_exhaustive_expectations?
  !@exhaustive_expectations.empty?
end
#has_simple_expectations? ⇒ Boolean
467 468 469 |
# File 'lib/linkage/configuration.rb', line 467

# Reports whether at least one simple expectation has been registered.
#
# @return [Boolean] true when @simple_expectations is not empty
def has_simple_expectations?
  !@simple_expectations.empty?
end
#matches_table_schema ⇒ Object
399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 |
# File 'lib/linkage/configuration.rb', line 399

# Builds the column list for the matches table.
#
# Columns, in order: an integer primary key :id, :record_1_id and
# :record_2_id typed after the primary keys of dataset_1 and dataset_2
# respectively (using the :type and :opts entries of each key's ruby_type,
# with :opts defaulting to {}), and an integer :total_score.
#
# @return [Array<Array>] list of [name, type, options] column definitions
def matches_table_schema
  record_id_columns = [[:record_1_id, dataset_1], [:record_2_id, dataset_2]].map do |column_name, dataset|
    key_type = dataset.field_set.primary_key.ruby_type
    [column_name, key_type[:type], key_type[:opts] || {}]
  end

  [[:id, Integer, {:primary_key => true}]] +
    record_id_columns +
    [[:total_score, Integer, {}]]
end
#result_set ⇒ Object
431 432 433 |
# File 'lib/linkage/configuration.rb', line 431

# Returns the ResultSet for this configuration, creating it on first use
# and returning the same cached object afterwards.
#
# @return [ResultSet] the memoized result set
def result_set
  return @result_set if @result_set

  @result_set = ResultSet.new(self)
end
#scores_table_needed? ⇒ Boolean
463 464 465 |
# File 'lib/linkage/configuration.rb', line 463

# Reports whether a scores table is needed, which is exactly when
# #has_exhaustive_expectations? is true.
#
# @return [Boolean] the result of #has_exhaustive_expectations?
def scores_table_needed?
  has_exhaustive_expectations?
end
#scores_table_schema ⇒ Object
375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 |
# File 'lib/linkage/configuration.rb', line 375

# Builds the column list for the scores table.
#
# Columns, in order: an integer primary key :id, an integer :comparator_id,
# :record_1_id and :record_2_id typed after the primary keys of dataset_1
# and dataset_2 respectively (using the :type and :opts entries of each
# key's ruby_type, with :opts defaulting to {}), and an integer :score.
#
# @return [Array<Array>] list of [name, type, options] column definitions
def scores_table_schema
  columns = []
  columns.push([:id, Integer, {:primary_key => true}])
  columns.push([:comparator_id, Integer, {}])

  # One record-id column per dataset, typed like that dataset's primary key.
  [[:record_1_id, dataset_1], [:record_2_id, dataset_2]].each do |column_name, dataset|
    key_type = dataset.field_set.primary_key.ruby_type
    columns.push([column_name, key_type[:type], key_type[:opts] || {}])
  end

  columns.push([:score, Integer, {}])
  columns
end