Module: Association
- Defined in:
- lib/rbbt/association.rb,
lib/rbbt/association/open.rb,
lib/rbbt/association/util.rb,
lib/rbbt/association/index.rb,
lib/rbbt/association/database.rb
Defined Under Namespace
Modules: Index
Class Method Summary
- .add_reciprocal(tsv) ⇒ Object
- .database(file, options = {}) ⇒ Object
- .extract_specs(all_fields = nil, options = {}) ⇒ Object
- .headers(all_fields, info_fields = nil, options = {}) ⇒ Object
- .identify_entity_format(format, fields) ⇒ Object
- .index(file, options = nil, persist_options = nil) ⇒ Object
- .normalize_specs(spec, all_fields = nil) ⇒ Object
- .open(file, options = nil, persist_options = nil) ⇒ Object
- .open_stream(stream, options = {}) ⇒ Object
- .parse_field_specification(spec) ⇒ Object
- .process_formats(field, default_format = {}) ⇒ Object
- .reorder_tsv(tsv, options = {}) ⇒ Object
- .translate(tsv, source_final_format, target_final_format, options = {}) ⇒ Object
- .version_file(file, namespace) ⇒ Object
Class Method Details
.add_reciprocal(tsv) ⇒ Object
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
# File 'lib/rbbt/association/database.rb', line 6

# Builds a copy of +tsv+ that also holds the reverse of each association.
#
# The copy is opened from +tsv.dumper_stream+. When +tsv+ is of type :double,
# every zipped value tuple of every source is split into a target and the
# remaining values; the copy then receives (via +zip_new+) an entry keyed by
# the target whose values are the source followed by those remaining values.
# Tuples whose target equals the source are skipped. For any other TSV type
# nothing is added to the copy.
#
# @param tsv the association TSV to make reciprocal
# @return the new TSV, after +tsv.annotate+ has been called on it
def self.add_reciprocal(tsv)
  reciprocal = TSV.open(tsv.dumper_stream)

  tsv.with_unnamed do
    if tsv.type == :double
      tsv.through do |source, values|
        Misc.zip_fields(values).each do |target, *info|
          next if target == source

          reciprocal.zip_new target, [source] + info
        end
      end
    end
  end

  tsv.annotate(reciprocal)
  reciprocal
end
.database(file, options = {}) ⇒ Object
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
# File 'lib/rbbt/association/database.rb', line 139

# Builds an association database from +file+.
#
# * A TSV is converted to :double if needed and handed to +reorder_tsv+.
# * An IO is handed to +open_stream+ directly.
# * Anything else is first turned into a stream with +TSV.get_stream+ and
#   then handed to +open_stream+.
#
# Each helper receives its own copy of +options+, so the caller's hash is
# not modified by them.
#
# NOTE(review): the identifiers +options+ and +entity_options+ were missing
# from the extracted listing; they are restored here from the documented
# signature and from the adjacent +[:entity_options]+ key - confirm against
# the original source file.
#
# @param file [TSV, IO, Object] the source of the associations
# @param options [Hash] options forwarded to the helper that builds the database
# @return the resulting database; its +entity_options+ is set when
#   +options[:entity_options]+ is truthy
def self.database(file, options = {})
  database = case file
             when TSV
               file = file.to_double unless file.type == :double
               reorder_tsv(file, options.dup)
             when IO
               open_stream(file, options.dup)
             else
               stream = TSV.get_stream(file)
               open_stream(stream, options.dup)
             end

  database.entity_options = options[:entity_options] if options[:entity_options]

  database
end
.extract_specs(all_fields = nil, options = {}) ⇒ Object
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
# File 'lib/rbbt/association/util.rb', line 44

# Works out the source and target specifications of an association.
#
# Each specification is a three-element array whose first element is the
# field, whose second element is used by +headers+ as the header, and whose
# third element is the format. The +:source_format+ / +:target_format+
# options, when given, overwrite the third element.
#
# When a field is not specified it is defaulted from +all_fields+:
# * neither given: source is the key field (first of +all_fields+) and the
#   target is the first remaining field;
# * only target given: source is the first non-key field if the target is
#   +:key+ or the key field, otherwise the key field;
# * only source given: target is the key field if the source is the first
#   non-key field, otherwise the first non-key field.
#
# NOTE(review): +Misc.process_options options+ was missing from the extracted
# listing and is restored by inference (the same call shape appears in
# +open_stream+ and +reorder_tsv+); presumably it extracts the named keys from
# +options+ - confirm against the original source file.
#
# @param all_fields [Array, nil] key field followed by the other fields
# @param options [Hash] may hold :source, :source_format, :target, :target_format
# @return [Hash] +{:source => source_specs, :target => target_specs}+
def self.extract_specs(all_fields = nil, options = {})
  source, source_format, target, target_format =
    Misc.process_options options, :source, :source_format, :target, :target_format

  key_field, *fields = all_fields.nil? ? [nil] : all_fields

  source_specs = normalize_specs source, all_fields
  target_specs = normalize_specs target, all_fields

  source_specs = [nil, nil, nil] if source_specs.nil?
  target_specs = [nil, nil, nil] if target_specs.nil?

  source_specs[2] = source_format if source_format
  target_specs[2] = target_format if target_format

  if source_specs[0].nil? && target_specs[0].nil?
    source_specs[0] = key_field
    target_specs[0] = fields[0]
  elsif source_specs[0].nil?
    if target_specs[0] == :key || target_specs[0] == key_field
      source_specs[0] = fields[0]
    else
      source_specs[0] = key_field
    end
  elsif target_specs[0].nil?
    if source_specs[0] == fields.first
      target_specs[0] = key_field
    else
      target_specs[0] = fields.first
    end
  end

  { :source => source_specs, :target => target_specs }
end
.headers(all_fields, info_fields = nil, options = {}) ⇒ Object
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
# File 'lib/rbbt/association/util.rb', line 87

# Computes the positions, headers and formats needed to lay out an
# association with the source as key and the target as first field.
#
# NOTE(review): the +options+ identifier was missing from the extracted
# listing and is restored from the documented signature.
#
# @param all_fields [Array] key field followed by the other fields
# @param info_fields [Array, nil] fields to keep; defaults to a copy of
#   +all_fields+. A non-nil array is modified in place: the source and target
#   fields are deleted from it and the target is put first.
# @param options [Hash] forwarded to +extract_specs+; +:format+ is also read
#   here to fill in formats that the specs leave unset
# @return [Array] +[source_pos, field_pos, source_header, field_headers,
#   source_format, target_format]+
# @raise [RuntimeError] if an info field is not included in +all_fields+
def self.headers(all_fields, info_fields = nil, options = {})
  specs = extract_specs all_fields, options

  source_field = specs[:source][0]
  target_field = specs[:target][0]

  source_pos = all_fields.index source_field

  source_header = specs[:source][1] || source_field
  target_header = specs[:target][1] || target_field

  info_fields = all_fields.dup if info_fields.nil?
  info_fields.delete source_field
  info_fields.delete target_field
  info_fields.unshift target_field

  field_headers = [target_header]
  info_fields[1..-1].each do |field|
    header = case field
             when String
               field
             when Integer
               # Integer replaces Fixnum, which no longer exists in Ruby >= 3.2
               # and raised NameError as soon as a non-String field got here.
               all_fields[field]
             when :key
               all_fields.first
             end

    field_headers << header
  end

  field_pos = info_fields.collect do |f|
    raise "Field #{f} not found. Options: #{all_fields * ", "}" unless all_fields.include?(f)

    f == :key ? 0 : all_fields.index(f)
  end

  source_format = specs[:source][2]
  target_format = specs[:target][2]

  # Formats given in the specs take precedence; options[:format] only fills gaps.
  if (format = options[:format])
    source_format ||= process_formats(source_header, format)
    target_format ||= process_formats(target_header, format)
  end

  result = [source_pos, field_pos, source_header, field_headers, source_format, target_format]
  Log.low "Headers -- #{result}"
  result
end
.identify_entity_format(format, fields) ⇒ Object
4 5 6 7 8 9 10 |
# File 'lib/rbbt/association/util.rb', line 4

# Finds the field in +fields+ that has the same entity type as +format+.
#
# The entity type is looked up in +Entity.formats+, both for +format+ and
# for each candidate field.
#
# @param format the format whose entity type is wanted
# @param fields [Array] candidate field names
# @return [Array] +[main_field, nil, format]+ where +main_field+ is the
#   first field of the matching entity type
# @raise [RuntimeError] if +format+ has no entity type, or if no field in
#   +fields+ shares that type
def self.identify_entity_format(format, fields)
  wanted_type = Entity.formats[format]
  raise "Field #{ format } could not be resolved: #{fields}" if wanted_type.nil?

  matching = fields.select { |candidate| Entity.formats[candidate] == wanted_type }
  main_field = matching.first
  raise "Field #{ format } not present, options: #{Misc.fingerprint fields}" if main_field.nil?

  [main_field, nil, format]
end
.index(file, options = nil, persist_options = nil) ⇒ Object
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
# File 'lib/rbbt/association/index.rb', line 6

# Builds (through +Persist.persist_tsv+) a :list index of the association
# database in +file+, keyed by "source~target" pairs.
#
# The underlying database is obtained with +open+. The index key field is
# "source_field~target_field", with "~undirected" appended when the index is
# undirected; its fields are the database fields after the first one. When a
# pair occurs more than once, its values are merged position by position
# using ";;". For undirected indices the "target~source" key is also stored
# unless it is already present.
#
# NOTE(review): every identifier containing "options" (+options+,
# +persist_options+, +entity_options+) was missing from the extracted
# listing. They are restored here from the documented signature and from the
# keys being read (:persist, :engine and :file on the persist options;
# :namespace, :recycle, :undirected, :monitor and :entity_options on the
# options) - confirm each against the original source file.
#
# @param file the association source; a String is passed through
#   +version_file+ when +options[:namespace]+ is set
# @param options [Hash, nil] association options (a copy is used)
# @param persist_options [Hash, nil] persistence options; when nil they are
#   taken from +options+ with +Misc.pull_keys(options, :persist)+. Defaults
#   added here: +:persist => true+, +:engine => "BDB"+
# @return the index, set up with +Association::Index.setup+
def self.index(file, options = nil, persist_options = nil)
  options = options.nil? ? {} : options.dup
  persist_options = persist_options.nil? ? Misc.pull_keys(options, :persist) : persist_options.dup
  persist_options = Misc.add_defaults persist_options.dup, :persist => true, :engine => "BDB"

  file = version_file(file, options[:namespace]) if options[:namespace] && String === file

  Persist.persist_tsv(file, "Association Index", options, persist_options.dup) do |data|
    recycle = options[:recycle]
    undirected = options[:undirected]

    # The backing database is persisted next to the index, under its own file name.
    persist_options[:file] = persist_options[:file] + '.database' if persist_options[:file]

    database = open(file, options, persist_options.dup.merge(:engine => "HDB"))

    source_field = database.key_field

    fields = database.fields
    target_field = fields.first.split(":").last

    # An association between a field and itself is undirected unless stated otherwise.
    undirected = true if undirected.nil? && source_field == target_field

    key_field = [source_field, target_field, undirected ? "undirected" : nil].compact * "~"

    TSV.setup(data, :key_field => key_field, :fields => fields[1..-1], :type => :list, :serializer => :list)

    data.key_field = key_field
    data.fields = fields[1..-1]
    data.type = :list
    data.serializer = :list

    database.with_unnamed do
      database.with_monitor(options[:monitor]) do
        database.through do |source, values|
          # Bring every database type to the :double layout (one list per field).
          case database.type
          when :single
            values = [[values]]
          when :list
            values = values.collect { |v| [v] }
          when :flat
            values = [values]
          end

          next if values.empty?
          next if source.nil? || source.empty?

          targets, *rest = values

          size = targets ? targets.length : 0

          # With :recycle, a single info value is repeated for every target.
          if recycle && size > 1
            rest.each do |list|
              list.replace [list.first] * size if list.length == 1
            end
          end

          rest = Misc.zip_fields rest

          annotations = rest.length > 1 ? targets.zip(rest) : targets.zip(rest * targets.length)

          annotations.each do |target, info|
            next if target.nil? || target.empty?

            key = [source, target] * "~"

            if data[key].nil? || info.nil?
              data[key] = info
            else
              old_info = data[key]
              info = old_info.zip(info).collect { |p| p * ";;" }
              data[key] = info
            end

            if undirected
              reverse_key = [target, source] * "~"
              data[reverse_key] = info unless data.include? reverse_key
            end
          end
        end
      end
    end

    data.close
    data
  end.tap do |data|
    data.read if !(Hash === data) && data.respond_to?(:read)
    Association::Index.setup data
    data.entity_options = options[:entity_options] if options[:entity_options]
    data
  end
end
.normalize_specs(spec, all_fields = nil) ⇒ Object
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/rbbt/association/util.rb', line 24

# Turns a field specification into a +[field, header, format]+ triple.
#
# NOTE(review): the original listing contained a branch that called
# +identify_entity_format field, all_fields+, but it was guarded by
# +all_fields.nil?+ inside the +else+ of that very same check, so it could
# never run and every path returned +[field, header, format]+. The dead
# branch is removed here without changing the result. If resolving the field
# through its entity format was the intent (i.e. when +all_fields+ is given
# and does not include +field+), that has to be enabled deliberately.
#
# @param spec [String, Array, Integer, nil] the specification, parsed by
#   +parse_field_specification+
# @param all_fields [Array, nil] currently unused; kept for interface
#   compatibility
# @return [Array, nil] the parsed triple, or nil when +spec+ is nil
def self.normalize_specs(spec, all_fields = nil)
  return nil if spec.nil?

  field, header, format = parse_field_specification spec

  [field, header, format]
end
.open(file, options = nil, persist_options = nil) ⇒ Object
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/rbbt/association/open.rb', line 10

# Opens (through +Persist.persist_tsv+) the association database in +file+
# as a :double TSV.
#
# The database is built with +Association.database+, converted to :double
# if needed, and copied entry by entry into the persistence object.
#
# NOTE(review): every identifier containing "options" (+options+,
# +persist_options+, +entity_options+) was missing from the extracted
# listing. They are restored here from the documented signature and from the
# keys being used (:zipped, :namespace, :monitor and :entity_options on the
# options; :persist and :dir on the persist options) - confirm each against
# the original source file.
#
# @param file the association source; a String is passed through
#   +version_file+ when +options[:namespace]+ is set, and a Proc is called
#   to obtain the actual source
# @param options [Hash, nil] association options (a copy is used); default
#   added here: +:zipped => true+
# @param persist_options [Hash, nil] persistence options; when nil they are
#   taken from +options+ with +Misc.pull_keys(options, :persist)+. Defaults
#   added here: +:persist => true+, +:dir => Rbbt.var.associations+
# @return the persisted database; its +entity_options+ is set when
#   +options[:entity_options]+ is truthy
def self.open(file, options = nil, persist_options = nil)
  options = options.nil? ? {} : options.dup
  persist_options = persist_options.nil? ? Misc.pull_keys(options, :persist) : persist_options.dup

  options = Misc.add_defaults options, :zipped => true
  persist_options = Misc.add_defaults persist_options, :persist => true, :dir => Rbbt.var.associations
  persist = persist_options[:persist]

  file = version_file(file, options[:namespace]) if options[:namespace] && String === file
  file = file.call if Proc === file

  data = Persist.persist_tsv(file, "Association Database", options, persist_options) do |store|
    tsv = Association.database(file, options.merge(:persist => persist))
    tsv = tsv.to_double unless tsv.type == :double

    tsv.annotate store

    store.serializer = :double if store.respond_to? :serializer

    tsv.with_monitor(options[:monitor]) do
      tsv.through do |k, v|
        store[k] = v
      end
    end

    store
  end

  data.entity_options = options[:entity_options] if options[:entity_options]
  data
end
.open_stream(stream, options = {}) ⇒ Object
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
# File 'lib/rbbt/association/database.rb', line 85

# Parses an association database from +stream+ with the source as key and
# the target as first field.
#
# A +TSV::Parser+ is created on the stream, its key field and fields are set
# to the positions computed by +headers+, and a +get_values+ singleton method
# matching the parser type is installed so that each line is split into
# +[key, values]+ accordingly. The parsed TSV gets the computed headers, is
# converted to :double if needed, and is passed through +translate+ when a
# source or target format was resolved.
#
# NOTE(review): every identifier containing "options" was missing from the
# extracted listing. +Misc.process_options options+, +parser.options+ and
# the hash passed to +TSV.parse+ are restored by inference; in particular
# the local name +open_options+ is a guess - confirm against the original
# source file.
#
# @param stream the stream to parse
# @param options [Hash] parser and association options; :fields and
#   :persist are extracted from it here
# @return the resulting :double TSV
def self.open_stream(stream, options = {})
  fields, persist = Misc.process_options options, :fields, :persist

  parser = TSV::Parser.new stream, options.merge(:fields => nil, :key_field => nil)

  source_pos, field_pos, source_header, field_headers, source_format, target_format =
    headers parser.all_fields, fields, options

  parser.key_field = source_pos
  parser.fields = field_pos

  case parser.type
  when :single
    class << parser
      def get_values(parts)
        [parts[@key_field], parts.values_at(*@fields).first]
      end
    end
  when :list
    class << parser
      def get_values(parts)
        [parts[@key_field], parts.values_at(*@fields)]
      end
    end
  when :double
    # The original also listed :list and :single here; they were unreachable
    # because the branches above already match them.
    class << parser
      def get_values(parts)
        [parts[@key_field].split(@sep2, -1), parts.values_at(*@fields).collect { |v| v.nil? ? [] : v.split(@sep2, -1) }]
      end
    end
  when :flat
    class << parser
      def get_values(parts)
        fields = (0..parts.length - 1).to_a - [@key_field]
        values = parts.values_at(*fields).compact.collect { |v| v.split(@sep2, -1) }.flatten
        [parts[@key_field].split(@sep2, -1), values]
      end
    end
  end

  open_options = options.merge(parser.options).merge(:parser => parser)

  tsv = TSV.parse parser.stream, {}, open_options
  tsv.key_field = source_header
  tsv.fields = field_headers

  tsv = tsv.to_double unless tsv.type == :double

  tsv = translate tsv, source_format, target_format, :persist => persist if source_format || target_format

  tsv
end
.parse_field_specification(spec) ⇒ Object
12 13 14 15 16 17 18 19 20 21 22 |
# File 'lib/rbbt/association/util.rb', line 12

# Splits a field specification of the form "field=~format=>final_format"
# into its three parts.
#
# A String is split on "=>" into the field part and the final format; an
# Array is taken as that pair already. The field part is then split on "=~"
# into the field and its format. Missing parts are nil, and an empty field
# becomes nil.
#
# +Integer+ is used instead of +Fixnum+: the +Fixnum+ constant no longer
# exists in Ruby >= 3.2, so the original check raised NameError on every
# call.
#
# NOTE(review): an Integer spec always yields +[2, nil, nil]+, whatever its
# value; this is kept as in the original - confirm that the literal 2 (and
# not the spec itself) is intended.
#
# @param spec [String, Array, Integer] the specification
# @return [Array] +[field, format, final_format]+
def self.parse_field_specification(spec)
  return [2, nil, nil] if Integer === spec

  spec = spec.split "=>" unless Array === spec
  field_part, final_format = spec

  field, format = field_part.split "=~", -1

  field = nil if field.nil? || field.empty?

  [field, format, final_format]
end
.process_formats(field, default_format = {}) ⇒ Object
78 79 80 81 82 83 84 85 |
# File 'lib/rbbt/association/util.rb', line 78

# Picks the format that +default_format+ assigns to +field+.
#
# +default_format+ is walked as (type, format) pairs. For each pair the
# entity type of +field+ is taken from +Entity.formats+, falling back to the
# pair's own format when the field has none; the first pair whose type
# matches the string form of that entity type wins.
#
# @param field the field whose format is wanted
# @param default_format [Hash, nil] type => format pairs
# @return the format of the first matching pair, or nil when
#   +default_format+ is nil or empty or nothing matches
def self.process_formats(field, default_format = {})
  return nil if default_format.nil? or default_format.empty?

  matching_pair = default_format.find do |type, format|
    entity_type = Entity.formats[field] || format
    entity_type.to_s === type
  end

  matching_pair ? matching_pair.last : nil
end
.reorder_tsv(tsv, options = {}) ⇒ Object
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/rbbt/association/database.rb', line 64

# Reorders an existing TSV so that the source is the key and the target is
# the first field, renames its headers, and translates it when a source or
# target format was resolved.
#
# The reorder is always performed: the original guarded it with
# +if true or ...+, so the rest of that condition was never evaluated; the
# dead condition is removed here.
#
# NOTE(review): the identifiers +Misc.process_options options+ and +options+
# were missing from the extracted listing and are restored by inference. The
# original also computed +info_fields+ from +field_pos+ and merged
# +:key_field+ / +:fields+ into a hash that was never read afterwards; that
# dead computation is dropped. Note that +tsv.reorder+ receives +fields+ as
# left by +headers+, which modifies a non-nil +info_fields+ argument in
# place - confirm this is the intended field list.
#
# @param tsv the TSV to reorder
# @param options [Hash] association options; :fields and :persist are
#   extracted from it here
# @return the reordered (and possibly translated) TSV
def self.reorder_tsv(tsv, options = {})
  fields, persist = Misc.process_options options, :fields, :persist
  all_fields = tsv.all_fields

  source_pos, _field_pos, source_header, field_headers, source_format, target_format =
    headers(all_fields, fields, options)

  source_field = source_pos == :key ? :key : all_fields[source_pos]

  tsv = tsv.reorder source_field, fields

  tsv.key_field = source_header
  tsv.fields = field_headers

  tsv = translate tsv, source_format, target_format, :persist => persist if source_format || target_format

  tsv
end
.translate(tsv, source_final_format, target_final_format, options = {}) ⇒ Object
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
# File 'lib/rbbt/association/database.rb', line 28

# Translates the source (key) and/or the target (first field) of +tsv+ into
# the requested final formats using +TSV.translate+.
#
# A side is only translated when its final format is given and differs from
# the current field name. While the target is translated, the key field is
# temporarily renamed to "MASK" and restored on the translated TSV.
#
# NOTE(review): the +options+ identifier was missing from the extracted
# listing and is restored from the documented signature.
#
# @param tsv the association TSV
# @param source_final_format [String, nil] wanted format for the key
# @param target_final_format [String, nil] wanted format for the first field
# @param options [Hash] extra options merged into the +TSV.translate+ call
# @return the translated TSV, or +tsv+ itself when nothing had to change
def self.translate(tsv, source_final_format, target_final_format, options = {})
  source_field = tsv.key_field
  target_field = tsv.fields.first
  namespace = tsv.namespace

  # Identifier files for a translation: those of the TSV plus, when Entity is
  # defined, those Entity lists for the final format. NAMESPACE placeholders
  # are substituted when the TSV has a namespace; files that still contain
  # the placeholder are discarded.
  identifier_files_for = lambda do |current, final_format|
    files = current.identifier_files.dup
    files.concat Entity.identifier_files(final_format) if defined? Entity
    files.uniq!
    files.collect! { |f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace)) } if namespace
    files.reject! { |f| f.match(/\bNAMESPACE\b/) }
    files
  end

  if source_final_format && source_field != source_final_format
    Log.debug("Changing source format from #{tsv.key_field} to #{source_final_format}")

    identifier_files = identifier_files_for.call(tsv, source_final_format)

    tsv = TSV.translate(tsv, source_field, source_final_format, options.merge(:identifier_files => identifier_files))
  end

  # Translate target
  if target_final_format && target_field != target_final_format
    Log.debug("Changing target format from #{target_field} to #{target_final_format}")

    old_key_field = tsv.key_field
    tsv.key_field = "MASK"

    identifier_files = identifier_files_for.call(tsv, target_final_format)

    tsv = TSV.translate(tsv, target_field, target_final_format, options.merge(:identifier_files => identifier_files))
    tsv.key_field = old_key_field
  end

  tsv
end