Module: Association

Defined in:
lib/scout/association.rb,
lib/scout/association/util.rb,
lib/scout/association/index.rb,
lib/scout/association/fields.rb

Defined Under Namespace

Modules: Index

Class Method Summary

Class Method Details

.database(*args, **kwargs) ⇒ Object



96
97
98
99
# File 'lib/scout/association.rb', line 96

# Opens an association and returns it in materialized form.
#
# Every argument is forwarded untouched to +open+. When +open+ hands back a
# TSV::Transformer it is resolved through its +tsv+ method with
# <tt>merge: true</tt>; any other result is returned as is.
#
# @param args [Array] positional arguments forwarded to +open+
# @param kwargs [Hash] keyword arguments forwarded to +open+
# @return [Object] the value returned by +open+, or by the transformer's +tsv+
def self.database(*args, **kwargs)
  opened = open(*args, **kwargs)
  return opened unless TSV::Transformer === opened

  opened.tsv(merge: true)
end

.extract_specs(all_fields = nil, options = {}) ⇒ Object



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/scout/association/fields.rb', line 45

# Works out which fields act as source and target of an association.
#
# Each side is described by a three element array +[field, header, format]+
# as produced by +normalize_specs+. Missing sides are filled in from
# +all_fields+: by default the key field is the source and the first
# remaining field is the target.
#
# @param all_fields [Array, nil] key field followed by the remaining fields;
#   may be nil, in which case no field based resolution is attempted
# @param options [Hash] read through IndiferentHash.process_options for
#   :source, :source_format, :target, :target_format and :format
# @return [Hash] <tt>{:source => specs, :target => specs}</tt>
# @raise [RuntimeError] when a side names an entity format that none of
#   +all_fields+ shares
def self.extract_specs(all_fields = nil, options = {})
  source, source_format, target, target_format, format = IndiferentHash.process_options options, :source, :source_format, :target, :target_format, :format

  key_field, *fields = all_fields.nil? ? [nil] : all_fields

  source_specs = normalize_specs(source, all_fields) || [nil, nil, nil]
  target_specs = normalize_specs(target, all_fields) || [nil, nil, nil]

  # Explicit formats always win over whatever the spec string carried.
  source_specs[2] = source_format if source_format
  target_specs[2] = target_format if target_format

  # A side may name an entity format rather than an actual field. In that
  # case pick the first field registered under the same entity type. Nothing
  # can be resolved without a field list or without Entity being loaded.
  match_by_entity_type = lambda do |specs, label|
    requested = specs.first
    next if !requested || all_fields.nil? || all_fields.include?(requested)
    next unless defined?(Entity)

    entity_type = Entity.formats[requested.to_s]
    next unless entity_type

    # Compare as strings on both sides so source and target behave alike.
    field = all_fields.find { |f| Entity.formats[f.to_s].to_s == entity_type.to_s }
    raise "#{label} not found #{specs}. Options: #{Log.fingerprint all_fields}" if field.nil?

    specs[0] = field
  end

  match_by_entity_type.call(source_specs, "Source")
  match_by_entity_type.call(target_specs, "Target")

  # Fill in whichever side is still unknown, avoiding using the same field
  # for both ends.
  if source_specs[0].nil? && target_specs[0].nil?
    source_specs[0] = key_field
    target_specs[0] = fields.first
  elsif source_specs[0].nil?
    target_is_key = target_specs[0] == :key || target_specs[0] == key_field
    source_specs[0] = target_is_key ? fields.first : key_field
  elsif target_specs[0].nil?
    target_specs[0] = source_specs[0] == fields.first ? key_field : fields.first
  end

  # If a per entity type format table was given, use it for every side that
  # has no final format yet. A format equal to the field or header already
  # in use is pointless and therefore left unset.
  if format
    [target_specs, source_specs].each do |specs|
      next unless specs[2].nil?

      entity_type = defined?(Entity) ? Entity.formats[specs[1] || specs[0]] : nil
      preferred = format[entity_type.to_s]
      specs[2] = preferred unless preferred == specs[0] || preferred == specs[1]
    end
  end

  { :source => source_specs, :target => target_specs }
end

.headers(all_fields, info_fields = nil, options = {}) ⇒ Object



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# File 'lib/scout/association/fields.rb', line 113

# Computes the column layout needed to read an association out of a table.
#
# The source and target fields are taken from +extract_specs+. The target is
# always placed first among the value fields, followed by the info fields,
# and the source column is never repeated among the values.
#
# @param all_fields [Array] key field followed by the remaining fields
# @param info_fields [Array, nil] extra fields to keep; defaults to every
#   field in +all_fields+. The array passed in is not modified.
# @param options [Hash] forwarded to +extract_specs+; +:format+ is also
#   consulted here if still present after that call
# @return [Array] <tt>[source_pos, field_pos, source_header, field_headers,
#   source_format, target_format]</tt>, where positions index into
#   +all_fields+ (0 is the key field)
def self.headers(all_fields, info_fields = nil, options = {})
  specs = extract_specs all_fields, options

  source_field, source_alias, source_format = specs[:source]
  target_field, target_alias, target_format = specs[:target]

  # Position of a field within all_fields, with the key field at 0.
  position = lambda do |field|
    pos = TSV.identify_field all_fields.first, all_fields[1..-1], field
    pos == :key ? 0 : pos + 1
  end

  source_pos = position.call(source_field)

  source_header = source_alias || source_field
  target_header = target_alias || target_field

  # Always work on a copy: the list is edited below and must not leak those
  # edits back into the caller's array.
  info_fields = (info_fields.nil? ? all_fields : info_fields).dup
  [source_field, target_field].each do |field|
    found = NamedArray.identify_name(info_fields, field)
    info_fields.delete_at found if found
  end
  info_fields.unshift target_field

  info_headers = info_fields[1..-1].map do |field|
    case field
    when String
      field
    when Numeric
      all_fields[field]
    when :key
      all_fields.first
    end
  end
  field_headers = [target_header] + info_headers

  field_pos = info_fields.map { |field| position.call(field) }
  field_pos.delete source_pos

  # NOTE(review): extract_specs reads :format through
  # IndiferentHash.process_options; if that call consumes the key this
  # fallback never triggers -- confirm.
  if (format = options[:format])
    source_format ||= process_formats(source_header, format)
    target_format ||= process_formats(target_header, format)
  end

  res = [source_pos, field_pos, source_header, field_headers, source_format, target_format]
  Log.low "Headers -- #{res}"
  res
end

.identify_entity_format(format, fields) ⇒ Object



5
6
7
8
9
10
11
# File 'lib/scout/association/fields.rb', line 5

# Finds the field that holds entities of the same type as +format+.
#
# The entity type registered for +format+ in Entity.formats is looked up and
# the first element of +fields+ registered under that same type is selected.
#
# @param format [Object] name used as key into Entity.formats
# @param fields [Array] candidate field names
# @return [Array] <tt>[matching_field, nil, format]</tt>
# @raise [RuntimeError] when +format+ has no entity type, or when no field
#   shares it
def self.identify_entity_format(format, fields)
  wanted_type = Entity.formats[format]
  raise "Field #{ format } could not be resolved: #{fields}" if wanted_type.nil?

  matching = fields.find { |name| Entity.formats[name] == wanted_type }
  raise "Field #{ format } not present, options: #{Log.fingerprint fields}" if matching.nil?

  [matching, nil, format]
end

.index(file, source: nil, target: nil, source_format: nil, target_format: nil, format: nil, **kwargs) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/scout/association/index.rb', line 4

# Builds a persisted index of an association keyed by source/target pairs.
#
# The association is opened with +Association.open+ and every row is
# rewritten into entries whose key is <tt>"source~target"</tt> and whose
# value is the remaining info fields. For undirected associations the
# reversed <tt>"target~source"</tt> key is emitted as well. The result is
# produced through Persist.tsv using the "BDB" engine, extended with Index
# and has +parse_key_field+ called on it.
#
# @param file [Object] association source handed to Persist.tsv and
#   Association.open
# @param source [Object, nil] source specification
# @param target [Object, nil] target specification
# @param source_format [Object, nil] format for the source
# @param target_format [Object, nil] format for the target
# @param format [Object, nil] recorded in the persistence options
# @param kwargs [Hash] further options; +:persist+ prefixed keys are pulled
#   out for persistence, +:recycle+ and +:undirected+ are read inside the
#   persistence block
# @return [Object] the index, extended with Index
def self.index(file, source: nil, target: nil, source_format: nil, target_format: nil, format: nil, **kwargs)
  persist_options = IndiferentHash.pull_keys kwargs, :persist
  index_persist_options = IndiferentHash.add_defaults persist_options.dup, persist: true,
    prefix: "Association::Index",
    other_options: kwargs.merge(source: source, target: target, source_format: source_format, target_format: target_format, format: format)

  index = Persist.tsv(file, kwargs, engine: "BDB", persist_options: index_persist_options) do |data|
    # :recycle is read together with :undirected but is not used here.
    _recycle, undirected = IndiferentHash.process_options kwargs, :recycle, :undirected

    # NOTE(review): +format+ takes part in the persistence options above but
    # is not forwarded to Association.open -- confirm this is intentional.
    database = Association.open(file, source: source, target: target, source_format: source_format, target_format: target_format, **kwargs.merge(persist_prefix: "Association::Database"))

    source_field = database.key_field
    target_field, *fields = database.fields

    # Same field on both ends: treat as undirected unless told otherwise.
    undirected = true if undirected.nil? && source_field == target_field

    key_field = [source_field, target_field, undirected ? "undirected" : nil].compact * "~"

    dumper = TSV::Dumper.new database.options.merge(key_field: key_field, fields: fields, type: :list)
    transformer = TSV::Transformer.new database, dumper

    # Keys under which one source/target pair is stored. Uses its own
    # parameter names so the method's +source+/+target+ arguments are never
    # overwritten from inside the traversal blocks.
    pair_keys = lambda do |from, to|
      keys = [[from, to] * "~"]
      keys << ([to, from] * "~") if undirected
      keys
    end

    if database.type == :double
      transformer.traverse do |entry_source, value_list|
        res = []
        NamedArray.zip_fields(value_list).each do |values|
          entry_target, *info = values
          pair_keys.call(entry_source, entry_target).each { |key| res << [key, info] }
        end
        res.extend MultipleResult
      end
    elsif database.type == :flat
      transformer.traverse do |entry_source, entry_targets|
        res = []
        res.extend MultipleResult
        entry_targets.each do |entry_target|
          pair_keys.call(entry_source, entry_target).each { |key| res << [key, []] }
        end
        res
      end
    else
      transformer.traverse do |entry_source, values|
        res = []
        res.extend MultipleResult
        entry_target, *info = values
        pair_keys.call(entry_source, entry_target).each { |key| res << [key, info] }
        res
      end
    end

    transformer.tsv(**kwargs.merge(data: data, fields: fields))
  end
  index.extend Index
  index.parse_key_field
  index
end

.normalize_specs(spec, all_fields = nil) ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/scout/association/fields.rb', line 25

# Turns a field specification into a <tt>[field, header, format]</tt> triple.
#
# The specification is parsed with +parse_field_specification+ and returned
# as a fresh three element array. +all_fields+ is accepted for interface
# compatibility but does not influence the result: the previous
# implementation only consulted it in a branch that could never run, and
# every reachable path returned this same triple.
#
# @param spec [Object, nil] specification understood by
#   +parse_field_specification+
# @param all_fields [Array, nil] currently unused
# @return [Array, nil] the triple, or nil when +spec+ is nil
def self.normalize_specs(spec, all_fields = nil)
  return nil if spec.nil?

  field, header, format = parse_field_specification spec
  [field, header, format]
end

.open(obj, source: nil, target: nil, fields: nil, source_format: nil, target_format: nil, format: nil, **kwargs) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/scout/association.rb', line 8

# Opens a table as an association between a source and a target field.
#
# The column layout is computed by +headers+. When neither end needs
# translating, the table is simply reordered (for a TSV) or opened with the
# source as key and the target as first field. Otherwise a TSV::Transformer
# is returned that rewrites keys and/or first values through translation
# indices while traversing.
#
# @param obj [Object] a TSV or anything TSV.open / TSV::Transformer accept
# @param source [Object, nil] source specification
# @param target [Object, nil] target specification
# @param fields [Array, nil] info fields to keep
# @param source_format [Object, nil] format to translate the source into
# @param target_format [Object, nil] format to translate the target into
# @param format [Object, nil] per entity type format table
# @param kwargs [Hash] further options; +:type+ and +:identifiers+ are read
#   here, the rest is forwarded when opening or reordering
# @return [Object] the reordered/opened table, or a TSV::Transformer
def self.open(obj, source: nil, target: nil, fields: nil, source_format: nil, target_format: nil, format: nil, **kwargs)
  all_fields = TSV.all_fields(obj)
  source_pos, field_pos, source_header, field_headers, source_format, target_format = headers(all_fields, fields, kwargs.merge(source: source, target: target, source_format: source_format, target_format: target_format, format: format))

  original_source_header = all_fields[source_pos]
  original_field_headers = all_fields.values_at(*field_pos)
  original_target_header = all_fields[field_pos.first]
  target_header = field_headers.first

  type, identifiers = IndiferentHash.process_options kwargs, :type, :identifiers

  # Translation index into +format_name+, trying the requested header first
  # and falling back to the header as it appears in the table.
  translation_index = lambda do |format_name, header, original_header|
    translation_files = [TSV.identifier_files(obj), Entity.identifier_files(format_name), identifiers].flatten.compact
    begin
      TSV.translation_index(translation_files, header, format_name)
    rescue
      TSV.translation_index(translation_files, original_header, format_name)
    end
  end

  source_index = translation_index.call(source_format, source_header, original_source_header) if source_format
  target_index = translation_index.call(target_format, target_header, original_target_header) if target_format

  # Name a column carries in the result. A "Name (Format)" style header keeps
  # its name part and gets the new format; without translation the original
  # header is kept whenever it contains the requested one.
  final_header = lambda do |format_name, header, original_header|
    if format_name
      if (m = original_header.match(/(.*) \(.*\)/))
        m[1] + " (#{format_name})"
      elsif (m = header.match(/(.*) \(.*\)/))
        m[1] + " (#{format_name})"
      else
        format_name
      end
    elsif header
      original_header.include?(header) ? original_header : header
    else
      original_header
    end
  end

  final_key_field = final_header.call(source_format, source_header, original_source_header)
  final_target_field = final_header.call(target_format, target_header, original_target_header)
  final_fields = [final_target_field] + original_field_headers[1..-1]

  if source_index.nil? && target_index.nil?
    if TSV === obj
      IndiferentHash.pull_keys kwargs, :persist
      # Keep a type that was already read from kwargs above instead of
      # replacing it with the table's own type.
      # NOTE(review): assumes IndiferentHash.process_options may remove :type
      # from kwargs -- confirm.
      type ||= kwargs[:type] || obj.type
      res = obj.reorder(original_source_header, original_field_headers, **kwargs.merge(type: type, merge: true))
    else
      res = TSV.open(obj, key_field: original_source_header, fields: original_field_headers, **kwargs.merge(type: type))
    end
    res.key_field = final_key_field
    res.fields = final_fields

    return res
  end

  transformer = TSV::Transformer.new obj
  transformer.key_field = final_key_field
  transformer.fields = final_fields
  transformer.type = type if type

  transformer.traverse(key_field: original_source_header, fields: original_field_headers) do |k, v|
    # Copy the values when reading from a TSV so the edit below is not made
    # on the array that was handed in.
    v = v.dup if TSV === obj
    k = source_index[k] if source_index
    if target_index
      v[0] = Array === v[0] ? target_index.values_at(*v[0]) : target_index[v[0]]
    end
    [k, v]
  end

  transformer
end

.parse_field_specification(spec) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
# File 'lib/scout/association/fields.rb', line 13

# Splits a field specification into <tt>[field, header, final_format]</tt>.
#
# A textual specification has the shape <tt>"field=~header=>final_format"</tt>
# where both the <tt>=~header</tt> and the <tt>=>final_format</tt> parts are
# optional. An Array is taken as the already split
# <tt>[field_part, final_format]</tt> pair. Numeric and Symbol
# specifications (for instance a position or +:key+) are returned untouched
# as the field.
#
# @param spec [String, Array, Numeric, Symbol] the specification
# @return [Array] <tt>[field, header, final_format]</tt>; +field+ is nil when
#   the field part is missing or empty
def self.parse_field_specification(spec)
  return [spec, nil, nil] if Numeric === spec || Symbol === spec

  spec = spec.split("=>") unless Array === spec
  field_part, final_format = spec

  # to_s covers an empty specification, where there is no field part at all.
  field, format = field_part.to_s.split("=~", -1)

  field = nil if field.nil? || field.empty?

  [field, format, final_format]
end

.process_formats(field, default_format = {}) ⇒ Object



104
105
106
107
108
109
110
111
# File 'lib/scout/association/fields.rb', line 104

# Picks the format configured for the entity type of +field+.
#
# +default_format+ is walked in order as pairs of type and format. For each
# pair the entity type of +field+ from Entity.formats (or, when there is
# none, the pair's own format) is compared as a string against the pair's
# type; the format of the first pair that matches is returned.
#
# @param field [Object] key looked up in Entity.formats
# @param default_format [Hash, nil] type => format table
# @return [Object, nil] the matching format, or nil when the table is nil,
#   empty, or has no matching pair
def self.process_formats(field, default_format = {})
  return nil if default_format.nil? || default_format.empty?

  _type, chosen = default_format.find do |type, format|
    (Entity.formats[field] || format).to_s === type
  end
  chosen
end

.version_file(file, namespace) ⇒ Object



2
3
4
5
6
# File 'lib/scout/association/util.rb', line 2

# Replaces the namespace placeholder in a file name.
#
# When +namespace+ is given and +file+ is a String, the first occurrence of
# Entity::Identified::NAMESPACE_TAG is substituted by +namespace+. If +file+
# is also a Path, its +annotate+ method is called with the new name. In any
# other case +file+ is returned unchanged.
#
# @param file [Object] file name, possibly containing the placeholder
# @param namespace [String, nil] value to put in place of the placeholder
# @return [Object] the substituted name, or +file+ itself
def self.version_file(file, namespace)
  return file unless namespace && String === file

  versioned = file.sub(Entity::Identified::NAMESPACE_TAG, namespace)
  file.annotate versioned if Path === file
  versioned
end