Module: DataTools::Hash

Included in:
Hash
Defined in:
lib/data_tools/hash.rb

Instance Method Summary collapse

Instance Method Details

#append(hash2) ⇒ Object

HASH OF ARRAYS



101
102
103
# File 'lib/data_tools/hash.rb', line 101

# Combine two hashes key-by-key into a new hash of arrays.
#
# Every key found in either hash appears in the result. Its value is
# Array(self[key]) followed by Array(hash2[key]), so scalar values are
# wrapped and nil/missing entries contribute nothing.
#
# @param hash2 [Hash] hash whose values are appended after this hash's values
# @return [Hash] new hash mapping each key to the concatenated array
def append(hash2)
  combined_keys = keys | hash2.keys
  combined_keys.each_with_object({}) do |key, merged|
    merged[key] = Array(self[key]) + Array(hash2[key])
  end
end

#cleanse(options = {}) ⇒ Object



183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/data_tools/hash.rb', line 183

# Build a cleaned copy of this hash.
#
# Each value is passed through DataTools.scour(value, options). For every key
# listed in options[:datefields] (key => strptime format string) the ORIGINAL
# value is then parsed with that format and stored as a Date; nil/false values
# are stored unchanged. When parsing raises ArgumentError, a warning is
# emitted and the field is set to nil.
#
# @param options [Hash] forwarded to DataTools.scour; also read here:
#   :datefields - optional Hash of field key => date format
#   :line       - only interpolated into the warning text
# @return [Hash] new hash with the cleaned values
def cleanse(options = {})
  # A missing :datefields means "no date fields"; previously this was nil[k]
  # and raised NoMethodError, even for the default options = {}.
  datefields = options[:datefields] || {}
  each_with_object({}) do |(k, v), out|
    out[k] = DataTools.scour(v, options)
    dateformat = datefields[k]
    next unless dateformat

    begin
      out[k] = v && DateTime.strptime(v, dateformat).to_date
    rescue ArgumentError
      warn "expected '#{dateformat}' in #{k} = '#{v}' at [#{options[:line]}]: #{self}"
      out[k] = nil
    end
  end
end

#compare(hash2) ⇒ Object

HASH OF HASHES: compare to another hash-of-hashes (aka changes, deltas, diffs). Reports the changes between a current state (self) and a future state (hash2). Each of the four sections (new elements, lost elements, unchanged elements, changes) is another hash-of-hashes.



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/data_tools/hash.rb', line 109

# Diff this hash-of-hashes (current state) against hash2 (future state).
#
# Keys are split into those only in hash2, those only in self, and shared
# keys. For each shared key, hash2[key].diff(self[key]) is called; a result
# for which `any?` is true is recorded as a change, otherwise the key counts
# as unchanged.
# NOTE(review): `diff` is not defined in this listing -- presumably a Hash
# extension supplied elsewhere; confirm.
#
# @param hash2 [Hash] the future state to compare against
# @return [Hash] with keys :new (entries only in hash2), :lost (entries only
#   in self), :unchanged (shared entries of self without differences) and
#   :changes (shared key => diff result)
def compare(hash2)
  mine = keys
  theirs = hash2.keys

  # Compute each shared key's diff once, then split on whether it is non-empty.
  deltas = (mine & theirs).map { |key| [key, hash2[key].diff(self[key])] }
  changed, same = deltas.partition { |_key, delta| delta.any? }

  {
    :new => hash2.slice(*(theirs - mine)),
    :lost => slice(*(mine - theirs)),
    :unchanged => slice(*same.map(&:first)),
    :changes => Hash[changed]
  }
end

#correlate!(args = {}) ⇒ Object

apply correlations

correlate!(:with => hash2, :through => mapping-hash, :onkey => attribute-to-record-mapping-in)

replaces any existing correlations (the field named by `:onkey` will be set to nil where the key does not appear in the correlation hash)



165
166
167
168
169
170
171
172
173
174
# File 'lib/data_tools/hash.rb', line 165

# Write a correlation into every record of this hash-of-hashes.
#
# For each (key, record) pair, the key is looked up in args[:through]; when
# that yields a truthy value it is used to look up args[:with], and the result
# is stored in record[args[:onkey]]. Records whose key has no mapping get nil
# (more precisely, the falsy lookup result), replacing any earlier value.
#
# @param args [Hash] :with (lookup hash), :through (key mapping),
#   :onkey (field to set in each record)
# @raise [RuntimeError] "Missing argument" when :onkey is nil
#   (:with and :through are not validated here)
# @return [Hash] self
def correlate!(args = {})
  target, mapping, field = args.values_at(:with, :through, :onkey)
  raise "Missing argument" if field.nil?

  each_pair do |id, record|
    linked = mapping[id]
    record[field] = linked && target[linked]
  end
end

#correlated?(args = {}) ⇒ Boolean

ARRAY OF HASHES

correlated(:with => correlation-hash, :by => key-field)

pull subset that have mappings in the correlation hash

Returns:

  • (Boolean)


143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/data_tools/hash.rb', line 143

# List the discrepancies between this hash-of-hashes and a correlation.
#
# A record is a mismatch when record[args[:onkey]] differs from the value
# obtained by mapping its key through args[:through] and looking the result
# up in args[:with] (nil/falsy when the key has no mapping). Keys of
# args[:through] that are absent from this hash are reported as well.
#
# NOTE: despite the `?` suffix this returns an Array, which is truthy even
# when empty -- callers must test the result with `empty?`.
#
# @param args [Hash] :with (lookup hash), :through (key mapping),
#   :onkey (field holding the correlation in each record)
# @return [Array] [key, record] pairs for mismatching records, followed by the
#   bare keys of :through missing from this hash; empty when fully correlated
def correlated?(args = {})
  with = args[:with]
  through = args[:through]
  onkey = args[:onkey]

  mismatches = select do |k, h|
    should_match = through[k] && with[through[k]]
    h[onkey] != should_match
  end
  unmatched = through.keys - keys

  # Hash#select returns a Hash on Ruby >= 1.9 and Hash has no `|`;
  # convert to [key, record] pairs so the union with the key list works.
  mismatches.to_a | unmatched
end

#diffs_from(orig) ⇒ Object

Construct a hash of the changes needed to convert an original hash into this (new) set of values. Keys in the original that do not appear in the new hash appear in the diff with nil values, EXCEPT that symbol keys from the original that *do not appear* in the new hash are ignored (a key with a nil value still counts as appearing).



5
6
7
8
9
10
11
12
13
14
# File 'lib/data_tools/hash.rb', line 5

# Compute the changes needed to turn `orig` into this hash.
#
# Walks the union of both key sets. A key whose value differs between the two
# hashes is recorded with this hash's value (nil when this hash lacks it).
# Symbol keys that are not present in this hash at all are skipped.
#
# @param orig [Hash] the original hash to diff against
# @return [Hash] key => new value for every differing key
def diffs_from(orig)
  (keys | orig.keys).each_with_object({}) do |key, delta|
    # Symbol keys dropped from the new hash are deliberately not reported.
    next if key.is_a?(Symbol) && !key?(key)

    current = self[key]
    delta[key] = current if orig[key] != current
  end
end

#dumpme(filename) ⇒ Object



91
92
93
94
# File 'lib/data_tools/hash.rb', line 91

# Serialize this hash with Marshal into a new file, refusing to overwrite.
#
# @param filename [String] path of the file to create
# @raise [RuntimeError] "<filename> exists" when the path already exists
# @return [File] the (closed) file object the data was written to
def dumpme(filename)
  # File.exists? was removed in Ruby 3.2; File.exist? works on all versions.
  raise "#{filename} exists" if File.exist?(filename)
  # Marshal output is binary: "wb" avoids newline/encoding translation.
  File.open(filename, "wb") { |f| f << Marshal.dump(self) }
end

#dumpme!(filename) ⇒ Object



95
96
97
98
# File 'lib/data_tools/hash.rb', line 95

# Serialize this hash with Marshal into a file, replacing any existing file.
#
# An existing file at the path is unlinked first, then a fresh file is written.
#
# @param filename [String] path of the file to (re)create
# @return [File] the (closed) file object the data was written to
def dumpme!(filename)
  # File.exists? was removed in Ruby 3.2; File.exist? works on all versions.
  File.unlink(filename) if File.exist?(filename)
  # Marshal output is binary: "wb" avoids newline/encoding translation.
  File.open(filename, "wb") { |f| f << Marshal.dump(self) }
end

#key_for(keyarray, opts = {}) ⇒ Object

Construct a key field for the hash based on the list of fields provided. Options:

:strip (true/false, default = true): remove leading & trailing whitespace from each value
:truncate (integer): set maximum length for each value; truncate BEFORE stripping


20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/data_tools/hash.rb', line 20

# Build a lookup key for this hash from the values of the given fields.
#
# @param keyarray [Array] the field keys whose values make up the key
# @param opts [Hash] options (the hash is read only, never modified):
#   :strip    - strip leading/trailing whitespace from String values
#               (default true)
#   :truncate - Integer maximum length per value, applied BEFORE stripping;
#               nil values are left alone
#   :delim    - when given, the values are joined with this delimiter
# @return [nil] when every selected value is nil (or keyarray is empty)
# @return [Object] the single value when exactly one field was requested
# @return [String, Array] joined String when :delim is set, otherwise the
#   Array of values
def key_for(keyarray, opts = {})
  # Read the default locally instead of writing :strip into the caller's hash.
  strip = opts.fetch(:strip, true)
  limit = opts[:truncate]

  this_key = keyarray.map do |k|
    v = self[k]
    # Missing fields are nil; truncating them used to raise NoMethodError.
    v = v[0, limit] if limit && !v.nil?
    v = v.strip if strip && v.is_a?(String)
    v
  end

  return nil if this_key.all?(&:nil?)
  # Single-field keys become single values, not one-element arrays.
  return this_key.first if this_key.size == 1

  delim = opts[:delim]
  delim ? this_key.join(delim) : this_key
end

#nilify!(nilvalue = nil) ⇒ Object

remove all the keys that contain nil values (or specify a “nil” value for sources that fill in empty records with special nil placeholders)



177
178
179
180
181
# File 'lib/data_tools/hash.rb', line 177

# Delete, in place, every entry whose value equals `nilvalue`.
#
# @param nilvalue [Object] the value treated as "empty" (default nil); pass a
#   placeholder such as a sentinel string for sources that fill blanks with it
# @return [Hash] self
def nilify!(nilvalue = nil)
  doomed = select { |_key, value| value == nilvalue }.keys
  doomed.each { |key| delete(key) }
  self
end

#numify!(*keyarray) ⇒ Object

convert specified fields to integers



133
134
135
136
137
138
# File 'lib/data_tools/hash.rb', line 133

# Convert the values of the named fields to integers, in place.
#
# Each truthy value is replaced by the result of calling `to_i` on it; fields
# that are missing, nil or false are left untouched (no key is added).
#
# @param keyarray [Array] the field keys to convert
# @return [Hash] self
def numify!(*keyarray)
  keyarray.each_with_object(self) do |field, record|
    raw = record[field]
    next unless raw

    record[field] = raw.to_i
  end
end

#pair_off(hash2) ⇒ Object

For a Hash where all the values are Arrays; hash2 should also be a hash of key/array pairs. Finds all the cases where keys appear in both source hashes.



41
42
43
44
45
46
47
48
49
# File 'lib/data_tools/hash.rb', line 41

# Pair up the entries of two hashes-of-arrays that share a key.
#
# A key is paired when hash2 has a truthy value for it and that value answers
# true to `any?`. Neither hash is modified.
#
# @param hash2 [Hash] the other hash of key => array
# @return [Hash] key => [this hash's value, hash2's value] for every paired key
def pair_off(hash2)
  each_with_object({}) do |(key, mine), matched|
    theirs = hash2[key]
    matched[key] = [mine, theirs] if theirs && theirs.any?
  end
end

#pair_off!(hash2) ⇒ Object

Destructive version of `#pair_off` above. When matching keys are found, the keys are removed from both source hashes.



66
67
68
69
70
71
72
73
74
75
76
# File 'lib/data_tools/hash.rb', line 66

# Destructive pairing: pair up entries of two hashes-of-arrays that share a
# key, removing every paired key from BOTH hashes.
#
# A key is paired when hash2 has a truthy value for it and that value answers
# true to `any?`. Keys missing from hash2 are simply skipped.
#
# @param hash2 [Hash] the other hash of key => array; modified in place
# @return [Hash] key => [this hash's value, hash2's value] for every paired key
def pair_off!(hash2)
  pairs = {}
  # Iterate over a snapshot of the keys because entries are deleted as we go.
  keys.each do |k|
    partner = hash2[k]
    # Guard against keys absent from hash2 (nil has no `any?`).
    next unless partner && partner.any?

    pairs[k] = [self[k], partner]
    delete(k)
    hash2.delete(k)
  end
  pairs
end

#pair_off_by(hash2, &block) ⇒ Object

Same as `pair_off`, except that it chooses the partner key by calling a block rather than doing a strict comparison.



53
54
55
56
57
58
59
60
61
62
# File 'lib/data_tools/hash.rb', line 53

# Like pair_off, but the partner key in hash2 is chosen by calling the block
# with this hash's key instead of using the same key.
#
# A key is paired when hash2 has a truthy value under the block's result and
# that value answers true to `any?`. Neither hash is modified.
#
# @param hash2 [Hash] the other hash of key => array
# @yieldparam key [Object] a key of this hash
# @yieldreturn [Object] the key to look up in hash2
# @return [Hash] this hash's key => [this hash's value, hash2's value]
def pair_off_by(hash2, &block)
  each_with_object({}) do |(key, mine), matched|
    theirs = hash2[block.call(key)]
    matched[key] = [mine, theirs] if theirs && theirs.any?
  end
end

#pair_off_by!(hash2, &block) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/data_tools/hash.rb', line 78

# Destructive variant of pair_off_by: the partner key in hash2 is chosen by
# calling the block with this hash's key, and every paired entry is removed
# from both hashes (this hash's key here, the partner key in hash2).
#
# @param hash2 [Hash] the other hash of key => array; modified in place
# @yieldparam key [Object] a key of this hash
# @yieldreturn [Object] the key to look up in hash2
# @return [Hash] this hash's key => [this hash's value, hash2's value]
def pair_off_by!(hash2, &block)
  matched = {}
  each do |key, mine|
    partner_key = block.call(key)
    theirs = hash2[partner_key]
    next unless theirs && theirs.any?

    matched[key] = [mine, theirs]
    delete(key)
    hash2.delete(partner_key)
  end
  matched
end

#pluck(keys) ⇒ Object



197
198
199
200
# File 'lib/data_tools/hash.rb', line 197

# Fetch the values stored under the given keys, in the order requested.
#
# Each key is looked up with `self[key]`, so keys that are absent yield the
# hash's default (normally nil). The `keys` parameter shadows Hash#keys here.
#
# @param keys [Enumerable] the keys to look up
# @return [Array] one value per requested key
def pluck(keys)
  keys.map(&method(:[]))
end