Class: SQA::DataFrame

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/sqa/data_frame.rb,
lib/sqa/data_frame/alpha_vantage.rb,
lib/sqa/data_frame/yahoo_finance.rb

Defined Under Namespace

Classes: AlphaVantage, Data, YahooFinance

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(raw_data = {}, mapping: {}, transformers: {}) ⇒ DataFrame

Expects a Hash of Arrays (hofa) or an Array of Hashes (aofh). mapping: and transformers: are optional. mapping is a Hash { old_key => new_key }; transformers is also a Hash { key => Proc }.



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/sqa/data_frame.rb', line 28

# Builds a DataFrame from either a Hash of Arrays (hofa) or an Array of
# Hashes (aofh).
#
# @param raw_data [Hash, Array<Hash>] column data keyed by name, or a list
#   of row hashes (only the first element is inspected to detect this form)
# @param mapping [Hash] { old_key => new_key }, handed to the initialize_* helper
# @param transformers [Hash] { key => Proc }, applied through coerce_vectors!
# @raise [BadParameterError] when raw_data is neither a Hash nor an Array
#   whose first element is a Hash (an empty Array therefore also raises)
def initialize(
    raw_data=     {}, # Array of Hashes or hash of array or hash
    mapping:      {}, # { old_key => new_key }
    transformers: {}  # { key => Proc }
  )

  if raw_data.is_a? Hash
    initialize_hofa(raw_data, mapping: mapping)

  elsif raw_data.is_a?(Array)     &&
        raw_data.first.is_a?(Hash)
    initialize_aofh(raw_data, mapping: mapping)

  else
    # Interpolate the argument that was actually received; the previous
    # message referenced an undefined name and raised NameError instead.
    raise BadParameterError, "Expecting Hash or Array of Hashes got: #{raw_data.class}"
  end

  # Coercion is skipped when there is no usable data or no transformers.
  coerce_vectors!(transformers) if good_data? && !(transformers.nil? || transformers.empty?)
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(method_name, *args, &block) ⇒ Object



206
207
208
209
210
211
212
213
214
215
# File 'lib/sqa/data_frame.rb', line 206

# Forwards any message that @data understands to @data.
#
# On the first such call a real forwarding method is defined on the class,
# so later calls no longer pass through method_missing. Messages @data does
# not respond to fall through to super.
def method_missing(method_name, *args, &block)
  return super unless @data.respond_to?(method_name)

  self.class.send(:define_method, method_name) do |*forwarded_args, &forwarded_block|
    @data.send(method_name, *forwarded_args, &forwarded_block)
  end

  # Re-dispatch so this very call goes through the method just defined.
  send(method_name, *args, &block)
end

Instance Attribute Details

#data ⇒ Object

Returns the value of attribute data.



22
23
24
# File 'lib/sqa/data_frame.rb', line 22

# Reader for the @data instance variable, which initialize_hofa assigns.
def data
  @data
end

Class Method Details

.aofh_to_hofa(aofh, mapping: {}, transformers: {}) ⇒ Object

aofh – Array of Hashes; hofa – Hash of Arrays



282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
# File 'lib/sqa/data_frame.rb', line 282

# Pivots an Array of Hashes (rows) into a Hash of Arrays (columns).
#
# The column names come from the first row only. A row that lacks one of
# those keys contributes nil, and keys that appear only in later rows are
# ignored. The result is passed through normalize_keys with the mapping.
#
# @param aofh [Array<Hash>] row hashes
# @param mapping [Hash] { old_key => new_key } forwarded as adapter_mapping
# @param transformers [Hash] accepted but not used by this method
def aofh_to_hofa(aofh, mapping: {}, transformers: {})
  columns = aofh.first.keys.each_with_object({}) do |name, table|
    table[name] = aofh.map { |record| record[name] }
  end

  # SMELL: This might not be necessary
  normalize_keys(columns, adapter_mapping: mapping)
end

.concat(base_df, other_df) ⇒ Object



225
226
227
# File 'lib/sqa/data_frame.rb', line 225

# Class-level convenience that calls base_df.concat!(other_df) and returns
# whatever that call returns.
# For a DataFrame, concat! is listed as an alias of append!, which reassigns
# the receiver's columns, so base_df itself is changed.
def concat(base_df, other_df)
  base_df.concat!(other_df)
end

.from_aofh(aofh, mapping: {}, transformers: {}) ⇒ Object



253
254
255
256
257
258
259
# File 'lib/sqa/data_frame.rb', line 253

# Builds a new instance from an Array of Hashes by handing all three
# arguments straight to the constructor.
def from_aofh(aofh, mapping: {}, transformers: {})
  new(aofh, mapping: mapping, transformers: transformers)
end

.from_csv_file(source, mapping: {}, transformers: {}) ⇒ Object



262
263
264
265
266
267
268
269
270
# File 'lib/sqa/data_frame.rb', line 262

# Reads a CSV file whose first line holds the headers, turns every row into
# a Hash, and builds the frame through from_aofh.
#
# @param source path (or path-like object) accepted by CSV.foreach
# @param mapping [Hash] forwarded to from_aofh
# @param transformers [Hash] forwarded to from_aofh
def from_csv_file(source, mapping: {}, transformers: {})
  records = CSV.foreach(source, headers: true).map(&:to_h)

  from_aofh(records, mapping: mapping, transformers: transformers)
end

.from_json_file(source, mapping: {}, transformers: {}) ⇒ Object



273
274
275
276
277
# File 'lib/sqa/data_frame.rb', line 273

# Parses the JSON text returned by source.read and builds the frame through
# from_aofh.
#
# @param source object responding to #read that returns JSON text
# @param mapping [Hash] forwarded to from_aofh
# @param transformers [Hash] forwarded to from_aofh
def from_json_file(source, mapping: {}, transformers: {})
  from_aofh(
    JSON.parse(source.read),
    mapping:      mapping,
    transformers: transformers
  )
end

.generate_mapping(keys) ⇒ Object



318
319
320
321
322
323
324
325
326
# File 'lib/sqa/data_frame.rb', line 318

# Builds a { old_key => new_key } Hash for every key that is not already a
# Symbol. The new key is the sanitized key run through underscore_key.
# Symbol keys are left out of the mapping entirely.
def generate_mapping(keys)
  keys
    .reject { |name| name.is_a?(Symbol) }
    .to_h   { |name| [name, underscore_key(sanitize_key(name))] }
end

.is_date?(key) ⇒ Boolean

returns true if key is in a date format like 2023-06-03

Returns:

  • (Boolean)


348
349
350
# File 'lib/sqa/data_frame.rb', line 348

# True when the string form of key contains a YYYY-MM-DD shaped run of
# digits. The pattern is unanchored, so surrounding text is allowed.
def is_date?(key)
  /(\d{4}-\d{2}-\d{2})/.match?(key.to_s)
end

.load(source:, mapping: {}, transformers: {}) ⇒ Object

TODO: The Data class has its own load, which also supports YAML by default. Maybe this method should make use of @data = Data.load(source).


234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/sqa/data_frame.rb', line 234

# Loads a frame from a file, choosing the reader by file extension
# (case-insensitive): .csv goes to from_csv_file, .json to from_json_file.
#
# The transformers are handed to the reader, which passes them on to the
# constructor via from_aofh; the constructor already runs coerce_vectors!.
# They are deliberately NOT applied a second time here, because that would
# run every Proc twice on each value.
#
# @param source object responding to #extname (for example a Pathname)
# @param mapping [Hash] { old_key => new_key }
# @param transformers [Hash] { key => Proc }
# @return the frame produced by the selected reader
# @raise [BadParameterError] for any other extension, including none at all
def load(source:, mapping: {}, transformers:{})
  # delete_prefix leaves an empty extension as "", so a path without an
  # extension reaches the BadParameterError branch instead of failing on nil.
  file_type = source.extname.delete_prefix('.').downcase.to_sym

  case file_type
  when :csv
    from_csv_file(source, mapping: mapping, transformers: transformers)
  when :json
    from_json_file(source, mapping: mapping, transformers: transformers)
  else
    raise BadParameterError, "unsupported file type: #{file_type}"
  end
end

.normalize_keys(hofa, adapter_mapping: {}) ⇒ Object



301
302
303
304
305
306
# File 'lib/sqa/data_frame.rb', line 301

# Renames the keys of hofa in two passes: first with the caller-supplied
# adapter_mapping, then with the mapping that generate_mapping derives from
# the keys that remain.
def normalize_keys(hofa, adapter_mapping: {})
  adapted = rename(adapter_mapping, hofa)

  rename(generate_mapping(adapted.keys), adapted)
end

.rename(mapping, hofa) ⇒ Object



309
310
311
312
313
314
315
# File 'lib/sqa/data_frame.rb', line 309

# Renames keys of hofa in place according to mapping { old_key => new_key }
# and returns the same hofa object. Mapping entries whose old_key is absent
# from hofa are skipped. A renamed entry is re-inserted, so it moves to the
# end of the Hash.
def rename(mapping, hofa)
  mapping.each_pair do |from, to|
    next unless hofa.has_key?(from)

    hofa[to] = hofa.delete(from)
  end

  hofa
end

.sanitize_key(key) ⇒ Object

removes punctuation and special characters, replaces space with underscore.



341
342
343
# File 'lib/sqa/data_frame.rb', line 341

# Cleans up a String key in three steps:
#   1. removes the characters . ( ) : /
#   2. removes a leading run of digits (plus one optional character) that
#      is followed by whitespace, at the start of each line
#   3. turns every space into an underscore
def sanitize_key(key)
  without_punctuation = key.delete('.():/')
  without_numbering   = without_punctuation.gsub(/^\d+.?\s/, "")

  without_numbering.tr(' ', '_')
end

.underscore_key(key) ⇒ Object

returns a snake_case Symbol



330
331
332
333
334
335
336
# File 'lib/sqa/data_frame.rb', line 330

# Converts key to a snake_case Symbol: "::" becomes "/", an underscore is
# inserted at CamelCase word boundaries, "-" becomes "_", and the result is
# downcased.
def underscore_key(key)
  snake = key.to_s.gsub(/::/, '/')
  snake = snake.gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2') # "HTTPServer" -> "HTTP_Server"
  snake = snake.gsub(/([a-z\d])([A-Z])/,'\1_\2')     # "adjClose"   -> "adj_Close"

  snake.tr("-", "_").downcase.to_sym
end

Instance Method Details

#append!(new_df) ⇒ Object Also known as: concat!

Raises:

  • (BadParameterError) — when the keys of new_df differ from this frame's keys



165
166
167
168
169
170
171
# File 'lib/sqa/data_frame.rb', line 165

# Appends the columns of new_df to the matching columns of this frame.
#
# Both frames must report identical keys, in the same order. Each column is
# replaced by a new Array made of the old values followed by the new ones.
# Returns the result of keys.each.
#
# @raise [BadParameterError] when the keys differ
def append!(new_df)
  if keys != new_df.keys
    raise(BadParameterError, "Key mismatch")
  end

  keys.each do |name|
    @data[name] = @data[name] + new_df[name]
  end
end

#coerce_vectors!(transformers) ⇒ Object

Map the values of the vectors into different object types. transformers is a Hash where the key is the vector name and the value is a Proc.

For example:

{ price: ->(v) { v.to_f.round(3) } }



199
200
201
202
203
# File 'lib/sqa/data_frame.rb', line 199

# Rewrites column values in place. For every { key => callable } pair in
# transformers, each value of @data[key] is replaced by the result of
# calling the callable with it.
def coerce_vectors!(transformers)
  transformers.each_pair do |column, converter|
    vector = @data[column]
    vector.map! { |value| converter.call(value) }
  end
end

#good_data? ⇒ Boolean

Returns:

  • (Boolean)


49
50
51
52
53
# File 'lib/sqa/data_frame.rb', line 49

# True when @data is non-empty and at least one of its values is neither
# nil nor empty.
def good_data?
  return false if @data.empty?

  @data.values.any? { |vector| !(vector.nil? || vector.empty?) }
end

#initialize_aofh(aofh, mapping:) ⇒ Object



56
57
58
59
60
61
62
63
64
65
# File 'lib/sqa/data_frame.rb', line 56

# Initializes the frame from an Array of Hashes: the class-level
# aofh_to_hofa converts the rows into a Hash of Arrays, and the result is
# handed to initialize_hofa with the same mapping.
def initialize_aofh(aofh, mapping:)
  columns = self.class.aofh_to_hofa(aofh, mapping: mapping)

  initialize_hofa(columns, mapping: mapping)
end

#initialize_hofa(hofa, mapping:) ⇒ Object



68
69
70
71
72
73
74
75
76
# File 'lib/sqa/data_frame.rb', line 68

# Initializes the frame from a Hash of Arrays. When a mapping is given, the
# keys are first run through the class-level normalize_keys; with an empty
# mapping the hash is used untouched. The result is wrapped in Data and
# stored in @data.
def initialize_hofa(hofa, mapping:)
  unless mapping.empty?
    hofa = self.class.normalize_keys(hofa, adapter_mapping: mapping)
  end

  @data = Data.new(hofa)
end

#ncols ⇒ Object



117
118
119
# File 'lib/sqa/data_frame.rb', line 117

# Number of columns: the size of whatever keys returns.
# NOTE(review): keys is not defined in this view; presumably it is forwarded
# to @data (see method_missing) -- confirm.
def ncols
  keys.size
end

#rename(mapping) ⇒ Object

Creates a new instance with new keys based on the mapping hash where

{ old_key => new_key }


179
180
181
182
183
184
185
186
# File 'lib/sqa/data_frame.rb', line 179

# Returns a new SQA::DataFrame whose keys are renamed according to mapping
# { old_key => new_key }. The renaming itself is done by the class-level
# rename on @data.to_h.
def rename(mapping)
  renamed = self.class.rename(mapping, @data.to_h)

  SQA::DataFrame.new(renamed)
end

#rename_vectors ⇒ Object

Creates a new instance with new keys based on the mapping hash where

{ old_key => new_key }


187
188
189
190
191
192
193
194
# File 'lib/sqa/data_frame.rb', line 187

# Returns a new SQA::DataFrame whose keys are renamed according to mapping
# { old_key => new_key }. The renaming itself is done by the class-level
# rename on @data.to_h.
def rename(mapping)
  renamed = self.class.rename(mapping, @data.to_h)

  SQA::DataFrame.new(renamed)
end

#respond_to_missing?(method_name, include_private = false) ⇒ Boolean

Returns:

  • (Boolean)


218
219
220
# File 'lib/sqa/data_frame.rb', line 218

def respond_to_missing?(method_name, include_private = false)
  @data.respond_to?(method_name) || super
end

#row(x) ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/sqa/data_frame.rb', line 140

# Fetches a single row.
#
# @param x [Integer, Hash] either a zero-based row index, or a one-entry
#   Hash { key => value } selecting the first row whose key column equals
#   value
# @return [Array] the row's values in key order when x is an Integer
# @return [Hash] { key => value } for the matching row when x is a Hash
# @raise [BadParameterError] when the index is negative or past the last
#   row, when the Hash does not have exactly one entry, when the column or
#   value cannot be found, or when x is any other type
def row(x)
  case x
  when Integer
    if x < 0 || x >= size
      raise BadParameterError, "row index #{x} is out of range (size: #{size})"
    end

    keys.map { |key| @data[key][x] }

  when Hash
    # Exactly one { key => value } pair is supported; an empty Hash used to
    # slip through and fail later with NoMethodError.
    raise BadParameterError, "x is #{x}" unless x.size == 1

    key, value = x.first
    column     = @data[key]
    # A separate local keeps x intact so the message can show what was
    # searched for; a missing column is reported the same way.
    index      = column&.index(value)
    raise BadParameterError, "Not Found #{x}" if index.nil?

    keys.zip(row(index)).to_h

  else
    raise BadParameterError, "Unknown x.class: #{x.class}"
  end
end

#rows ⇒ Object Also known as: to_a

Same as values.transpose. TODO: benchmark to see if the transpose method is faster.



129
130
131
132
133
134
135
136
# File 'lib/sqa/data_frame.rb', line 129

# Returns every row as an Array of Arrays, built by calling row(index) for
# each index from 0 up to size - 1.
def rows
  Array.new(size) { |index| row(index) }
end

#size ⇒ Object Also known as: nrows, length

The number of data rows



105
106
107
# File 'lib/sqa/data_frame.rb', line 105

# The number of data rows, taken from the length of the first column.
# A frame without any columns has zero rows.
def size
  columns = @data.keys
  # Without this guard the lookup below would use a nil key and fail.
  return 0 if columns.empty?

  @data[columns.first].size
end

#to_aofh ⇒ Object



95
96
97
# File 'lib/sqa/data_frame.rb', line 95

# Placeholder: conversion to an Array of Hashes is not implemented; the
# body only calls NotImplemented.raise.
# NOTE(review): NotImplemented is not defined in this view -- presumably a
# project error class that provides a class-level raise; confirm.
def to_aofh
  NotImplemented.raise
end

#to_csv(path_to_file) ⇒ Object



80
81
82
83
84
85
86
87
# File 'lib/sqa/data_frame.rb', line 80

# Writes the frame to path_to_file as CSV: one header line holding the
# keys, followed by one line per row. The file is opened in 'w' mode, so
# existing content is replaced.
def to_csv(path_to_file)
  CSV.open(path_to_file, 'w') do |out|
    out << keys
    size.times { |index| out << row(index) }
  end
end

#to_json(path_to_file) ⇒ Object



90
91
92
# File 'lib/sqa/data_frame.rb', line 90

# Placeholder: writing the frame as JSON is not implemented; path_to_file
# is ignored and the body only calls NotImplemented.raise.
# NOTE(review): NotImplemented is not defined in this view -- presumably a
# project error class that provides a class-level raise; confirm.
def to_json(path_to_file)
  NotImplemented.raise
end