Class: SQA::DataFrame
- Inherits:
-
Object
show all
- Extended by:
- Forwardable
- Defined in:
- lib/sqa/data_frame.rb,
lib/sqa/data_frame/alpha_vantage.rb,
lib/sqa/data_frame/yahoo_finance.rb
Defined Under Namespace
Classes: AlphaVantage, Data, YahooFinance
Instance Attribute Summary collapse
Class Method Summary
collapse
-
.aofh_to_hofa(aofh, mapping: {}, transformers: {}) ⇒ Object
aofh – Array of Hashes hofa – Hash of Arrays.
-
.concat(base_df, other_df) ⇒ Object
-
.from_aofh(aofh, mapping: {}, transformers: {}) ⇒ Object
-
.from_csv_file(source, mapping: {}, transformers: {}) ⇒ Object
-
.from_json_file(source, mapping: {}, transformers: {}) ⇒ Object
-
.generate_mapping(keys) ⇒ Object
-
.is_date?(key) ⇒ Boolean
returns true if key is in a date format like 2023-06-03.
-
.load(source:, mapping: {}, transformers: {}) ⇒ Object
TODO: The Data class has its own load which also supports YAML by default.
-
.normalize_keys(hofa, adapter_mapping: {}) ⇒ Object
-
.rename(mapping, hofa) ⇒ Object
-
.sanitize_key(key) ⇒ Object
Removes punctuation and special characters, and replaces spaces with underscores.
-
.underscore_key(key) ⇒ Object
returns a snake_case Symbol.
Instance Method Summary
collapse
Constructor Details
#initialize(raw_data = {}, mapping: {}, transformers: {}) ⇒ DataFrame
Expects a Hash of Arrays (hofa) or an Array of Hashes (aofh). mapping: and transformers: are optional. mapping is a Hash { old_key => new_key }; transformers is also a Hash { key => Proc }.
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
|
# File 'lib/sqa/data_frame.rb', line 28
# Builds a DataFrame from either a Hash of Arrays (hofa) or an Array of
# Hashes (aofh).
#
# @param raw_data [Hash, Array<Hash>] column-oriented Hash or row-oriented Array
# @param mapping [Hash] optional { old_key => new_key } renames
# @param transformers [Hash] optional { key => callable } applied to every
#   value of the named vector once the data has been stored
# @raise [BadParameterError] when raw_data is neither a Hash nor an Array
#   whose first element is a Hash
def initialize(raw_data = {}, mapping: {}, transformers: {})
  if raw_data.is_a?(Hash)
    initialize_hofa(raw_data, mapping: mapping)
  elsif raw_data.is_a?(Array) && raw_data.first.is_a?(Hash)
    initialize_aofh(raw_data, mapping: mapping)
  else
    # The message used to interpolate an undefined local (aofh_or_hofa),
    # which surfaced as a NameError instead of the intended error.
    raise BadParameterError, "Expecting Hash or Array of Hashes got: #{raw_data.class}"
  end

  return if transformers.nil? || transformers.empty?

  # Only coerce when there is at least one non-empty vector to work on.
  coerce_vectors!(transformers) if good_data?
end
|
Dynamic Method Handling
This class handles dynamic methods through the method_missing method
#method_missing(method_name, *args, &block) ⇒ Object
206
207
208
209
210
211
212
213
214
215
|
# File 'lib/sqa/data_frame.rb', line 206
# Forwards messages this object does not understand to @data, provided
# @data responds to them; anything else goes to the default handling.
#
# On the first forwarded call a real method of the same name is defined on
# the class, so later calls no longer pass through method_missing.
def method_missing(method_name, *args, &block)
  return super unless @data.respond_to?(method_name)

  self.class.send(:define_method, method_name) do |*forwarded_args, &forwarded_block|
    @data.send(method_name, *forwarded_args, &forwarded_block)
  end

  # Dispatch through the freshly defined method.
  send(method_name, *args, &block)
end
|
Instance Attribute Details
#data ⇒ Object
Returns the value of attribute data.
22
23
24
|
# File 'lib/sqa/data_frame.rb', line 22
# Reader for the @data instance variable.
#
# @return [Object] whatever is currently stored in @data
def data
@data
end
|
Class Method Details
.aofh_to_hofa(aofh, mapping: {}, transformers: {}) ⇒ Object
aofh – Array of Hashes hofa – Hash of Arrays
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
|
# File 'lib/sqa/data_frame.rb', line 282
# Pivots an Array of Hashes (aofh, one Hash per row) into a Hash of Arrays
# (hofa, one Array per column) and normalizes the resulting keys.
#
# The column set is taken from the first entry; a key that is missing from
# a later entry contributes nil to its column.
#
# @param aofh [Array<Hash>] row-oriented data; may be empty
# @param mapping [Hash] { old_key => new_key } passed to normalize_keys
# @param transformers [Hash] accepted for signature parity; not used here
# @return [Object] the result of normalize_keys on the pivoted Hash
def aofh_to_hofa(aofh, mapping: {}, transformers: {})
  first_entry = aofh.first
  # An empty input has no first entry to read keys from; pivot to an
  # empty Hash instead of calling #keys on nil.
  column_names = first_entry.nil? ? [] : first_entry.keys

  hofa = column_names.each_with_object({}) do |key, columns|
    columns[key] = aofh.map { |entry| entry[key] }
  end

  normalize_keys(hofa, adapter_mapping: mapping)
end
|
.concat(base_df, other_df) ⇒ Object
225
226
227
|
# File 'lib/sqa/data_frame.rb', line 225
# Class-level convenience wrapper: sends concat! to base_df with other_df
# as the argument and returns whatever that call returns.
#
# @param base_df [#concat!] receiver of the concat! call
# @param other_df [Object] passed through to concat!
def concat(base_df, other_df)
base_df.concat!(other_df)
end
|
.from_aofh(aofh, mapping: {}, transformers: {}) ⇒ Object
253
254
255
256
257
258
259
|
# File 'lib/sqa/data_frame.rb', line 253
# Factory: constructs a new instance from an Array of Hashes, passing the
# mapping and transformers straight through to the constructor.
#
# @param aofh [Array<Hash>] row-oriented data
# @param mapping [Hash] { old_key => new_key }
# @param transformers [Hash] { key => callable }
def from_aofh(aofh, mapping: {}, transformers: {})
  new(aofh, mapping: mapping, transformers: transformers)
end
|
.from_csv_file(source, mapping: {}, transformers: {}) ⇒ Object
262
263
264
265
266
267
268
269
270
|
# File 'lib/sqa/data_frame.rb', line 262
# Reads a CSV file whose first line is a header row, converts every row to
# a Hash keyed by header, and hands the resulting Array of Hashes to
# from_aofh.
#
# @param source [Object] path of the CSV file, passed to CSV.foreach
# @param mapping [Hash] { old_key => new_key }
# @param transformers [Hash] { key => callable }
def from_csv_file(source, mapping: {}, transformers: {})
  # Without a block CSV.foreach returns an Enumerator over the rows.
  row_hashes = CSV.foreach(source, headers: true).map(&:to_h)
  from_aofh(row_hashes, mapping: mapping, transformers: transformers)
end
|
.from_json_file(source, mapping: {}, transformers: {}) ⇒ Object
273
274
275
276
277
|
# File 'lib/sqa/data_frame.rb', line 273
# Reads the whole of source, parses it as JSON, and hands the parsed value
# to from_aofh.
#
# @param source [#read] object whose #read returns the JSON text
# @param mapping [Hash] { old_key => new_key }
# @param transformers [Hash] { key => callable }
def from_json_file(source, mapping: {}, transformers: {})
  json_text = source.read
  from_aofh(JSON.parse(json_text), mapping: mapping, transformers: transformers)
end
|
.generate_mapping(keys) ⇒ Object
318
319
320
321
322
323
324
325
326
|
# File 'lib/sqa/data_frame.rb', line 318
# Builds a { original_key => normalized_key } Hash for every key that is
# not already a Symbol. Symbol keys are left out of the mapping.
#
# @param keys [Enumerable] the keys to inspect
# @return [Hash] each non-Symbol key mapped to
#   underscore_key(sanitize_key(key))
def generate_mapping(keys)
  keys.each_with_object({}) do |key, mapping|
    next if key.is_a?(Symbol)

    mapping[key] = underscore_key(sanitize_key(key))
  end
end
|
.is_date?(key) ⇒ Boolean
returns true if key is in a date format like 2023-06-03
348
349
350
|
# File 'lib/sqa/data_frame.rb', line 348
# Tells whether the string form of key contains a run shaped like
# YYYY-MM-DD (four digits, dash, two digits, dash, two digits). The pattern
# is not anchored, so the run may appear anywhere in the string.
#
# @param key [#to_s]
# @return [Boolean]
def is_date?(key)
  /(\d{4}-\d{2}-\d{2})/.match?(key.to_s)
end
|
.load(source:, mapping: {}, transformers: {}) ⇒ Object
TODO: The Data class has its own load which also supports
YAML by default. Maybe this method should
make use of @data = Data.load(source)
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
|
# File 'lib/sqa/data_frame.rb', line 234
# Loads a DataFrame from a CSV or JSON file, choosing the reader from the
# file extension (case-insensitive).
#
# The transformers are handed to the reader, which passes them on to the
# constructor; the constructor applies them. They are deliberately NOT
# applied again here, because a second pass would corrupt the data for any
# transformer that is not idempotent (e.g. ->(v) { v * 100 }).
#
# TODO: The Data class has its own load which also supports YAML by
# default. Maybe this method should make use of @data = Data.load(source).
#
# @param source [#extname] the file to read (e.g. a Pathname)
# @param mapping [Hash] { old_key => new_key }
# @param transformers [Hash] { key => callable }
# @raise [BadParameterError] when the extension is missing or is neither
#   csv nor json
def load(source:, mapping: {}, transformers: {})
  # delete_prefix keeps an extension-less source from blowing up on nil.
  file_type = source.extname.delete_prefix('.').downcase

  case file_type
  when 'csv'
    from_csv_file(source, mapping: mapping, transformers: transformers)
  when 'json'
    from_json_file(source, mapping: mapping, transformers: transformers)
  else
    raise BadParameterError, "unsupported file type: #{file_type}"
  end
end
|
.normalize_keys(hofa, adapter_mapping: {}) ⇒ Object
301
302
303
304
305
306
|
# File 'lib/sqa/data_frame.rb', line 301
# Renames the keys of hofa in two passes: first with the caller-supplied
# adapter_mapping, then with a mapping generated from the keys that remain.
#
# @param hofa [Hash] Hash of Arrays whose keys are to be renamed
# @param adapter_mapping [Hash] { old_key => new_key } applied first
# @return [Object] the result of the second rename call
def normalize_keys(hofa, adapter_mapping: {})
  adapted = rename(adapter_mapping, hofa)
  generated_mapping = generate_mapping(adapted.keys)
  rename(generated_mapping, adapted)
end
|
.rename(mapping, hofa) ⇒ Object
309
310
311
312
313
314
315
|
# File 'lib/sqa/data_frame.rb', line 309
# Renames keys of hofa in place according to mapping and returns hofa.
# Mapping entries whose old key is absent from hofa are ignored.
#
# @param mapping [Hash] { old_key => new_key }
# @param hofa [Hash] modified in place
# @return [Hash] the same hofa object
def rename(mapping, hofa)
  mapping.each_with_object(hofa) do |(old_key, new_key), columns|
    next unless columns.has_key?(old_key)

    columns[new_key] = columns.delete(old_key)
  end
end
|
.sanitize_key(key) ⇒ Object
Removes punctuation and special characters, and replaces spaces with underscores.
341
342
343
|
# File 'lib/sqa/data_frame.rb', line 341
# Cleans up a String key in three steps:
#   1. drops the characters . ( ) : /
#   2. drops a leading run of digits (plus at most one further character)
#      that is followed by whitespace, at the start of any line
#   3. turns every space into an underscore
#
# @param key [String]
# @return [String]
def sanitize_key(key)
  stripped = key.tr('.():/','')
  unnumbered = stripped.gsub(/^\d+.?\s/, "")
  unnumbered.tr(' ','_')
end
|
.underscore_key(key) ⇒ Object
returns a snake_case Symbol
330
331
332
333
334
335
336
|
# File 'lib/sqa/data_frame.rb', line 330
# Converts key to a lower-case, underscore-separated Symbol.
#
# Steps: "::" becomes "/", an underscore is inserted at CamelCase word
# boundaries, dashes become underscores, and the result is downcased.
#
# @param key [#to_s]
# @return [Symbol]
def underscore_key(key)
  text = key.to_s.gsub(/::/, '/')
  # Split an acronym from a following capitalized word: "HTTPServer" -> "HTTP_Server"
  text = text.gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2')
  # Split a lower-case letter or digit from a following capital: "adjClose" -> "adj_Close"
  text = text.gsub(/([a-z\d])([A-Z])/,'\1_\2')
  text.tr("-", "_").downcase.to_sym
end
|
Instance Method Details
#append!(new_df) ⇒ Object
Also known as:
concat!
165
166
167
168
169
170
171
|
# File 'lib/sqa/data_frame.rb', line 165
# Appends the vectors of new_df to the matching vectors of this frame.
# Each @data entry is reassigned to the concatenation of the old vector and
# the new one.
#
# @param new_df [#keys, #[]] frame with exactly the same keys, in the same order
# @raise [BadParameterError] when the key lists differ
def append!(new_df)
  raise(BadParameterError, "Key mismatch") unless keys == new_df.keys

  keys.each { |key| @data[key] = @data[key] + new_df[key] }
end
|
#coerce_vectors!(transformers) ⇒ Object
Maps the values of the vectors into different object types. transformers is a Hash where the key is the vector name and the value is a Proc.
For example:
{ price: ->(v) { v.to_f.round(3) } }
199
200
201
202
203
|
# File 'lib/sqa/data_frame.rb', line 199
# Rewrites vectors in place: for each { key => transformer } pair, every
# value of @data[key] is replaced by transformer.call(value).
#
# @param transformers [Hash] { vector_key => callable }
def coerce_vectors!(transformers)
  transformers.each_pair do |key, transformer|
    vector = @data[key]
    vector.map! { |value| transformer.call(value) }
  end
end
|
#good_data? ⇒ Boolean
49
50
51
52
53
|
# File 'lib/sqa/data_frame.rb', line 49
# True when @data is non-empty and at least one of its values is neither
# nil nor empty.
#
# @return [Boolean]
def good_data?
  return false if @data.empty?

  @data.values.any? { |vector| !(vector.nil? || vector.empty?) }
end
|
#initialize_aofh(aofh, mapping:) ⇒ Object
56
57
58
59
60
61
62
63
64
65
|
# File 'lib/sqa/data_frame.rb', line 56
# Constructor helper for row-oriented input: converts the Array of Hashes
# to a Hash of Arrays via the class-level aofh_to_hofa, then continues with
# initialize_hofa.
#
# @param aofh [Array<Hash>]
# @param mapping [Hash] { old_key => new_key }, passed to both steps
def initialize_aofh(aofh, mapping:)
  column_data = self.class.aofh_to_hofa(aofh, mapping: mapping)
  initialize_hofa(column_data, mapping: mapping)
end
|
#initialize_hofa(hofa, mapping:) ⇒ Object
68
69
70
71
72
73
74
75
76
|
# File 'lib/sqa/data_frame.rb', line 68
# Constructor helper for column-oriented input: stores the Hash of Arrays
# in @data, wrapped in a Data object. The keys are normalized first, but
# only when a non-empty mapping was supplied.
#
# @param hofa [Hash] Hash of Arrays
# @param mapping [Hash] { old_key => new_key }
def initialize_hofa(hofa, mapping:)
  unless mapping.empty?
    hofa = self.class.normalize_keys(hofa, adapter_mapping: mapping)
  end

  @data = Data.new(hofa)
end
|
#ncols ⇒ Object
117
118
119
|
# File 'lib/sqa/data_frame.rb', line 117
# Number of columns: the size of whatever #keys returns.
def ncols
keys.size
end
|
#rename(mapping) ⇒ Object
Creates a new instance with new keys based on the mapping hash where
{ old_key => new_key }
179
180
181
182
183
184
185
186
|
# File 'lib/sqa/data_frame.rb', line 179
# Returns a new SQA::DataFrame whose keys have been renamed according to
# mapping ({ old_key => new_key }). The renaming itself is done by the
# class-level rename on @data.to_h.
#
# @param mapping [Hash] { old_key => new_key }
# @return [SQA::DataFrame]
def rename(mapping)
  renamed_columns = self.class.rename(mapping, @data.to_h)
  SQA::DataFrame.new(renamed_columns)
end
|
#rename_vectors ⇒ Object
Creates a new instance with new keys based on the mapping hash where
{ old_key => new_key }
187
188
189
190
191
192
193
194
|
# File 'lib/sqa/data_frame.rb', line 187
# NOTE(review): this listing appears under the #rename_vectors heading, yet
# the body shown defines `rename` -- presumably rename_vectors is an alias
# of rename and the documentation tool printed the aliased source; confirm
# against lib/sqa/data_frame.rb.
#
# Builds a new SQA::DataFrame from @data.to_h after passing it through the
# class-level rename with the given mapping ({ old_key => new_key }).
def rename(mapping)
SQA::DataFrame.new(
self.class.rename(
mapping,
@data.to_h
)
)
end
|
#respond_to_missing?(method_name, include_private = false) ⇒ Boolean
218
219
220
|
# File 'lib/sqa/data_frame.rb', line 218
# Companion to method_missing: reports a method as available when @data
# responds to it, otherwise defers to the default answer via super.
#
# include_private is not passed to the @data.respond_to? check (only the
# implicit super call receives it).
def respond_to_missing?(method_name, include_private = false)
@data.respond_to?(method_name) || super
end
|
#row(x) ⇒ Object
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
|
# File 'lib/sqa/data_frame.rb', line 140
# Fetches a single row.
#
# @param x [Integer, Hash] either a zero-based row index, or a one-entry
#   Hash { key => value } selecting the first row whose `key` vector
#   contains `value`
# @return [Array] the row values in key order when x is an Integer
# @return [Hash] { key => value } for the matched row when x is a Hash
# @raise [BadParameterError] when the index is out of range, the Hash does
#   not have exactly one entry, no row matches, or x is of another class
def row(x)
  case x
  when Integer
    raise BadParameterError, "Row index out of range: #{x}" if x.negative? || x >= size

    keys.map { |key| @data[key][x] }
  when Hash
    # An empty Hash used to fall through to a nil key and a NoMethodError.
    raise BadParameterError, "x is #{x}" unless x.size == 1

    key, value = x.first
    # Keep x intact so the error below can report what was searched for;
    # &. treats an unknown vector the same as "not found".
    index = @data[key]&.index(value)
    raise BadParameterError, "Not Found #{x}" if index.nil?

    keys.zip(row(index)).to_h
  else
    raise BadParameterError, "Unknown x.class: #{x.class}"
  end
end
|
#rows ⇒ Object
Also known as:
to_a
Same as values.transpose. TODO: benchmark to see if the transpose method is faster.
129
130
131
132
133
134
135
136
|
# File 'lib/sqa/data_frame.rb', line 129
# Collects every row, in index order, into an Array by calling row(i) for
# each i from 0 up to size - 1.
#
# @return [Array] one entry per row
def rows
  size.times.map { |index| row(index) }
end
|
#size ⇒ Object
Also known as:
nrows, length
105
106
107
|
# File 'lib/sqa/data_frame.rb', line 105
# Number of rows, taken as the length of the first vector.
#
# A frame with no vectors at all (e.g. one built from the default empty
# Hash) has zero rows; previously this case raised NoMethodError on nil.
#
# @return [Integer]
def size
  column_names = @data.keys
  return 0 if column_names.empty?

  @data[column_names.first].size
end
|
#to_csv(path_to_file) ⇒ Object
80
81
82
83
84
85
86
87
|
# File 'lib/sqa/data_frame.rb', line 80
# Writes the frame to path_to_file as CSV: a header line made of the keys,
# followed by one line per row in index order. The file is opened in 'w'
# mode.
#
# @param path_to_file [Object] destination, passed to CSV.open
def to_csv(path_to_file)
  CSV.open(path_to_file, 'w') do |csv|
    header = keys
    csv << header
    size.times { |index| csv << row(index) }
  end
end
|
#to_json(path_to_file) ⇒ Object
90
91
92
|
# File 'lib/sqa/data_frame.rb', line 90
# Placeholder: JSON export is not written yet. The body only calls
# NotImplemented.raise and never uses path_to_file.
#
# NOTE(review): NotImplemented is not defined in this listing -- presumably
# a project helper whose .raise signals "not implemented"; confirm.
def to_json(path_to_file)
NotImplemented.raise
end
|