Class: EasyML::Data::Datasource::MergedDatasource

Inherits:
EasyML::Data::Datasource show all
Includes:
GlueGun::DSL
Defined in:
lib/easy_ml/data/datasource/datasource_factory.rb,
lib/easy_ml/data/datasource/merged_datasource.rb

Overview

Do this here; otherwise we’ll end up with a circular dependency.

Instance Method Summary collapse

Instance Method Details

#cleanup ⇒ Object



41
42
43
# File 'lib/easy_ml/data/datasource/merged_datasource.rb', line 41

# Deletes the merged CSV located at +file_path+.
#
# Removal is forced, so a file that does not exist is silently ignored
# rather than raising.
def cleanup
  merged_csv = file_path
  FileUtils.rm(merged_csv, force: true)
end

#data ⇒ Object



33
34
35
36
37
38
39
# File 'lib/easy_ml/data/datasource/merged_datasource.rb', line 33

# Returns the merged data, memoized in +@data+.
#
# On the first call, when +file_exists?+ is truthy the CSV at +file_path+ is
# read with +Polars.read_csv+ (forwarding +polars_args+ as keyword options);
# otherwise the result of +merge_and_save+ is used. Later calls return the
# cached value without touching the file again.
def data
  return @data if @data

  @data = file_exists? ? Polars.read_csv(file_path, **polars_args) : merge_and_save
end

#file_path ⇒ Object



16
17
18
# File 'lib/easy_ml/data/datasource/merged_datasource.rb', line 16

# Path of the merged CSV: "merged_data.csv" joined onto +root_dir+.
#
# The path is computed once and cached in +@file_path+, so later changes to
# +root_dir+ do not affect the value returned here.
def file_path
  return @file_path if @file_path

  @file_path = File.join(root_dir, "merged_data.csv")
end

#in_batches(of: 10_000, &block) ⇒ Object



12
13
14
# File 'lib/easy_ml/data/datasource/merged_datasource.rb', line 12

# Reads the merged CSV and hands it to the caller in batches.
#
# The file at +file_path+ is read with +Polars.read_csv+ (forwarding
# +polars_args+) on every call; the memoized +data+ is not used.
#
# @param of [Integer] batch size passed to +iter_batches+ as +batch_size+
# @yield forwarded unchanged to +iter_batches+
# @return whatever +iter_batches+ returns
def in_batches(of: 10_000, &block)
  frame = Polars.read_csv(file_path, **polars_args)
  frame.iter_batches(batch_size: of, &block)
end

#last_updated_at ⇒ Object



20
21
22
# File 'lib/easy_ml/data/datasource/merged_datasource.rb', line 20

# Returns the smallest +last_updated_at+ among the underlying datasources.
#
# +datasources+ may be an Array of sources or a Hash whose values are the
# sources (the same two shapes +refresh!+ handles). For a Hash only the
# values are consulted; mapping over the Hash directly would send
# +last_updated_at+ to [key, value] pairs and raise NoMethodError.
#
# @return the minimum of the sources' +last_updated_at+ values, or nil when
#   there are no sources
def last_updated_at
  sources = datasources
  sources = sources.values if sources.is_a?(Hash)
  sources.map(&:last_updated_at).min
end

#refresh! ⇒ Object



24
25
26
27
28
29
30
31
# File 'lib/easy_ml/data/datasource/merged_datasource.rb', line 24

# Discards the merged output and refreshes every underlying datasource.
#
# Steps:
# 1. +cleanup+ removes the merged CSV on disk.
# 2. The memoized +@data+ is cleared so the next call to +data+ rebuilds
#    from the refreshed sources instead of returning the stale frame.
# 3. Each source in +datasources+ receives +refresh!+. +datasources+ may be
#    an Array of sources or a Hash whose values are the sources; any other
#    shape is left untouched.
#
# @return the collection of sources that were refreshed, or nil when
#   +datasources+ is neither an Array nor a Hash
def refresh!
  cleanup
  # Without this, +data+ keeps serving the frame loaded before the refresh.
  @data = nil

  sources = datasources
  case sources
  when Array then sources.each(&:refresh!)
  when Hash then sources.values.each(&:refresh!)
  end
end