Module: EasyML::Data::Utils

Included in:
Dataset, Dataset::Splits::FileSplit, Dataset::Splits::Split
Defined in:
lib/easy_ml/data/utils.rb

Instance Method Summary collapse

Instance Method Details

#append_to_csv(df, path) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/easy_ml/data/utils.rb', line 4

def append_to_csv(df, path)
  return if df.empty?

  path = Pathname.new(path) if path.is_a?(String)
  FileUtils.mkdir_p(path.dirname)
  FileUtils.touch(path)

  # Check if the file is empty (i.e., if this is the first write)
  file_empty = File.zero?(path)

  # Write the DataFrame to a temporary file
  temp_file = "#{path}.tmp"
  df.write_csv(temp_file)

  # Append the content to the main file, skipping the header if not the first write
  File.open(path, "a") do |f|
    File.foreach(temp_file).with_index do |line, index|
      # Skip the header line if the file is not empty
      f.write(line) unless index == 0 && !file_empty
    end
  end

  # Delete the temporary file
  File.delete(temp_file)
end

#expand_dir(dir) ⇒ Object



30
31
32
33
34
# File 'lib/easy_ml/data/utils.rb', line 30

def expand_dir(dir)
  return dir if dir.to_s[0] == "/"

  Rails.root.join(dir)
end

#null_check(df) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/easy_ml/data/utils.rb', line 36

def null_check(df)
  result = {}
  null_counts = df.null_count
  total_count = df.height
  df.columns.each do |column|
    null_count = null_counts[column][0]
    next if null_count == 0

    result[column] = { null_count: null_count, total_count: total_count }
  end
  result.empty? ? nil : result
end