Class: Capsium::Package::Dataset

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/capsium/package/dataset.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(config:, data_path: nil) ⇒ Dataset

Returns a new instance of Dataset.



20
21
22
23
24
# File 'lib/capsium/package/dataset.rb', line 20

# Stores the configuration, resolves the data path, and loads the data
# immediately.
#
# @param config [Object] kept as-is in +@config+; +config.source+ is read
#   when no +data_path+ is given (other methods also read +format+ and
#   +schema+ from it)
# @param data_path [Object, nil] location of the data file (presumably a
#   filesystem path String -- confirm against callers); falls back to
#   +config.source+ when nil or false
def initialize(config:, data_path: nil)
  @config = config
  # An explicitly supplied data_path wins over the configured source.
  @data_path = data_path || config.source
  # Loading is eager: anything load_data raises propagates out of the constructor.
  @data = load_data
end

Instance Attribute Details

#config ⇒ Object (readonly)

Returns the value of attribute config.



14
15
16
# File 'lib/capsium/package/dataset.rb', line 14

# Reader for the +@config+ instance variable.
#
# @return [Object] the current value of +@config+
def config
  @config
end

#data ⇒ Object (readonly)

Returns the value of attribute data.



14
15
16
# File 'lib/capsium/package/dataset.rb', line 14

# Reader for the +@data+ instance variable.
#
# @return [Object] the current value of +@data+
def data
  @data
end

#data_path ⇒ Object (readonly)

Returns the value of attribute data_path.



14
15
16
# File 'lib/capsium/package/dataset.rb', line 14

# Reader for the +@data_path+ instance variable.
#
# @return [Object] the current value of +@data_path+
def data_path
  @data_path
end

Instance Method Details

#load_data ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
# File 'lib/capsium/package/dataset.rb', line 26

# Reads the file at +@data_path+ with the parser selected by
# +@config.format+.
#
# Handled formats: "yaml" (YAML.load_file), "json" (JSON.parse of the file
# contents), "csv" and "tsv" (CSV.read with headers enabled; "tsv" uses a
# tab column separator), and "sqlite" (delegated to +load_sqlite_data+).
#
# @return [Object] whatever the selected parser returns
# @raise [RuntimeError] when the format is none of the values listed above
def load_data
  format = @config.format

  case format
  when "yaml"
    YAML.load_file(@data_path)
  when "json"
    raw = File.read(@data_path)
    JSON.parse(raw)
  when "csv"
    CSV.read(@data_path, headers: true)
  when "tsv"
    CSV.read(@data_path, col_sep: "\t", headers: true)
  when "sqlite"
    load_sqlite_data
  else
    raise "Unsupported data file type: #{format}"
  end
end

#save_to_file(output_path) ⇒ Object



55
56
57
# File 'lib/capsium/package/dataset.rb', line 55

# Writes this object's JSON representation (the result of +to_json+) to
# +output_path+, replacing any existing content.
#
# @param output_path [String] destination passed straight to File.write
# @return [Integer] the value returned by File.write
def save_to_file(output_path)
  payload = to_json
  File.write(output_path, payload)
end

#validate ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/capsium/package/dataset.rb', line 38

# Validates the already-loaded +@data+ against the schema named in the
# config.
#
# Does nothing when +@config.schema+ is nil/false. The schema file is looked
# up relative to the directory of +@data_path+ and parsed with the parser
# matching +@config.format+ (YAML for "yaml", JSON for "json").
#
# The data file itself is not read again here: validation runs on the
# in-memory +@data+, so a second parse would only repeat I/O whose result
# is thrown away.
#
# @return [nil] when no schema is configured
# @return [Object] otherwise, whatever JSON::Validator.validate! returns
# @raise [RuntimeError] when a schema is configured but the format is
#   neither "yaml" nor "json"
# NOTE(review): JSON::Validator is presumably provided by the json-schema
# gem; any error it raises on invalid data propagates to the caller.
def validate
  return unless @config.schema

  format = @config.format
  # Reject unsupported formats before touching the filesystem.
  unless %w[yaml json].include?(format)
    raise "Validation is only supported for YAML and JSON formats"
  end

  schema_path = File.join(File.dirname(@data_path), @config.schema)
  schema =
    if format == "yaml"
      YAML.load_file(schema_path)
    else
      JSON.parse(File.read(schema_path))
    end

  JSON::Validator.validate!(schema, @data.to_json)
end