Class: EasyML::Data::Dataset::Splitters::DateSplitter

Inherits:
Object
  • Object
show all
Includes:
GlueGun::DSL
Defined in:
lib/easy_ml/data/dataset/splitters/date_splitter.rb

Instance Method Summary collapse

Constructor Details

#initialize(options) ⇒ DateSplitter

Returns a new instance of DateSplitter.



13
14
15
16
# File 'lib/easy_ml/data/dataset/splitters/date_splitter.rb', line 13

# Builds a splitter, defaulting the reference date to the current time.
#
# @param options [Hash] attributes forwarded to the parent initializer;
#   +:today+ is optional and falls back to +UTC.now+ when missing or falsy.
#   NOTE(review): +UTC+ is defined outside this listing -- presumably a UTC
#   time-zone object; confirm.
# @return [DateSplitter]
def initialize(options)
  # Merge the default into a copy so the caller's hash is never mutated.
  super(options.merge(today: options[:today] || UTC.now))
end

Instance Method Details

#months(n) ⇒ Object



33
34
35
# File 'lib/easy_ml/data/dataset/splitters/date_splitter.rb', line 33

# Wraps a month count in an ActiveSupport duration.
#
# @param n [Numeric] number of months
# @return [Object] whatever +ActiveSupport::Duration.months+ returns for +n+
def months(n)
  month_span = ActiveSupport::Duration.months(n)
  month_span
end

#split(df) ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/easy_ml/data/dataset/splitters/date_splitter.rb', line 18

# Partitions a data frame into train, validation and test frames by comparing
# the configured date column against the two cut-offs returned by #splits.
#
# Rows on or after the test cut-off go to test; of the remaining rows, those
# on or after the validation cut-off go to validation and the rest to train.
#
# @param df [Object] frame supporting +[]+ and +filter+ (used here with
#   Polars expressions)
# @return [Array] +[train_df, valid_df, test_df]+
# @raise [RuntimeError] when the date column's dtype is not a Polars::Datetime
def split(df)
  datetime_column = df[date_col].dtype.is_a?(Polars::Datetime)
  raise "Date splitter cannot split on non-date col #{date_col}, dtype is #{df[date_col].dtype}" unless datetime_column

  valid_cutoff, test_cutoff = splits
  date_expr = Polars.col(date_col)

  # Everything strictly before the test cut-off is shared by train and validation.
  before_test = df.filter(date_expr < test_cutoff)

  [
    before_test.filter(date_expr < valid_cutoff),
    before_test.filter(date_expr >= valid_cutoff),
    df.filter(date_expr >= test_cutoff)
  ]
end

#splits ⇒ Object



37
38
39
40
41
# File 'lib/easy_ml/data/dataset/splitters/date_splitter.rb', line 37

# Computes the two cut-off timestamps used by the splitter, each snapped to
# the beginning of its day.
#
# The test cut-off lies +months_test+ months before +today+; the validation
# cut-off lies a further +months_valid+ months before that.
#
# @return [Array] +[validation_date_start, test_date_start]+
def splits
  lookback_months = [months_test + months_valid, months_test]
  lookback_months.map do |months_back|
    today.advance(months: -months_back).beginning_of_day
  end
end

#today=(value) ⇒ Object



6
7
8
# File 'lib/easy_ml/data/dataset/splitters/date_splitter.rb', line 6

# Stores the reference date after converting it into the +UTC+ zone and then
# to a DateTime, delegating the actual assignment to the parent setter.
#
# @param value [Object] any value responding to +in_time_zone+
# @return [Object] result of the parent setter
def today=(value)
  zoned = value.in_time_zone(UTC)
  super(zoned.to_datetime)
end