Class: Prophet::Forecaster

Inherits:
Object
  • Object
show all
Includes:
Holidays, Plot
Defined in:
lib/prophet/forecaster.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Plot

#add_changepoints_to_plot, #plot, #plot_components, plot_cross_validation_metric, plt

Methods included from Holidays

#get_holiday_names, #holidays_df, #make_holidays_df

Constructor Details

#initialize(growth: "linear", changepoints: nil, n_changepoints: 25, changepoint_range: 0.8, yearly_seasonality: "auto", weekly_seasonality: "auto", daily_seasonality: "auto", holidays: nil, seasonality_mode: "additive", seasonality_prior_scale: 10.0, holidays_prior_scale: 10.0, changepoint_prior_scale: 0.05, mcmc_samples: 0, interval_width: 0.80, uncertainty_samples: 1000, scaling: "absmax") ⇒ Forecaster

Returns a new instance of Forecaster.



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/prophet/forecaster.rb', line 15

# Builds an unfitted forecaster from the given hyperparameters.
#
# Every option is stored on the instance. The three prior scales are coerced
# with #to_f, +changepoints+ is passed through +to_datetime+ and +holidays+
# through +convert_df+. State that is only known after fitting is reset to
# empty values, +validate_inputs+ is run, and finally a logger (WARN level,
# writing to $stderr) and the Stan backend are created.
#
# Raises ArgumentError when +scaling+ is neither "absmax" nor "minmax".
def initialize(
  growth: "linear",
  changepoints: nil,
  n_changepoints: 25,
  changepoint_range: 0.8,
  yearly_seasonality: "auto",
  weekly_seasonality: "auto",
  daily_seasonality: "auto",
  holidays: nil,
  seasonality_mode: "additive",
  seasonality_prior_scale: 10.0,
  holidays_prior_scale: 10.0,
  changepoint_prior_scale: 0.05,
  mcmc_samples: 0,
  interval_width: 0.80,
  uncertainty_samples: 1000,
  scaling: "absmax"
)
  @growth = growth

  # Explicit changepoints take precedence over the requested count.
  @changepoints = to_datetime(changepoints)
  @specified_changepoints = !@changepoints.nil?
  @n_changepoints = @specified_changepoints ? @changepoints.size : n_changepoints

  @changepoint_range = changepoint_range
  @yearly_seasonality = yearly_seasonality
  @weekly_seasonality = weekly_seasonality
  @daily_seasonality = daily_seasonality
  @holidays = convert_df(holidays)

  @seasonality_mode = seasonality_mode
  @seasonality_prior_scale = seasonality_prior_scale.to_f
  @changepoint_prior_scale = changepoint_prior_scale.to_f
  @holidays_prior_scale = holidays_prior_scale.to_f

  @mcmc_samples = mcmc_samples
  @interval_width = interval_width
  @uncertainty_samples = uncertainty_samples
  unless ["absmax", "minmax"].include?(scaling)
    raise ArgumentError, "scaling must be one of \"absmax\" or \"minmax\""
  end
  @scaling = scaling

  # Populated during fitting or by the add_* methods
  @start = nil
  @y_scale = nil
  @logistic_floor = false
  @t_scale = nil
  @changepoints_t = nil
  @seasonalities = {}
  @extra_regressors = {}
  @country_holidays = nil
  @stan_fit = nil
  @params = {}
  @history = nil
  @history_dates = nil
  @train_component_cols = nil
  @component_modes = nil
  @train_holiday_names = nil
  @fit_kwargs = {}
  validate_inputs

  # Log lines are reduced to "[prophet] <message>".
  @logger = ::Logger.new($stderr)
  @logger.level = ::Logger::WARN
  @logger.formatter = proc { |_severity, _datetime, _progname, msg| "[prophet] #{msg}\n" }
  @stan_backend = StanBackend.new(@logger)
end

Instance Attribute Details

#changepoint_prior_scaleObject (readonly)

Returns the value of attribute changepoint_prior_scale.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @changepoint_prior_scale, which the constructor stores as a
# Float (via #to_f).
def changepoint_prior_scale
  @changepoint_prior_scale
end

#changepoint_rangeObject (readonly)

Returns the value of attribute changepoint_range.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @changepoint_range, stored unchanged from the constructor's
# changepoint_range option.
def changepoint_range
  @changepoint_range
end

#changepointsObject (readonly)

Returns the value of attribute changepoints.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @changepoints, which the constructor initializes from its
# changepoints option after passing it through to_datetime.
def changepoints
  @changepoints
end

#country_holidaysObject

Returns the value of attribute country_holidays.



13
14
15
# File 'lib/prophet/forecaster.rb', line 13

# Reader for @country_holidays: nil after construction, otherwise the value
# most recently stored by add_country_holidays.
def country_holidays
  @country_holidays
end

#extra_regressorsObject

Returns the value of attribute extra_regressors.



13
14
15
# File 'lib/prophet/forecaster.rb', line 13

# Reader for @extra_regressors, a Hash keyed by regressor name that
# add_regressor fills with per-regressor settings.
def extra_regressors
  @extra_regressors
end

#fit_kwargsObject (readonly)

Returns the value of attribute fit_kwargs.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @fit_kwargs: an empty Hash after construction, replaced by a
# shallow copy of the keyword arguments given to fit.
def fit_kwargs
  @fit_kwargs
end

#growthObject (readonly)

Returns the value of attribute growth.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @growth, stored unchanged from the constructor's growth option.
def growth
  @growth
end

#historyObject (readonly)

Returns the value of attribute history.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @history: nil until fit stores the prepared training data frame.
def history
  @history
end

#holidaysObject (readonly)

Returns the value of attribute holidays.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @holidays, the constructor's holidays option after convert_df.
def holidays
  @holidays
end

#holidays_prior_scaleObject (readonly)

Returns the value of attribute holidays_prior_scale.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @holidays_prior_scale, which the constructor stores as a Float
# (via #to_f).
def holidays_prior_scale
  @holidays_prior_scale
end

#interval_widthObject (readonly)

Returns the value of attribute interval_width.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @interval_width, stored unchanged from the constructor's
# interval_width option.
def interval_width
  @interval_width
end

#loggerObject (readonly)

Returns the value of attribute logger.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @logger, the ::Logger the constructor creates on $stderr at
# WARN level.
def logger
  @logger
end

#mcmc_samplesObject (readonly)

Returns the value of attribute mcmc_samples.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @mcmc_samples, stored unchanged from the constructor's
# mcmc_samples option.
def mcmc_samples
  @mcmc_samples
end

#n_changepointsObject (readonly)

Returns the value of attribute n_changepoints.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @n_changepoints. The constructor sets it to the number of
# explicit changepoints when any were given, otherwise to the n_changepoints
# option.
def n_changepoints
  @n_changepoints
end

#paramsObject (readonly)

Returns the value of attribute params.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @params: an empty Hash after construction, assigned by fit.
def params
  @params
end

#seasonalitiesObject

Returns the value of attribute seasonalities.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @seasonalities, a Hash keyed by seasonality name that
# add_seasonality fills with per-seasonality settings.
def seasonalities
  @seasonalities
end

#seasonality_modeObject (readonly)

Returns the value of attribute seasonality_mode.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @seasonality_mode, stored unchanged from the constructor's
# seasonality_mode option.
def seasonality_mode
  @seasonality_mode
end

#seasonality_prior_scaleObject (readonly)

Returns the value of attribute seasonality_prior_scale.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @seasonality_prior_scale, which the constructor stores as a
# Float (via #to_f).
def seasonality_prior_scale
  @seasonality_prior_scale
end

#specified_changepointsObject (readonly)

Returns the value of attribute specified_changepoints.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @specified_changepoints: true when the constructor received
# explicit changepoints, false otherwise.
def specified_changepoints
  @specified_changepoints
end

#train_holiday_namesObject (readonly)

Returns the value of attribute train_holiday_names.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @train_holiday_names: nil after construction; set by
# make_holiday_features the first time holiday features are built.
def train_holiday_names
  @train_holiday_names
end

#uncertainty_samplesObject (readonly)

Returns the value of attribute uncertainty_samples.



6
7
8
# File 'lib/prophet/forecaster.rb', line 6

# Reader for @uncertainty_samples, stored unchanged from the constructor's
# uncertainty_samples option.
def uncertainty_samples
  @uncertainty_samples
end

Instance Method Details

#add_country_holidays(country_name) ⇒ Object

Raises:

  • (Error) — if the model has already been fit



422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
# File 'lib/prophet/forecaster.rb', line 422

# Registers the built-in holidays of a country with the model.
#
# country_name - Country identifier handed to get_holiday_names. A Hash with
#                a :country_name key is also accepted, for callers that
#                still use the previously documented keyword form.
#
# Replacing an already configured country logs a warning. Returns self.
# Raises Error if the model has already been fit.
def add_country_holidays(country_name)
  raise Error, "Country holidays must be added prior to model fitting." if @history

  # Unwrap the legacy keyword-style argument
  legacy_form = country_name.is_a?(Hash) && country_name[:country_name]
  country_name = country_name[:country_name] if legacy_form

  # Every holiday name must be a valid column name; clashes with holidays
  # that already exist are tolerated so the two sets can be merged.
  get_holiday_names(country_name).each do |holiday_name|
    validate_column_name(holiday_name, check_holidays: false)
  end

  previous = @country_holidays
  logger.warn "Changing country holidays from #{previous.inspect} to #{country_name.inspect}." if previous

  @country_holidays = country_name
  self
end

#add_group_component(components, name, group) ⇒ Object



530
531
532
533
534
535
536
537
538
# File 'lib/prophet/forecaster.rb', line 530

# Adds a summary component that spans the columns of several components.
#
# components - Data frame with "col" and "component" vectors.
# name       - Name of the group component to add.
# group      - Component names whose columns make up the group.
#
# Returns the data frame with one extra row per distinct matching column,
# or the input unchanged when no component in +group+ is present.
def add_group_component(components, name, group)
  in_group = components["component"].in?(group)
  members = components[in_group].dup
  group_cols = members["col"].uniq
  return components unless group_cols.size > 0

  group_rows = Rover::DataFrame.new({"col" => group_cols, "component" => name})
  components.concat(group_rows)
end

#add_regressor(name, prior_scale: nil, standardize: "auto", mode: nil) ⇒ Object

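Raises:

  • (Error) — if the model has already been fit
  • (ArgumentError) — if the prior scale is not positive or the mode is neither "additive" nor "multiplicative"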



374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
# File 'lib/prophet/forecaster.rb', line 374

# Registers an additional regressor column to be used in fitting and
# prediction.
#
# name        - Column name of the regressor (checked with
#               validate_column_name).
# prior_scale - Prior scale for the regressor coefficient; defaults to the
#               model's holidays prior scale. Coerced to Float, matching how
#               the constructor and add_seasonality treat prior scales.
# standardize - Stored as given (default "auto").
# mode        - "additive" or "multiplicative"; defaults to the model's
#               seasonality mode.
#
# Returns self.
# Raises Error if the model has already been fit, ArgumentError if the prior
# scale is not positive or the mode is not recognized.
def add_regressor(name, prior_scale: nil, standardize: "auto", mode: nil)
  raise Error, "Regressors must be added prior to model fitting." if @history
  validate_column_name(name, check_regressors: false)
  prior_scale = (prior_scale || @holidays_prior_scale).to_f
  mode ||= @seasonality_mode
  raise ArgumentError, "Prior scale must be > 0" if prior_scale <= 0
  unless ["additive", "multiplicative"].include?(mode)
    raise ArgumentError, "mode must be \"additive\" or \"multiplicative\""
  end
  # mu/std start as the identity transform
  @extra_regressors[name] = {
    prior_scale: prior_scale,
    standardize: standardize,
    mu: 0.0,
    std: 1.0,
    mode: mode
  }
  self
end

#add_seasonality(name:, period:, fourier_order:, prior_scale: nil, mode: nil, condition_name: nil) ⇒ Object

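Raises:

  • (Error) — if the model has already been fit
  • (ArgumentError) — if the prior scale or Fourier order is not positive, or the mode is neither "additive" nor "multiplicative"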



393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
# File 'lib/prophet/forecaster.rb', line 393

# Registers a seasonal component.
#
# name           - Seasonality name. "daily", "weekly" and "yearly" may be
#                  given to override the built-in seasonalities; any other
#                  name is checked with validate_column_name.
# period         - Stored as given.
# fourier_order  - Must be > 0.
# prior_scale    - Defaults to the model's seasonality prior scale; coerced
#                  to Float when given. Must be > 0.
# mode           - "additive" or "multiplicative"; defaults to the model's
#                  seasonality mode.
# condition_name - Optional column name (validated when given).
#
# Returns self.
# Raises Error if the model has already been fit, ArgumentError on invalid
# prior scale, Fourier order or mode.
def add_seasonality(name:, period:, fourier_order:, prior_scale: nil, mode: nil, condition_name: nil)
  raise Error, "Seasonality must be added prior to model fitting." if @history

  # Built-in seasonalities may be overwritten, so only custom names are checked
  built_in = ["daily", "weekly", "yearly"].include?(name)
  validate_column_name(name, check_seasonalities: false) unless built_in

  ps = prior_scale.nil? ? @seasonality_prior_scale : prior_scale.to_f
  raise ArgumentError, "Prior scale must be > 0" if ps <= 0
  raise ArgumentError, "Fourier Order must be > 0" if fourier_order <= 0

  mode ||= @seasonality_mode
  unless ["additive", "multiplicative"].include?(mode)
    raise ArgumentError, "mode must be \"additive\" or \"multiplicative\""
  end

  validate_column_name(condition_name) if condition_name

  @seasonalities[name] = {
    period: period,
    fourier_order: fourier_order,
    prior_scale: ps,
    mode: mode,
    condition_name: condition_name
  }
  self
end

#construct_holiday_dataframe(dates) ⇒ Object



301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
# File 'lib/prophet/forecaster.rb', line 301

# Assembles the data frame of holidays relevant to +dates+.
#
# Starts from a copy of the user-supplied holidays (or an empty frame) and
# appends the configured country's holidays for the years found in +dates+.
# Once holiday names from fitting are known, the frame is restricted to those
# names, and any fitted name missing from it is appended as a row that only
# has a "holiday" value.
def construct_holiday_dataframe(dates)
  all_holidays = @holidays ? @holidays.dup : Rover::DataFrame.new

  if @country_holidays
    years = dates.map(&:year)
    all_holidays = all_holidays.concat(make_holidays_df(years, @country_holidays))
  end

  return all_holidays unless @train_holiday_names

  # Keep only holidays that were seen while fitting
  known = all_holidays[all_holidays["holiday"].in?(@train_holiday_names)]

  # Holidays seen while fitting but absent here are re-added without a date
  absent_names = @train_holiday_names[!@train_holiday_names.in?(known["holiday"])]
  known.concat(Rover::DataFrame.new({"holiday" => absent_names}))
end

#fit(df, **kwargs) ⇒ Object

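Raises:

  • (Error) — if the model has already been fit, or the data has fewer than 2 rows with a non-missing y
  • (ArgumentError) — if the input is not a data frame or lacks the ds and y columns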



652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
# File 'lib/prophet/forecaster.rb', line 652

# Fits the model to the given history.
#
# df     - Training data; after convert_df it must be a Rover::DataFrame
#          with "ds" and "y" columns.
# kwargs - Extra keyword arguments forwarded to the Stan backend call; a
#          shallow copy is kept in @fit_kwargs.
#
# Returns self.
# Raises Error if the model was already fit or fewer than 2 rows have a
# non-missing y; ArgumentError if the input is not a data frame or lacks the
# ds/y columns.
def fit(df, **kwargs)
  raise Error, "Prophet object can only be fit once" if @history

  df = convert_df(df)
  raise ArgumentError, "Must be a data frame" unless df.is_a?(Rover::DataFrame)

  unless df.include?("ds") && df.include?("y")
    raise ArgumentError, "Data frame must have ds and y columns"
  end

  # Rows with a missing y are excluded from the training history...
  history = df[!df["y"].missing]
  raise Error, "Data has less than 2 non-nil rows" if history.size < 2

  # ...but their dates are still recorded in @history_dates
  @history_dates = to_datetime(df["ds"]).sort
  history = setup_dataframe(history, initialize_scales: true)
  @history = history
  set_auto_seasonalities
  seasonal_features, prior_scales, component_cols, modes = make_all_seasonality_features(history)
  @train_component_cols = component_cols
  @component_modes = modes
  @fit_kwargs = kwargs.dup # TODO deep dup?

  set_changepoints

  # Integer code handed to the backend for each growth type
  trend_indicator = {"linear" => 0, "logistic" => 1, "flat" => 2}

  # Data passed to the Stan backend
  dat = {
    "T" => history.shape[0],
    "K" => seasonal_features.shape[1],
    "S" => @changepoints_t.size,
    "y" => history["y_scaled"],
    "t" => history["t"],
    "t_change" => @changepoints_t,
    "X" => seasonal_features,
    "sigmas" => prior_scales,
    "tau" => @changepoint_prior_scale,
    "trend_indicator" => trend_indicator[@growth],
    "s_a" => component_cols["additive_terms"],
    "s_m" => component_cols["multiplicative_terms"]
  }

  # Linear and flat growth pass an all-zero cap; any other growth value is
  # treated as logistic and uses the scaled cap column.
  if @growth == "linear"
    dat["cap"] = Numo::DFloat.zeros(@history.shape[0])
    kinit = linear_growth_init(history)
  elsif @growth == "flat"
    dat["cap"] = Numo::DFloat.zeros(@history.shape[0])
    kinit = flat_growth_init(history)
  else
    dat["cap"] = history["cap_scaled"]
    kinit = logistic_growth_init(history)
  end

  # Initial parameter values: growth-specific k and m, zeros elsewhere
  stan_init = {
    "k" => kinit[0],
    "m" => kinit[1],
    "delta" => Numo::DFloat.zeros(@changepoints_t.size),
    "beta" => Numo::DFloat.zeros(seasonal_features.shape[1]),
    "sigma_obs" => 1
  }

  if history["y"].min == history["y"].max && (@growth == "linear" || @growth == "flat")
    # Nothing to fit.
    # y is constant: use the initial values as the parameters, with a tiny
    # observation noise, each wrapped in a one-element array.
    @params = stan_init
    @params["sigma_obs"] = 1e-9
    @params.each do |par, _|
      @params[par] = Numo::NArray.asarray([@params[par]])
    end
  elsif @mcmc_samples > 0
    @params = @stan_backend.sampling(stan_init, dat, @mcmc_samples, **kwargs)
  else
    @params = @stan_backend.fit(stan_init, dat, **kwargs)
  end

  # If no changepoints were requested, replace delta with 0s
  if @changepoints.size == 0
    # Fold delta into the base rate k
    # Numo doesn't support -1 with reshape
    negative_one = @params["delta"].shape.inject(&:*)
    @params["k"] = @params["k"] + @params["delta"].reshape(negative_one)
    @params["delta"] = Numo::DFloat.zeros(@params["delta"].shape).reshape(negative_one, 1)
  end

  self
end

#flat_growth_init(df) ⇒ Object



646
647
648
649
650
# File 'lib/prophet/forecaster.rb', line 646

# Initial trend parameters for flat growth.
#
# Returns [k, m] where the rate k is 0 and the offset m is the mean of the
# "y_scaled" column.
def flat_growth_init(df)
  [0, df["y_scaled"].mean]
end

#flat_trend(t, m) ⇒ Object



798
799
800
801
# File 'lib/prophet/forecaster.rb', line 798

# Evaluates the flat trend: the offset +m+ multiplied by +t.new_ones+.
def flat_trend(t, m)
  m * t.new_ones
end

#fourier_series(dates, period, series_order) ⇒ Object



285
286
287
288
289
290
291
292
293
294
# File 'lib/prophet/forecaster.rb', line 285

# Builds the Fourier terms used as seasonality features.
#
# dates        - Collection whose elements respond to #to_i; the integers
#                are divided by the number of seconds in a day.
# period       - Seasonality period, in the same unit as the divided dates.
# series_order - Number of harmonics.
#
# Returns a flat Array holding, for each harmonic 1..series_order, the sine
# term followed by the cosine term.
def fourier_series(dates, period, series_order)
  t = dates.map(&:to_i).to_numo / (3600 * 24.0)
  sine = Numo::DFloat::Math.method(:sin)
  cosine = Numo::DFloat::Math.method(:cos)

  # A flat array of columns is returned, so no column_stack is needed
  terms = []
  series_order.times do |i|
    angle = 2.0 * (i + 1) * Math::PI * t / period
    terms << sine.call(angle)
    terms << cosine.call(angle)
  end
  terms
end

#initialize_scales(initialize_scales, df) ⇒ Object



226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/prophet/forecaster.rb', line 226

# Derives the scaling parameters (@y_min, @y_scale, @start, @t_scale) from
# the data frame.
#
# initialize_scales - When falsy the method returns immediately and leaves
#                     all state untouched.
# df                - Data frame with "ds" and "y" columns (plus "floor" and
#                     "cap" for logistic growth with a floor).
#
# With "absmax" scaling @y_scale is the largest absolute y (measured from the
# floor when one applies); with "minmax" it is the range between the minimum
# and the maximum (cap, for logistic growth with a floor). A zero scale is
# replaced by 1.0 so @y_scale is a Float on every path.
def initialize_scales(initialize_scales, df)
  return unless initialize_scales

  if @growth == "logistic" && df.include?("floor")
    @logistic_floor = true
    if @scaling == "absmax"
      distance = (df["y"] - df["floor"]).abs
      @y_min = distance.min.to_f
      @y_scale = distance.max.to_f
    elsif @scaling == "minmax"
      @y_min = df["floor"].min
      @y_scale = (df["cap"].max - @y_min).to_f
    end
  else
    if @scaling == "absmax"
      @y_min = 0.0
      @y_scale = df["y"].abs.max.to_f
    elsif @scaling == "minmax"
      @y_min = df["y"].min
      @y_scale = (df["y"].max - @y_min).to_f
    end
  end
  # Guard against an all-zero (or constant) series; keep the Float type
  @y_scale = 1.0 if @y_scale == 0
  @start = df["ds"].min
  @t_scale = df["ds"].max - @start
end

#linear_growth_init(df) ⇒ Object



609
610
611
612
613
614
615
616
# File 'lib/prophet/forecaster.rb', line 609

# Initial trend parameters for linear growth.
#
# Fits the straight line through the first and last rows of +df+ in the
# ("t", "y_scaled") plane and returns [k, m]: its slope and its value at
# t = 0.
def linear_growth_init(df)
  first_idx = 0
  last_idx = df.size - 1
  times = df["t"]
  values = df["y_scaled"]

  elapsed = times[last_idx] - times[first_idx]
  slope = (values[last_idx] - values[first_idx]) / elapsed
  intercept = values[first_idx] - slope * times[first_idx]
  [slope, intercept]
end

#logistic_growth_init(df) ⇒ Object



618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
# File 'lib/prophet/forecaster.rb', line 618

# Initial trend parameters for logistic growth.
#
# Uses the first and last rows of +df+ ("t", "y_scaled", "cap_scaled") and
# returns [k, m], the rate and offset derived from those two endpoints.
def logistic_growth_init(df)
  first_idx = 0
  last_idx = df.size - 1
  elapsed = df["t"][last_idx] - df["t"][first_idx]

  cap_first = df["cap_scaled"][first_idx]
  cap_last = df["cap_scaled"][last_idx]

  # Clamp each endpoint into [1%, 99%] of its cap, in case y > cap or y < 0
  y_first = [0.01 * cap_first, [0.99 * cap_first, df["y_scaled"][first_idx]].min].max
  y_last = [0.01 * cap_last, [0.99 * cap_last, df["y_scaled"][last_idx]].min].max

  ratio_first = cap_first / y_first
  ratio_last = cap_last / y_last

  # Nudge the first ratio when both endpoints are (almost) the same, so the
  # two log terms below differ
  ratio_first = 1.05 * ratio_first if (ratio_first - ratio_last).abs <= 0.01

  log_first = Math.log(ratio_first - 1)
  log_last = Math.log(ratio_last - 1)

  # Offset first, then rate
  offset = log_first * elapsed / (log_first - log_last)
  rate = (log_first - log_last) / elapsed
  [rate, offset]
end

#make_all_seasonality_features(df) ⇒ Object



443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
# File 'lib/prophet/forecaster.rb', line 443

# Builds the full feature matrix for seasonalities, holidays and extra
# regressors, together with the bookkeeping that describes its columns.
#
# df - Data frame with a "ds" column, plus one column per extra regressor
#      and per seasonality condition.
#
# Returns [seasonal_features, prior_scales, component_cols, modes]:
# the concatenated feature frame, one prior scale per feature column, and
# the two values returned by regressor_column_matrix.
def make_all_seasonality_features(df)
  seasonal_features = []
  prior_scales = []
  modes = {"additive" => [], "multiplicative" => []}

  # Seasonality features
  @seasonalities.each do |name, props|
    features = make_seasonality_features(
      df["ds"],
      props[:period],
      props[:fourier_order],
      name
    )
    # Conditional seasonalities are zeroed on rows selected by the negated
    # condition lookup
    if props[:condition_name]
      features[!df.where(props[:condition_name])] = 0
    end
    seasonal_features << features
    # Every column of this seasonality shares its prior scale
    prior_scales.concat([props[:prior_scale]] * features.shape[1])
    modes[props[:mode]] << name
  end

  # Holiday features
  holidays = construct_holiday_dataframe(df["ds"])
  if holidays.size > 0
    features, holiday_priors, holiday_names = make_holiday_features(df["ds"], holidays)
    seasonal_features << features
    prior_scales.concat(holiday_priors)
    # Holidays always use the model-wide seasonality mode
    modes[@seasonality_mode].concat(holiday_names)
  end

  # Additional regressors
  @extra_regressors.each do |name, props|
    seasonal_features << Rover::DataFrame.new({name => df[name]})
    prior_scales << props[:prior_scale]
    modes[props[:mode]] << name
  end

  # Dummy to prevent empty X
  if seasonal_features.size == 0
    seasonal_features << Rover::DataFrame.new({"zeros" => [0] * df.shape[0]})
    prior_scales << 1.0
  end

  # Join the per-source frames column-wise into one frame
  seasonal_features = df_concat_axis_one(seasonal_features)

  component_cols, modes = regressor_column_matrix(seasonal_features, modes)

  [seasonal_features, prior_scales, component_cols, modes]
end

#make_future_dataframe(periods:, freq: "D", include_history: true) ⇒ Object

Raises:

  • (Error) — if the model has not been fit
  • (ArgumentError) — if freq is not a supported frequency



961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
# File 'lib/prophet/forecaster.rb', line 961

# Builds a data frame of dates extending past the fitted history.
#
# periods         - Number of future dates to generate.
# freq            - Step between dates: "<n>S" (seconds), "H", "D", "W",
#                   "MS", "QS" or "YS".
# include_history - When true the history dates are prepended.
#
# Returns a Rover::DataFrame with a single "ds" column.
# Raises Error if the model has not been fit, ArgumentError on an unknown
# freq.
def make_future_dataframe(periods:, freq: "D", include_history: true)
  raise Error, "Model has not been fit" unless @history_dates
  last_date = @history_dates.max

  # TODO add more freq
  # https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
  #
  # Frequencies with a constant length in seconds. Days and weeks qualify
  # because dates are handled in UTC (no DST or leap seconds).
  step_seconds =
    case freq
    when /\A\d+S\z/ then freq.to_i
    when "H" then 3600
    when "D" then 24 * 3600
    when "W" then 7 * 24 * 3600
    end

  if step_seconds
    dates = (periods + 1).times.map { |i| last_date + i * step_seconds }
  else
    # Calendar frequencies advance from the previously generated date.
    # TODO reset day (and month) from last date, but keep time
    advance =
      case freq
      when "MS" then ->(d) { d.next_month }
      when "QS" then ->(d) { d.next_month.next_month.next_month }
      when "YS" then ->(d) { d.next_year }
      else raise ArgumentError, "Unknown freq: #{freq}"
      end
    dates = [last_date]
    periods.times do
      dates << advance.call(dates.last.to_datetime).to_time.utc
    end
  end

  # Drop the starting point and keep exactly the requested number of dates
  dates.select! { |d| d > last_date }
  dates = dates.last(periods)
  dates = @history_dates.to_numo.concatenate(Numo::NArray.cast(dates)) if include_history
  Rover::DataFrame.new({"ds" => dates})
end

#make_holiday_features(dates, holidays) ⇒ Object



326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
# File 'lib/prophet/forecaster.rb', line 326

# Builds the indicator features for holidays.
#
# dates    - Timestamps of the rows to build features for; each is reduced
#            to its calendar date before matching.
# holidays - Data frame with "ds" and "holiday" columns and optional
#            "lower_window" / "upper_window" columns.
#
# For every holiday row and every day offset in lower_window..upper_window a
# column named "<holiday>_delim_<+/-><offset>" is produced. It is 1.0 on
# every row of +dates+ that falls on the shifted holiday date (so all
# sub-daily rows of that day are marked) and 0.0 elsewhere. A row without a
# "ds" value still yields its all-zero column.
#
# The first call also records the holiday names in @train_holiday_names.
#
# Returns [holiday_features, prior_scale_list, holiday_names].
# Raises ArgumentError if the prior scale is not positive or is inconsistent
# for a holiday.
def make_holiday_features(dates, holidays)
  expanded_holidays = Hash.new { |hash, key| hash[key] = Numo::DFloat.zeros(dates.size) }
  prior_scales = {}

  # Index row positions by calendar date once, instead of rescanning the
  # dates for every holiday occurrence. A date maps to all of its rows.
  positions_by_date = {}
  dates.map(&:to_date).to_a.each_with_index do |date, position|
    (positions_by_date[date] ||= []) << position
  end

  holidays.each_row do |row|
    dt = row["ds"]
    begin
      lw = row["lower_window"].to_i
      uw = row["upper_window"].to_i
    rescue IndexError
      # No window columns: the holiday covers only its own day
      lw = 0
      uw = 0
    end
    ps = @holidays_prior_scale
    if prior_scales[row["holiday"]] && prior_scales[row["holiday"]] != ps
      raise ArgumentError, "Holiday #{row["holiday"].inspect} does not have consistent prior scale specification."
    end
    raise ArgumentError, "Prior scale must be > 0" if ps <= 0
    prior_scales[row["holiday"]] = ps

    lw.upto(uw).each do |offset|
      key = "#{row["holiday"]}_delim_#{offset >= 0 ? "+" : "-"}#{offset.abs}"
      # Accessing the key creates the all-zero column even without a match
      column = expanded_holidays[key]
      next unless dt

      positions_by_date.fetch(dt + offset, []).each do |position|
        column[position] = 1.0
      end
    end
  end
  holiday_features = Rover::DataFrame.new(expanded_holidays)
  # Make sure column order is consistent
  holiday_features = holiday_features[holiday_features.vector_names.sort]
  prior_scale_list = holiday_features.vector_names.map { |h| prior_scales[h.split("_delim_")[0]] }
  holiday_names = prior_scales.keys
  # Store holiday names used in fit
  if @train_holiday_names.nil?
    @train_holiday_names = Rover::Vector.new(holiday_names)
  end
  [holiday_features, prior_scale_list, holiday_names]
end

#make_seasonality_features(dates, period, series_order, prefix) ⇒ Object



296
297
298
299
# File 'lib/prophet/forecaster.rb', line 296

# Wraps the Fourier terms for one seasonality in a data frame whose columns
# are named "<prefix>_delim_1", "<prefix>_delim_2", ... in term order.
def make_seasonality_features(dates, period, series_order, prefix)
  columns = {}
  fourier_series(dates, period, series_order).each_with_index do |term, idx|
    columns["#{prefix}_delim_#{idx + 1}"] = term
  end
  Rover::DataFrame.new(columns)
end

#parse_seasonality_args(name, arg, auto_disable, default_order) ⇒ Object



540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
# File 'lib/prophet/forecaster.rb', line 540

# Resolves a seasonality option into a Fourier order.
#
# name          - Seasonality name ("yearly", "weekly", "daily").
# arg           - "auto", true, false, or a value convertible with #to_i.
# auto_disable  - Whether "auto" should switch the seasonality off.
# default_order - Fourier order used when the seasonality is enabled
#                 without an explicit order.
#
# Returns the Fourier order; 0 means the seasonality is disabled. With
# "auto", a custom seasonality of the same name or a truthy +auto_disable+
# disables the built-in one and logs the reason at info level.
def parse_seasonality_args(name, arg, auto_disable, default_order)
  case arg
  when "auto"
    if @seasonalities.include?(name)
      logger.info "Found custom seasonality named #{name.inspect}, disabling built-in #{name.inspect} seasonality."
      0
    elsif auto_disable
      logger.info "Disabling #{name} seasonality. Run prophet with #{name}_seasonality: true to override this."
      0
    else
      default_order
    end
  when true
    default_order
  when false
    0
  else
    arg.to_i
  end
end

#piecewise_linear(t, deltas, k, m, changepoint_ts) ⇒ Object



765
766
767
768
769
770
771
772
773
774
775
776
777
# File 'lib/prophet/forecaster.rb', line 765

# Evaluates the piecewise linear trend.
#
# t              - Times at which to evaluate the trend.
# deltas         - Rate change at each changepoint.
# k              - Base rate.
# m              - Base offset.
# changepoint_ts - Times of the changepoints.
#
# Returns rate * t + offset, where rate and offset are the base values plus
# the adjustments of every changepoint with time <= t.
def piecewise_linear(t, deltas, k, m, changepoint_ts)
  # Offset adjustment paired with each rate change
  offset_changes = -changepoint_ts * deltas

  rate = t.new_ones * k
  offset = t.new_ones * m
  changepoint_ts.each_with_index do |changepoint, idx|
    reached = t >= changepoint
    rate[reached] += deltas[idx]
    offset[reached] += offset_changes[idx]
  end
  rate * t + offset
end

#piecewise_logistic(t, cap, deltas, k, m, changepoint_ts) ⇒ Object



779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
# File 'lib/prophet/forecaster.rb', line 779

# Evaluates the piecewise logistic trend.
#
# t              - Times at which to evaluate the trend.
# cap            - Capacity at each t (responds to #to_numo).
# deltas         - Rate change at each changepoint.
# k              - Base rate.
# m              - Base offset.
# changepoint_ts - Times of the changepoints.
#
# Returns cap / (1 + exp(-rate * (t - offset))) with the cumulative rate and
# offset in effect at each t.
def piecewise_logistic(t, cap, deltas, k, m, changepoint_ts)
  base_rate = Numo::NArray.asarray(k)
  base_rate = base_rate.reshape(1) if base_rate.ndim < 1
  # Rate in effect before the first changepoint, then after each changepoint
  cumulative_rates = base_rate.concatenate(deltas.cumsum + k)

  # Each offset change depends on the sum of the ones computed before it
  offset_changes = Numo::DFloat.zeros(changepoint_ts.size)
  changepoint_ts.each_with_index do |changepoint, idx|
    rate_ratio = cumulative_rates[idx] / cumulative_rates[idx + 1]
    offset_changes[idx] = (changepoint - m - offset_changes.sum) * (1 - rate_ratio)
  end

  rate = t.new_ones * k
  offset = t.new_ones * m
  changepoint_ts.each_with_index do |changepoint, idx|
    reached = t >= changepoint
    rate[reached] += deltas[idx]
    offset[reached] += offset_changes[idx]
  end
  cap.to_numo / (1 + Numo::NMath.exp(-rate * (t - offset)))
end

#predict(df = nil) ⇒ Object

Raises:

  • (Error) — if the model has not been fit
  • (ArgumentError) — if the given data frame has no rows



737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
# File 'lib/prophet/forecaster.rb', line 737

# Produces a forecast.
#
# df - Data frame to predict for; when nil a copy of the training history is
#      used. A given frame is copied and run through setup_dataframe.
#
# Returns a data frame with "ds", "trend", optional "cap"/"floor", the
# uncertainty intervals (when enabled), the seasonal components and "yhat".
# Intervals are skipped only when @uncertainty_samples is nil or false; the
# check is plain Ruby truthiness, so 0 does not disable them.
#
# Raises Error if the model has not been fit, ArgumentError if +df+ has no
# rows.
def predict(df = nil)
  raise Error, "Model has not been fit." unless @history

  frame =
    if df.nil?
      @history.dup
    else
      raise ArgumentError, "Dataframe has no rows." if df.shape[0] == 0
      setup_dataframe(df.dup)
    end

  frame["trend"] = predict_trend(frame)
  components = predict_seasonal_components(frame)
  intervals = @uncertainty_samples ? predict_uncertainty(frame) : nil

  # Of the input only ds, trend, cap and floor are carried over
  kept = ["ds", "trend"]
  kept << "cap" if frame.include?("cap")
  kept << "floor" if @logistic_floor

  forecast = df_concat_axis_one([frame[kept], intervals, components])
  forecast["yhat"] = forecast["trend"] * (forecast["multiplicative_terms"] + 1) + forecast["additive_terms"]
  forecast
end

#predict_seasonal_components(df) ⇒ Object



821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
# File 'lib/prophet/forecaster.rb', line 821

# Predicts every seasonal component (including group totals) for +df+.
#
# Returns a data frame with one column per component holding its mean, plus
# "<component>_lower" / "<component>_upper" percentile columns when
# @uncertainty_samples is truthy. Additive components are multiplied by
# @y_scale.
def predict_seasonal_components(df)
  seasonal_features, _, component_cols, _ = make_all_seasonality_features(df)

  with_bounds = @uncertainty_samples
  if with_bounds
    lower_p = 100 * (1.0 - @interval_width) / 2
    upper_p = 100 * (1.0 + @interval_width) / 2
  end

  x = seasonal_features.to_numo
  component_names = component_cols.vector_names - ["col"]

  data = component_names.each_with_object({}) do |component, columns|
    # Restrict beta to the feature columns that belong to this component
    beta_c = @params["beta"] * component_cols[component].to_numo

    comp = x.dot(beta_c.transpose)
    comp = comp * @y_scale if @component_modes["additive"].include?(component)

    columns[component] = comp.mean(axis: 1, nan: true)
    if with_bounds
      columns["#{component}_lower"] = comp.percentile(lower_p, axis: 1)
      columns["#{component}_upper"] = comp.percentile(upper_p, axis: 1)
    end
  end
  Rover::DataFrame.new(data)
end

#predict_trend(df) ⇒ Object



803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
# File 'lib/prophet/forecaster.rb', line 803

# Predicts the trend for +df+ from the mean fitted parameters.
#
# The scaled trend for the configured growth type is multiplied by @y_scale
# and the "floor" column is added. Returns the resulting array.
def predict_trend(df)
  k = @params["k"].mean(nan: true)
  m = @params["m"].mean(nan: true)
  deltas = @params["delta"].mean(axis: 0, nan: true)

  t = Numo::NArray.asarray(df["t"].to_a)
  scaled_trend =
    case @growth
    when "linear"
      piecewise_linear(t, deltas, k, m, @changepoints_t)
    when "logistic"
      piecewise_logistic(t, df["cap_scaled"], deltas, k, m, @changepoints_t)
    when "flat"
      flat_trend(t, m)
    end

  scaled_trend * @y_scale + Numo::NArray.asarray(df["floor"].to_a)
end

#predict_uncertainty(df) ⇒ Object



885
886
887
888
889
890
891
892
893
894
895
896
897
898
# File 'lib/prophet/forecaster.rb', line 885

# Computes the prediction intervals for "yhat" and "trend".
#
# Draws posterior predictive samples for +df+ and takes, across the samples
# of each row (axis 1), the percentiles that bound the central
# @interval_width share. Returns a data frame with the columns yhat_lower,
# yhat_upper, trend_lower and trend_upper.
def predict_uncertainty(df)
  sim_values = sample_posterior_predictive(df)

  percentiles = {
    "lower" => 100 * (1.0 - @interval_width) / 2,
    "upper" => 100 * (1.0 + @interval_width) / 2
  }

  series = ["yhat", "trend"].each_with_object({}) do |key, columns|
    percentiles.each do |bound, pct|
      columns["#{key}_#{bound}"] = sim_values[key].percentile(pct, axis: 1)
    end
  end

  Rover::DataFrame.new(series)
end

#predictive_samples(df) ⇒ Object



879
880
881
882
883
# File 'lib/prophet/forecaster.rb', line 879

# Draws posterior predictive samples for +df+.
#
# The frame is copied and prepared with setup_dataframe first; the result of
# sample_posterior_predictive is returned as is.
def predictive_samples(df)
  sample_posterior_predictive(setup_dataframe(df.dup))
end

#regressor_column_matrix(seasonal_features, modes) ⇒ Object



493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
# File 'lib/prophet/forecaster.rb', line 493

# Builds the matrix that maps feature columns to components.
#
# seasonal_features - Feature frame; the part of each column name before
#                     "_delim_" is taken as its component name.
# modes             - Hash with "additive" and "multiplicative" arrays of
#                     component names. It is modified in place: the group
#                     names added here are appended to it.
#
# Returns [component_cols, modes], where component_cols is the crosstab of
# feature column index against component (with a "col" index column) and
# always contains "additive_terms" and "multiplicative_terms".
def regressor_column_matrix(seasonal_features, modes)
  # One row per feature column: its position and the component it belongs to
  components = Rover::DataFrame.new(
    "col" => seasonal_features.shape[1].times.to_a,
    "component" => seasonal_features.vector_names.map { |x| x.split("_delim_")[0] }
  )

  # Add total for holidays
  if @train_holiday_names
    components = add_group_component(components, "holidays", @train_holiday_names.uniq)
  end
  # Add totals additive and multiplicative components, and regressors
  ["additive", "multiplicative"].each do |mode|
    components = add_group_component(components, "#{mode}_terms", modes[mode])
    regressors_by_mode = @extra_regressors.select { |r, props| props[:mode] == mode }
      .map { |r, props| r }
    components = add_group_component(components, "extra_regressors_#{mode}", regressors_by_mode)

    # Add combination components to modes
    modes[mode] << "#{mode}_terms"
    modes[mode] << "extra_regressors_#{mode}"
  end
  # After all of the additive/multiplicative groups have been added,
  # register the "holidays" group under the model-wide seasonality mode
  modes[@seasonality_mode] << "holidays"
  # Convert to a binary matrix
  component_cols = components["col"].crosstab(components["component"])
  # Move the crosstab's "_" column into a column named "col"
  component_cols["col"] = component_cols.delete("_")

  # Add columns for additive and multiplicative terms, if missing
  ["additive_terms", "multiplicative_terms"].each do |name|
    component_cols[name] = 0 unless component_cols.include?(name)
  end

  # TODO validation

  [component_cols, modes]
end

#sample_model(df, seasonal_features, iteration, s_a, s_m) ⇒ Object



900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
# File 'lib/prophet/forecaster.rb', line 900

# Simulates one draw of the observations for a single parameter iteration.
#
# df                - Prepared data frame.
# seasonal_features - Feature matrix (already converted to Numo).
# iteration         - Index of the parameter draw to use.
# s_a, s_m          - Indicators of the additive / multiplicative feature
#                     columns.
#
# Returns a Hash with "yhat" and "trend" arrays (a plain Hash rather than a
# data frame, for performance).
def sample_model(df, seasonal_features, iteration, s_a, s_m)
  # The trend is sampled before the observation noise is drawn
  trend = sample_predictive_trend(df, iteration)

  beta = @params["beta"][iteration, true]
  additive = seasonal_features.dot(beta * s_a) * @y_scale
  multiplicative = seasonal_features.dot(beta * s_m)

  sigma = @params["sigma_obs"][iteration]
  noise = Numo::DFloat.new(*df.shape[0]).rand_norm(0, sigma) * @y_scale

  {
    "yhat" => trend * (1 + multiplicative) + additive + noise,
    "trend" => trend
  }
end

#sample_posterior_predictive(df) ⇒ Object



846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
# File 'lib/prophet/forecaster.rb', line 846

# Draws samples of "yhat" and "trend" for +df+.
#
# For every parameter iteration, enough simulations are run to reach at
# least @uncertainty_samples in total (never fewer than one per iteration).
#
# Returns a Hash with "yhat" and "trend", each the column-wise stack of the
# simulated arrays.
def sample_posterior_predictive(df)
  n_iterations = @params["k"].shape[0]
  samp_per_iter = [1, (@uncertainty_samples / n_iterations.to_f).ceil].max

  # The seasonality features are identical for every draw, so build them
  # once and convert to Numo for performance.
  features, _, component_cols, _ = make_all_seasonality_features(df)
  features = features.to_numo
  additive_terms = component_cols["additive_terms"].to_numo
  multiplicative_terms = component_cols["multiplicative_terms"].to_numo

  yhat_draws = []
  trend_draws = []
  n_iterations.times do |iteration|
    samp_per_iter.times do
      sim = sample_model(df, features, iteration, additive_terms, multiplicative_terms)
      yhat_draws << sim["yhat"]
      trend_draws << sim["trend"]
    end
  end

  {
    "yhat" => Numo::NArray.column_stack(yhat_draws),
    "trend" => Numo::NArray.column_stack(trend_draws)
  }
end

#sample_predictive_trend(df, iteration) ⇒ Object



917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
# File 'lib/prophet/forecaster.rb', line 917

# Simulates the trend for one parameter sample, adding randomly placed
# changepoints between t = 1 and the largest t in df.
#
# @param df frame being predicted; its "t" and "floor" columns are read, plus
#   "cap_scaled" when @growth is "logistic"
# @param iteration index of the sample to read from @params
# @return the simulated trend multiplied by @y_scale and offset by the
#   "floor" column
def sample_predictive_trend(df, iteration)
  k = @params["k"][iteration]
  m = @params["m"][iteration]
  deltas = @params["delta"][iteration, true]

  t = Numo::NArray.asarray(df["t"].to_a)
  upper_t = t.max

  # New changepoints from a Poisson process with rate S on [1, T]
  if upper_t > 1
    s = @changepoints_t.size
    n_changes = poisson(s * (upper_t - 1))
  else
    n_changes = 0
  end

  if n_changes > 0
    # Numo's #sort returns a sorted copy and leaves the receiver untouched, so
    # the result must be kept. Discarding it left the new changepoints in
    # random order (piecewise_logistic presumably needs them ascending).
    changepoint_ts_new = (1 + Numo::DFloat.new(n_changes).rand * (upper_t - 1)).sort
  else
    changepoint_ts_new = []
  end

  # Get the empirical scale of the deltas, plus epsilon to avoid NaNs.
  lambda_ = deltas.abs.mean + 1e-8

  # Sample deltas
  deltas_new = laplace(0, lambda_, n_changes)

  # Prepend the times and deltas from the history
  changepoint_ts = @changepoints_t.concatenate(changepoint_ts_new)
  deltas = deltas.concatenate(deltas_new)

  trend =
    case @growth
    when "linear"
      piecewise_linear(t, deltas, k, m, changepoint_ts)
    when "logistic"
      piecewise_logistic(t, df["cap_scaled"], deltas, k, m, changepoint_ts)
    when "flat"
      flat_trend(t, m)
    end

  trend * @y_scale + Numo::NArray.asarray(df["floor"].to_a)
end

#set_auto_seasonalities ⇒ Object



561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
# File 'lib/prophet/forecaster.rb', line 561

# Registers the built-in yearly, weekly and daily seasonalities in
# @seasonalities.
#
# For each one, parse_seasonality_args receives the user's setting, an
# auto-disable flag derived from the span and the smallest spacing of
# @history["ds"], and a default Fourier order. The seasonality is stored only
# when the returned order is positive.
def set_auto_seasonalities
  earliest = @history["ds"].min
  latest = @history["ds"].max
  smallest_gap = @history["ds"].diff.min

  day = 86400 # seconds

  # Resolves the Fourier order for one seasonality and stores it when enabled.
  register = lambda do |name, setting, auto_disable, default_order, period|
    order = parse_seasonality_args(name, setting, auto_disable, default_order)
    next unless order > 0

    @seasonalities[name] = {
      period: period,
      fourier_order: order,
      prior_scale: @seasonality_prior_scale,
      mode: @seasonality_mode,
      condition_name: nil
    }
  end

  # Yearly: auto-disabled when the history spans less than 730 days.
  register.call(
    "yearly", @yearly_seasonality,
    latest - earliest < 730 * day,
    10, 365.25
  )

  # Weekly: auto-disabled under 14 days of history or a spacing of 7+ days.
  register.call(
    "weekly", @weekly_seasonality,
    latest - earliest < 14 * day || smallest_gap >= 7 * day,
    3, 7
  )

  # Daily: auto-disabled under 2 days of history or a spacing of 1+ days.
  register.call(
    "daily", @daily_seasonality,
    latest - earliest < 2 * day || smallest_gap >= 1 * day,
    4, 1
  )
end

#set_changepoints ⇒ Object



252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# File 'lib/prophet/forecaster.rb', line 252

# Settles @changepoints and derives @changepoints_t from them.
#
# Changepoints that are already set are only checked against the range of
# @history["ds"]. Otherwise @n_changepoints points are picked at evenly spaced
# row positions within the first @changepoint_range share of the history,
# skipping the first picked row.
#
# @raise [ArgumentError] when a supplied changepoint lies outside the history
# @return the value assigned to @changepoints_t
def set_changepoints
  if @changepoints
    if @changepoints.size > 0
      before_start = @changepoints.min < @history["ds"].min
      after_end = @changepoints.max > @history["ds"].max
      raise ArgumentError, "Changepoints must fall within training data." if before_start || after_end
    end
  else
    usable_rows = (@history.shape[0] * @changepoint_range).floor

    # Clamp the request so there are never more changepoints than usable rows.
    if @n_changepoints + 1 > usable_rows
      @n_changepoints = usable_rows - 1
      logger.info "n_changepoints greater than number of observations. Using #{@n_changepoints}"
    end

    @changepoints =
      if @n_changepoints > 0
        spacing = (usable_rows - 1) / @n_changepoints.to_f
        positions = Array.new(@n_changepoints + 1) { |i| (i * spacing).round }
        picked = @history["ds"].to_a.values_at(*positions)
        Rover::Vector.new(picked).tail(-1)
      else
        []
      end
  end

  @changepoints_t =
    if @changepoints.size > 0
      # Integer timestamps, sorted, then shifted by @start and divided by @t_scale.
      stamps = @changepoints.map(&:to_i).sort.to_numo.cast_to(Numo::DFloat)
      (stamps - @start.to_i) / @t_scale.to_f
    else
      # Single placeholder entry when there are no changepoints.
      Numo::NArray.asarray([0])
    end
end

#setup_dataframe(df, initialize_scales: false) ⇒ Object

Raises:

  • (ArgumentError)


156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
# File 'lib/prophet/forecaster.rb', line 156

# Validates a data frame and adds the scaled columns the model works with.
#
# Casts "y" and every extra regressor to floats, converts "ds" with
# to_datetime, checks conditional-seasonality columns, sorts by "ds", then adds
# "floor" (unless @logistic_floor), "cap_scaled" (logistic growth only), "t",
# "y_scaled" (when "y" is present) and standardized regressor columns.
#
# Note that "y", "ds" and the regressor columns are overwritten on the frame
# that was passed in, before the sorted copy is made.
#
# @param df frame with a "ds" column and optionally "y", "cap", "floor",
#   regressor and condition columns
# @param initialize_scales flag forwarded unchanged to the initialize_scales
#   method
# @return the sorted frame with the derived columns added
# @raise [ArgumentError] on non-finite "y", nil "ds", missing or NaN regressor
#   columns, missing or non-boolean condition columns, or missing or invalid
#   "cap" / "floor" columns
def setup_dataframe(df, initialize_scales: false)
  if df.include?("y")
    df["y"] = df["y"].map(&:to_f)
    # finite? is false for NaN as well as +/-Infinity, so both are rejected here.
    raise ArgumentError, "Found infinity in column y." unless df["y"].all?(&:finite?)
  end
  # TODO support integers

  df["ds"] = to_datetime(df["ds"])

  raise ArgumentError, "Found NaN in column ds." if df["ds"].any?(&:nil?)

  @extra_regressors.each_key do |name|
    unless df.include?(name)
      raise ArgumentError, "Regressor #{name.inspect} missing from dataframe"
    end
    # nil.to_f is 0.0, so a nil has to be caught before the cast ...
    if df[name].any?(&:nil?)
      raise ArgumentError, "Found NaN in column #{name.inspect}"
    end
    df[name] = df[name].map(&:to_f)
    # ... and after the cast every element is a Float, so only nan? can flag a
    # gap. The previous nil? test at this point could never be true.
    if df[name].any?(&:nan?)
      raise ArgumentError, "Found NaN in column #{name.inspect}"
    end
  end

  @seasonalities.each_value do |props|
    condition_name = props[:condition_name]
    next unless condition_name

    unless df.include?(condition_name)
      raise ArgumentError, "Condition #{condition_name.inspect} missing from dataframe"
    end
    if df.where(!df[condition_name].in([true, false])).any?
      raise ArgumentError, "Found non-boolean in column #{condition_name.inspect}"
    end
  end

  df = df.sort_by { |r| r["ds"] }

  # The parentheses make this a call to the initialize_scales method; the bare
  # name is the keyword argument.
  initialize_scales(initialize_scales, df)

  if @logistic_floor
    unless df.include?("floor")
      raise ArgumentError, "Expected column \"floor\"."
    end
  elsif @scaling == "absmax"
    df["floor"] = 0
  elsif @scaling == "minmax"
    df["floor"] = @y_min
  end

  if @growth == "logistic"
    unless df.include?("cap")
      raise ArgumentError, "Capacities must be supplied for logistic growth in column \"cap\""
    end
    if df[df["cap"] <= df["floor"]].size > 0
      raise ArgumentError, "cap must be greater than floor (which defaults to 0)."
    end
    df["cap_scaled"] = (df["cap"] - df["floor"]) / @y_scale.to_f
  end

  df["t"] = (df["ds"] - @start) / @t_scale.to_f
  if df.include?("y")
    df["y_scaled"] = (df["y"] - df["floor"]) / @y_scale.to_f
  end

  # Standardize each regressor with its stored mean and standard deviation.
  @extra_regressors.each do |name, props|
    df[name] = (df[name] - props[:mu]) / props[:std].to_f
  end

  df
end

#to_json ⇒ Object



1007
1008
1009
1010
1011
# File 'lib/prophet/forecaster.rb', line 1007

# Serializes the model to a JSON string.
#
# @return [String] JSON.generate applied to whatever as_json returns
def to_json
  require "json" # loaded here, on first use, rather than at file load

  payload = as_json
  JSON.generate(payload)
end

#validate_column_name(name, check_holidays: true, check_seasonalities: true, check_regressors: true) ⇒ Object



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/prophet/forecaster.rb', line 125

# Ensures a column name is free to use for a holiday, seasonality or regressor.
#
# @param name [String] candidate name
# @param check_holidays when true, reject names found in @holidays["holiday"]
#   or among get_holiday_names(@country_holidays)
# @param check_seasonalities when true, reject keys already in @seasonalities
# @param check_regressors when true, reject keys already in @extra_regressors
# @raise [ArgumentError] when the name contains "_delim_", is reserved, or
#   collides with one of the enabled checks
# @return [nil]
def validate_column_name(name, check_holidays: true, check_seasonalities: true, check_regressors: true)
  raise ArgumentError, "Name cannot contain \"_delim_\"" if name.include?("_delim_")

  components = %w[
    trend additive_terms daily weekly yearly
    holidays zeros extra_regressors_additive yhat
    extra_regressors_multiplicative multiplicative_terms
  ]
  # Each component name is also reserved with "_lower" and "_upper" suffixes.
  reserved =
    components +
    components.map { |base| "#{base}_lower" } +
    components.map { |base| "#{base}_upper" } +
    %w[ds y cap floor y_scaled cap_scaled]
  raise ArgumentError, "Name #{name.inspect} is reserved." if reserved.include?(name)

  if check_holidays
    if @holidays && @holidays["holiday"].uniq.to_a.include?(name)
      raise ArgumentError, "Name #{name.inspect} already used for a holiday."
    end
    if @country_holidays && get_holiday_names(@country_holidays).to_a.include?(name)
      raise ArgumentError, "Name #{name.inspect} is a holiday name in #{@country_holidays.inspect}."
    end
  end

  if check_seasonalities && @seasonalities[name]
    raise ArgumentError, "Name #{name.inspect} already used for a seasonality."
  end

  raise ArgumentError, "Name #{name.inspect} already used for an added regressor." if check_regressors && @extra_regressors[name]
end

#validate_inputs ⇒ Object



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/prophet/forecaster.rb', line 90

# Checks the constructor settings held in instance variables.
#
# Verifies @growth, @changepoint_range, the shape and window columns of
# @holidays (when set) and @seasonality_mode. As a side effect the "ds" column
# of @holidays is replaced with its to_datetime conversion, and every distinct
# holiday name is passed to validate_column_name.
#
# @raise [ArgumentError] on the first setting that fails its check
# @return [nil]
def validate_inputs
  unless ["linear", "logistic", "flat"].include?(@growth)
    raise ArgumentError, "Parameter \"growth\" should be \"linear\", \"logistic\", or \"flat\"."
  end

  outside_unit_interval = @changepoint_range < 0 || @changepoint_range > 1
  raise ArgumentError, "Parameter \"changepoint_range\" must be in [0, 1]" if outside_unit_interval

  if @holidays
    well_formed = @holidays.is_a?(Rover::DataFrame) && @holidays.include?("ds") && @holidays.include?("holiday")
    unless well_formed
      raise ArgumentError, "holidays must be a DataFrame with \"ds\" and \"holiday\" columns."
    end

    @holidays["ds"] = to_datetime(@holidays["ds"])

    lower_given = @holidays.include?("lower_window")
    upper_given = @holidays.include?("upper_window")
    # Exactly one of the two window columns being present is an error.
    if lower_given ^ upper_given
      raise ArgumentError, "Holidays must have both lower_window and upper_window, or neither"
    end

    if lower_given
      raise ArgumentError, "Holiday lower_window should be <= 0" if @holidays["lower_window"].max > 0
      raise ArgumentError, "Holiday upper_window should be >= 0" if @holidays["upper_window"].min < 0
    end

    # check_holidays is off so the holiday names are not tested against themselves.
    @holidays["holiday"].uniq.each do |holiday|
      validate_column_name(holiday, check_holidays: false)
    end
  end

  return if ["additive", "multiplicative"].include?(@seasonality_mode)

  raise ArgumentError, "seasonality_mode must be \"additive\" or \"multiplicative\""
end