Class: Prophet::Forecaster
- Inherits:
-
Object
- Object
- Prophet::Forecaster
- Defined in:
- lib/prophet/forecaster.rb
Instance Attribute Summary collapse
-
#changepoint_prior_scale ⇒ Object
readonly
Returns the value of attribute changepoint_prior_scale.
-
#changepoint_range ⇒ Object
readonly
Returns the value of attribute changepoint_range.
-
#changepoints ⇒ Object
readonly
Returns the value of attribute changepoints.
-
#country_holidays ⇒ Object
Returns the value of attribute country_holidays.
-
#extra_regressors ⇒ Object
Returns the value of attribute extra_regressors.
-
#fit_kwargs ⇒ Object
readonly
Returns the value of attribute fit_kwargs.
-
#growth ⇒ Object
readonly
Returns the value of attribute growth.
-
#history ⇒ Object
readonly
Returns the value of attribute history.
-
#holidays ⇒ Object
readonly
Returns the value of attribute holidays.
-
#holidays_prior_scale ⇒ Object
readonly
Returns the value of attribute holidays_prior_scale.
-
#interval_width ⇒ Object
readonly
Returns the value of attribute interval_width.
-
#logger ⇒ Object
readonly
Returns the value of attribute logger.
-
#mcmc_samples ⇒ Object
readonly
Returns the value of attribute mcmc_samples.
-
#n_changepoints ⇒ Object
readonly
Returns the value of attribute n_changepoints.
-
#params ⇒ Object
readonly
Returns the value of attribute params.
-
#seasonalities ⇒ Object
Returns the value of attribute seasonalities.
-
#seasonality_mode ⇒ Object
readonly
Returns the value of attribute seasonality_mode.
-
#seasonality_prior_scale ⇒ Object
readonly
Returns the value of attribute seasonality_prior_scale.
-
#specified_changepoints ⇒ Object
readonly
Returns the value of attribute specified_changepoints.
-
#train_holiday_names ⇒ Object
readonly
Returns the value of attribute train_holiday_names.
-
#uncertainty_samples ⇒ Object
readonly
Returns the value of attribute uncertainty_samples.
Instance Method Summary collapse
- #add_country_holidays(country_name) ⇒ Object
- #add_group_component(components, name, group) ⇒ Object
- #add_regressor(name, prior_scale: nil, standardize: "auto", mode: nil) ⇒ Object
- #add_seasonality(name:, period:, fourier_order:, prior_scale: nil, mode: nil, condition_name: nil) ⇒ Object
- #construct_holiday_dataframe(dates) ⇒ Object
- #fit(df, **kwargs) ⇒ Object
- #flat_growth_init(df) ⇒ Object
- #flat_trend(t, m) ⇒ Object
- #fourier_series(dates, period, series_order) ⇒ Object
-
#initialize(growth: "linear", changepoints: nil, n_changepoints: 25, changepoint_range: 0.8, yearly_seasonality: "auto", weekly_seasonality: "auto", daily_seasonality: "auto", holidays: nil, seasonality_mode: "additive", seasonality_prior_scale: 10.0, holidays_prior_scale: 10.0, changepoint_prior_scale: 0.05, mcmc_samples: 0, interval_width: 0.80, uncertainty_samples: 1000, scaling: "absmax") ⇒ Forecaster
constructor
A new instance of Forecaster.
- #initialize_scales(initialize_scales, df) ⇒ Object
- #linear_growth_init(df) ⇒ Object
- #logistic_growth_init(df) ⇒ Object
- #make_all_seasonality_features(df) ⇒ Object
- #make_future_dataframe(periods:, freq: "D", include_history: true) ⇒ Object
- #make_holiday_features(dates, holidays) ⇒ Object
- #make_seasonality_features(dates, period, series_order, prefix) ⇒ Object
- #parse_seasonality_args(name, arg, auto_disable, default_order) ⇒ Object
- #piecewise_linear(t, deltas, k, m, changepoint_ts) ⇒ Object
- #piecewise_logistic(t, cap, deltas, k, m, changepoint_ts) ⇒ Object
- #predict(df = nil) ⇒ Object
- #predict_seasonal_components(df) ⇒ Object
- #predict_trend(df) ⇒ Object
- #predict_uncertainty(df) ⇒ Object
- #predictive_samples(df) ⇒ Object
- #regressor_column_matrix(seasonal_features, modes) ⇒ Object
- #sample_model(df, seasonal_features, iteration, s_a, s_m) ⇒ Object
- #sample_posterior_predictive(df) ⇒ Object
- #sample_predictive_trend(df, iteration) ⇒ Object
- #set_auto_seasonalities ⇒ Object
- #set_changepoints ⇒ Object
- #setup_dataframe(df, initialize_scales: false) ⇒ Object
- #to_json ⇒ Object
- #validate_column_name(name, check_holidays: true, check_seasonalities: true, check_regressors: true) ⇒ Object
- #validate_inputs ⇒ Object
Methods included from Plot
#add_changepoints_to_plot, #plot, #plot_components, plot_cross_validation_metric, plt
Methods included from Holidays
#get_holiday_names, #holidays_df, #make_holidays_df
Constructor Details
#initialize(growth: "linear", changepoints: nil, n_changepoints: 25, changepoint_range: 0.8, yearly_seasonality: "auto", weekly_seasonality: "auto", daily_seasonality: "auto", holidays: nil, seasonality_mode: "additive", seasonality_prior_scale: 10.0, holidays_prior_scale: 10.0, changepoint_prior_scale: 0.05, mcmc_samples: 0, interval_width: 0.80, uncertainty_samples: 1000, scaling: "absmax") ⇒ Forecaster
Returns a new instance of Forecaster.
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
# File 'lib/prophet/forecaster.rb', line 15 def initialize( growth: "linear", changepoints: nil, n_changepoints: 25, changepoint_range: 0.8, yearly_seasonality: "auto", weekly_seasonality: "auto", daily_seasonality: "auto", holidays: nil, seasonality_mode: "additive", seasonality_prior_scale: 10.0, holidays_prior_scale: 10.0, changepoint_prior_scale: 0.05, mcmc_samples: 0, interval_width: 0.80, uncertainty_samples: 1000, scaling: "absmax" ) @growth = growth @changepoints = to_datetime(changepoints) if !@changepoints.nil? @n_changepoints = @changepoints.size @specified_changepoints = true else @n_changepoints = n_changepoints @specified_changepoints = false end @changepoint_range = changepoint_range @yearly_seasonality = yearly_seasonality @weekly_seasonality = weekly_seasonality @daily_seasonality = daily_seasonality @holidays = convert_df(holidays) @seasonality_mode = seasonality_mode @seasonality_prior_scale = seasonality_prior_scale.to_f @changepoint_prior_scale = changepoint_prior_scale.to_f @holidays_prior_scale = holidays_prior_scale.to_f @mcmc_samples = mcmc_samples @interval_width = interval_width @uncertainty_samples = uncertainty_samples if !["absmax", "minmax"].include?(scaling) raise ArgumentError, "scaling must be one of \"absmax\" or \"minmax\"" end @scaling = scaling # Set during fitting or by other methods @start = nil @y_scale = nil @logistic_floor = false @t_scale = nil @changepoints_t = nil @seasonalities = {} @extra_regressors = {} @country_holidays = nil @stan_fit = nil @params = {} @history = nil @history_dates = nil @train_component_cols = nil @component_modes = nil @train_holiday_names = nil @fit_kwargs = {} validate_inputs @logger = ::Logger.new($stderr) @logger.level = ::Logger::WARN @logger.formatter = proc do |severity, datetime, progname, msg| "[prophet] #{msg}\n" end @stan_backend = StanBackend.new(@logger) end |
Instance Attribute Details
#changepoint_prior_scale ⇒ Object (readonly)
Returns the value of attribute changepoint_prior_scale.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def changepoint_prior_scale @changepoint_prior_scale end |
#changepoint_range ⇒ Object (readonly)
Returns the value of attribute changepoint_range.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def changepoint_range @changepoint_range end |
#changepoints ⇒ Object (readonly)
Returns the value of attribute changepoints.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def changepoints @changepoints end |
#country_holidays ⇒ Object
Returns the value of attribute country_holidays.
13 14 15 |
# File 'lib/prophet/forecaster.rb', line 13 def country_holidays @country_holidays end |
#extra_regressors ⇒ Object
Returns the value of attribute extra_regressors.
13 14 15 |
# File 'lib/prophet/forecaster.rb', line 13 def extra_regressors @extra_regressors end |
#fit_kwargs ⇒ Object (readonly)
Returns the value of attribute fit_kwargs.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def fit_kwargs @fit_kwargs end |
#growth ⇒ Object (readonly)
Returns the value of attribute growth.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def growth @growth end |
#history ⇒ Object (readonly)
Returns the value of attribute history.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def history @history end |
#holidays ⇒ Object (readonly)
Returns the value of attribute holidays.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def holidays @holidays end |
#holidays_prior_scale ⇒ Object (readonly)
Returns the value of attribute holidays_prior_scale.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def holidays_prior_scale @holidays_prior_scale end |
#interval_width ⇒ Object (readonly)
Returns the value of attribute interval_width.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def interval_width @interval_width end |
#logger ⇒ Object (readonly)
Returns the value of attribute logger.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def logger @logger end |
#mcmc_samples ⇒ Object (readonly)
Returns the value of attribute mcmc_samples.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def mcmc_samples @mcmc_samples end |
#n_changepoints ⇒ Object (readonly)
Returns the value of attribute n_changepoints.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def n_changepoints @n_changepoints end |
#params ⇒ Object (readonly)
Returns the value of attribute params.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def params @params end |
#seasonalities ⇒ Object
Returns the value of attribute seasonalities.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def seasonalities @seasonalities end |
#seasonality_mode ⇒ Object (readonly)
Returns the value of attribute seasonality_mode.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def seasonality_mode @seasonality_mode end |
#seasonality_prior_scale ⇒ Object (readonly)
Returns the value of attribute seasonality_prior_scale.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def seasonality_prior_scale @seasonality_prior_scale end |
#specified_changepoints ⇒ Object (readonly)
Returns the value of attribute specified_changepoints.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def specified_changepoints @specified_changepoints end |
#train_holiday_names ⇒ Object (readonly)
Returns the value of attribute train_holiday_names.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def train_holiday_names @train_holiday_names end |
#uncertainty_samples ⇒ Object (readonly)
Returns the value of attribute uncertainty_samples.
6 7 8 |
# File 'lib/prophet/forecaster.rb', line 6 def uncertainty_samples @uncertainty_samples end |
Instance Method Details
#add_country_holidays(country_name) ⇒ Object
422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 |
# File 'lib/prophet/forecaster.rb', line 422

# Registers the country whose holidays should be used by the model.
#
# Accepts either the country name itself or, for backwards compatibility,
# a hash of the form +{country_name: ...}+. Every holiday name for that
# country is passed through +validate_column_name+ (with the holiday check
# disabled so it can merge with existing holidays). A warning is logged when
# a previously configured country is replaced.
#
# @raise [Error] if the model already has a fit history
# @return [self]
def add_country_holidays(country_name)
  raise Error, "Country holidays must be added prior to model fitting." if @history

  # Unwrap the previously documented keyword-style call.
  legacy_value = country_name.is_a?(Hash) ? country_name[:country_name] : nil
  country_name = legacy_value if legacy_value

  get_holiday_names(country_name).each do |holiday_name|
    validate_column_name(holiday_name, check_holidays: false)
  end

  logger.warn "Changing country holidays from #{@country_holidays.inspect} to #{country_name.inspect}." if @country_holidays
  @country_holidays = country_name
  self
end
#add_group_component(components, name, group) ⇒ Object
530 531 532 533 534 535 536 537 538 |
# File 'lib/prophet/forecaster.rb', line 530

# Appends rows that map every column used by any component in +group+ to a
# new aggregate component called +name+.
#
# When none of the components in +group+ are present, +components+ is
# returned as-is.
#
# @return the components data frame, possibly extended
def add_group_component(components, name, group)
  in_group = components["component"].in?(group)
  member_cols = components[in_group].dup["col"].uniq
  return components unless member_cols.size > 0

  group_rows = Rover::DataFrame.new({"col" => member_cols, "component" => name})
  components.concat(group_rows)
end
#add_regressor(name, prior_scale: nil, standardize: "auto", mode: nil) ⇒ Object
374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 |
# File 'lib/prophet/forecaster.rb', line 374

# Registers an extra regressor column to be used when fitting.
#
# +prior_scale+ falls back to the holidays prior scale and +mode+ to the
# model's seasonality mode when they are not supplied. The regressor is
# stored with placeholder +mu+/+std+ values of 0.0 and 1.0.
#
# @raise [Error] if the model already has a fit history
# @raise [ArgumentError] if the prior scale is not positive or the mode is
#   not "additive"/"multiplicative"
# @return [self]
def add_regressor(name, prior_scale: nil, standardize: "auto", mode: nil)
  raise Error, "Regressors must be added prior to model fitting." if @history

  validate_column_name(name, check_regressors: false)

  scale = prior_scale || @holidays_prior_scale.to_f
  regressor_mode = mode || @seasonality_mode

  raise ArgumentError, "Prior scale must be > 0" if scale <= 0
  unless ["additive", "multiplicative"].include?(regressor_mode)
    raise ArgumentError, "mode must be \"additive\" or \"multiplicative\""
  end

  @extra_regressors[name] = {prior_scale: scale, standardize: standardize, mu: 0.0, std: 1.0, mode: regressor_mode}
  self
end
#add_seasonality(name:, period:, fourier_order:, prior_scale: nil, mode: nil, condition_name: nil) ⇒ Object
393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 |
# File 'lib/prophet/forecaster.rb', line 393

# Registers a custom seasonality component.
#
# The names "daily", "weekly" and "yearly" skip column-name validation so
# that the built-in seasonalities can be overridden. +prior_scale+ defaults
# to the model's seasonality prior scale and +mode+ to its seasonality mode.
#
# @raise [Error] if the model already has a fit history
# @raise [ArgumentError] if the prior scale or Fourier order is not positive,
#   or the mode is not "additive"/"multiplicative"
# @return [self]
def add_seasonality(name:, period:, fourier_order:, prior_scale: nil, mode: nil, condition_name: nil)
  raise Error, "Seasonality must be added prior to model fitting." if @history

  builtin = ["daily", "weekly", "yearly"].include?(name)
  validate_column_name(name, check_seasonalities: false) unless builtin

  scale = prior_scale.nil? ? @seasonality_prior_scale : prior_scale.to_f
  raise ArgumentError, "Prior scale must be > 0" if scale <= 0
  raise ArgumentError, "Fourier Order must be > 0" if fourier_order <= 0

  chosen_mode = mode || @seasonality_mode
  unless ["additive", "multiplicative"].include?(chosen_mode)
    raise ArgumentError, "mode must be \"additive\" or \"multiplicative\""
  end

  validate_column_name(condition_name) if condition_name

  @seasonalities[name] = {
    period: period,
    fourier_order: fourier_order,
    prior_scale: scale,
    mode: chosen_mode,
    condition_name: condition_name
  }
  self
end
#construct_holiday_dataframe(dates) ⇒ Object
301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 |
# File 'lib/prophet/forecaster.rb', line 301

# Assembles the holiday data frame that applies to +dates+.
#
# Starts from a copy of the user-supplied holidays (or an empty frame),
# appends the configured country's holidays for the years found in +dates+,
# and, once holiday names from training are known, restricts the result to
# those names and appends a row for any trained name that is now absent.
def construct_holiday_dataframe(dates)
  combined = @holidays ? @holidays.dup : Rover::DataFrame.new

  if @country_holidays
    years = dates.map(&:year)
    combined = combined.concat(make_holidays_df(years, @country_holidays))
  end

  # Without training names there is nothing to reconcile.
  return combined unless @train_holiday_names

  # Keep only holidays that were seen while fitting.
  seen_in_fit = combined["holiday"].in?(@train_holiday_names)
  combined = combined[seen_in_fit]

  # Re-add trained holidays missing from this range; they get no ds value.
  absent = !@train_holiday_names.in?(combined["holiday"])
  placeholders = Rover::DataFrame.new({
    "holiday" => @train_holiday_names[absent]
  })
  combined.concat(placeholders)
end
#fit(df, **kwargs) ⇒ Object
652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 |
# File 'lib/prophet/forecaster.rb', line 652

# Fits the model to +df+, which must provide "ds" and "y" columns.
#
# Rows with a missing "y" are dropped, the frame is prepared with
# +setup_dataframe+, seasonality/holiday/regressor features are built,
# changepoints are set, and the Stan backend is asked either to sample
# (when +@mcmc_samples+ > 0) or to fit. Extra keyword arguments are
# remembered in +@fit_kwargs+ and forwarded to the backend.
#
# @raise [Error] if the model was already fit or fewer than 2 usable rows remain
# @raise [ArgumentError] if +df+ is not a data frame or lacks "ds"/"y"
# @return [self]
def fit(df, **kwargs)
  raise Error, "Prophet object can only be fit once" if @history

  df = convert_df(df)
  raise ArgumentError, "Must be a data frame" unless df.is_a?(Rover::DataFrame)

  unless df.include?("ds") && df.include?("y")
    raise ArgumentError, "Data frame must have ds and y columns"
  end

  history = df[!df["y"].missing]
  raise Error, "Data has less than 2 non-nil rows" if history.size < 2

  # Dates come from the full input, including rows whose y is missing.
  @history_dates = to_datetime(df["ds"]).sort
  history = setup_dataframe(history, initialize_scales: true)
  @history = history
  set_auto_seasonalities
  seasonal_features, prior_scales, component_cols, modes = make_all_seasonality_features(history)
  @train_component_cols = component_cols
  @component_modes = modes
  @fit_kwargs = kwargs.dup # TODO deep dup?

  set_changepoints

  trend_indicator = {"linear" => 0, "logistic" => 1, "flat" => 2}

  # Data handed to the Stan backend.
  dat = {
    "T" => history.shape[0],
    "K" => seasonal_features.shape[1],
    "S" => @changepoints_t.size,
    "y" => history["y_scaled"],
    "t" => history["t"],
    "t_change" => @changepoints_t,
    "X" => seasonal_features,
    "sigmas" => prior_scales,
    "tau" => @changepoint_prior_scale,
    "trend_indicator" => trend_indicator[@growth],
    "s_a" => component_cols["additive_terms"],
    "s_m" => component_cols["multiplicative_terms"]
  }

  # Only the remaining (logistic) case uses a real cap; the others pass zeros.
  if @growth == "linear"
    dat["cap"] = Numo::DFloat.zeros(@history.shape[0])
    kinit = linear_growth_init(history)
  elsif @growth == "flat"
    dat["cap"] = Numo::DFloat.zeros(@history.shape[0])
    kinit = flat_growth_init(history)
  else
    dat["cap"] = history["cap_scaled"]
    kinit = logistic_growth_init(history)
  end

  # Initial parameter values handed to the backend.
  stan_init = {
    "k" => kinit[0],
    "m" => kinit[1],
    "delta" => Numo::DFloat.zeros(@changepoints_t.size),
    "beta" => Numo::DFloat.zeros(seasonal_features.shape[1]),
    "sigma_obs" => 1
  }

  if history["y"].min == history["y"].max && (@growth == "linear" || @growth == "flat")
    # Nothing to fit.
    @params = stan_init
    @params["sigma_obs"] = 1e-9
    # Wrap each initial value in an array so every parameter gains a leading axis.
    @params.each do |par, _|
      @params[par] = Numo::NArray.asarray([@params[par]])
    end
  elsif @mcmc_samples > 0
    @params = @stan_backend.sampling(stan_init, dat, @mcmc_samples, **kwargs)
  else
    @params = @stan_backend.fit(stan_init, dat, **kwargs)
  end

  # If no changepoints were requested, replace delta with 0s
  if @changepoints.size == 0
    # Fold delta into the base rate k
    # Numo doesn't support -1 with reshape
    negative_one = @params["delta"].shape.inject(&:*)
    @params["k"] = @params["k"] + @params["delta"].reshape(negative_one)
    @params["delta"] = Numo::DFloat.zeros(@params["delta"].shape).reshape(negative_one, 1)
  end

  self
end
#flat_growth_init(df) ⇒ Object
646 647 648 649 650 |
# File 'lib/prophet/forecaster.rb', line 646

# Initial parameters for flat growth: a zero rate and the mean of the
# scaled observations as the offset.
#
# @return [Array] +[k, m]+
def flat_growth_init(df)
  [0, df["y_scaled"].mean]
end
#flat_trend(t, m) ⇒ Object
798 799 800 801 |
# File 'lib/prophet/forecaster.rb', line 798

# Constant trend: +m+ multiplied by +t.new_ones+.
def flat_trend(t, m)
  m * t.new_ones
end
#fourier_series(dates, period, series_order) ⇒ Object
285 286 287 288 289 290 291 292 293 294 |
# File 'lib/prophet/forecaster.rb', line 285

# Builds the Fourier terms for +dates+ with the given +period+.
#
# Timestamps are converted to fractional days. For each order
# 1..+series_order+ a sine array and then a cosine array is produced.
#
# @return [Array] sin/cos arrays in order: sin1, cos1, sin2, cos2, ...
def fourier_series(dates, period, series_order)
  days = dates.map(&:to_i).to_numo / (3600 * 24.0)

  # A flat list of columns is enough; the caller assembles the data frame.
  series_order.times.flat_map do |idx|
    harmonic = idx + 1
    [
      Numo::DFloat::Math.sin(2.0 * harmonic * Math::PI * days / period),
      Numo::DFloat::Math.cos(2.0 * harmonic * Math::PI * days / period)
    ]
  end
end
#initialize_scales(initialize_scales, df) ⇒ Object
226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 |
# File 'lib/prophet/forecaster.rb', line 226

# Computes and stores the scaling state (+@y_min+, +@y_scale+, +@start+,
# +@t_scale+) from +df+. Does nothing unless the +initialize_scales+ flag
# is truthy.
#
# With logistic growth and a "floor" column, scaling is computed relative
# to the floor (and +@logistic_floor+ is set); otherwise from "y" alone.
# A zero +@y_scale+ is replaced by 1.
def initialize_scales(initialize_scales, df)
  return unless initialize_scales

  if @growth == "logistic" && df.include?("floor")
    @logistic_floor = true
    case @scaling
    when "absmax"
      @y_min = (df["y"] - df["floor"]).abs.min.to_f
      @y_scale = (df["y"] - df["floor"]).abs.max.to_f
    when "minmax"
      @y_min = df["floor"].min
      @y_scale = (df["cap"].max - @y_min).to_f
    end
  else
    case @scaling
    when "absmax"
      @y_min = 0.0
      @y_scale = df["y"].abs.max.to_f
    when "minmax"
      @y_min = df["y"].min
      @y_scale = (df["y"].max - @y_min).to_f
    end
  end

  # Guard against a degenerate scale.
  @y_scale = 1 if @y_scale == 0

  @start = df["ds"].min
  @t_scale = df["ds"].max - @start
end
#linear_growth_init(df) ⇒ Object
609 610 611 612 613 614 615 616 |
# File 'lib/prophet/forecaster.rb', line 609

# Initial rate and offset for linear growth, taken from the straight line
# through the first and last rows of +df+ (columns "t" and "y_scaled").
#
# @return [Array] +[k, m]+
def linear_growth_init(df)
  first = 0
  last = df.size - 1
  elapsed = df["t"][last] - df["t"][first]
  slope = (df["y_scaled"][last] - df["y_scaled"][first]) / elapsed
  intercept = df["y_scaled"][first] - slope * df["t"][first]
  [slope, intercept]
end
#logistic_growth_init(df) ⇒ Object
618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 |
# File 'lib/prophet/forecaster.rb', line 618

# Initial rate and offset for logistic growth, derived from the first and
# last rows of +df+ (columns "t", "y_scaled" and "cap_scaled").
#
# @return [Array] +[k, m]+
def logistic_growth_init(df)
  first = 0
  last = df.size - 1
  elapsed = df["t"][last] - df["t"][first]

  # Keep y within 1%..99% of the cap, in case y > cap or y < 0.
  bounded = ->(cap, y) { [0.01 * cap, [0.99 * cap, y].min].max }

  cap_first = df["cap_scaled"][first]
  cap_last = df["cap_scaled"][last]
  y_first = bounded.call(cap_first, df["y_scaled"][first])
  y_last = bounded.call(cap_last, df["y_scaled"][last])

  ratio_first = cap_first / y_first
  ratio_last = cap_last / y_last

  # Nudge the first ratio when both ends are nearly equal, so the
  # difference of logs below is not (near) zero.
  ratio_first = 1.05 * ratio_first if (ratio_first - ratio_last).abs <= 0.01

  log_first = Math.log(ratio_first - 1)
  log_last = Math.log(ratio_last - 1)

  # Offset first, then rate.
  offset = log_first * elapsed / (log_first - log_last)
  rate = (log_first - log_last) / elapsed
  [rate, offset]
end
#make_all_seasonality_features(df) ⇒ Object
443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 |
# File 'lib/prophet/forecaster.rb', line 443 def make_all_seasonality_features(df) seasonal_features = [] prior_scales = [] modes = {"additive" => [], "multiplicative" => []} # Seasonality features @seasonalities.each do |name, props| features = make_seasonality_features( df["ds"], props[:period], props[:fourier_order], name ) if props[:condition_name] features[!df.where(props[:condition_name])] = 0 end seasonal_features << features prior_scales.concat([props[:prior_scale]] * features.shape[1]) modes[props[:mode]] << name end # Holiday features holidays = construct_holiday_dataframe(df["ds"]) if holidays.size > 0 features, holiday_priors, holiday_names = make_holiday_features(df["ds"], holidays) seasonal_features << features prior_scales.concat(holiday_priors) modes[@seasonality_mode].concat(holiday_names) end # Additional regressors @extra_regressors.each do |name, props| seasonal_features << Rover::DataFrame.new({name => df[name]}) prior_scales << props[:prior_scale] modes[props[:mode]] << name end # Dummy to prevent empty X if seasonal_features.size == 0 seasonal_features << Rover::DataFrame.new({"zeros" => [0] * df.shape[0]}) prior_scales << 1.0 end seasonal_features = df_concat_axis_one(seasonal_features) component_cols, modes = regressor_column_matrix(seasonal_features, modes) [seasonal_features, prior_scales, component_cols, modes] end |
#make_future_dataframe(periods:, freq: "D", include_history: true) ⇒ Object
961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 |
# File 'lib/prophet/forecaster.rb', line 961 def make_future_dataframe(periods:, freq: "D", include_history: true) raise Error, "Model has not been fit" unless @history_dates last_date = @history_dates.max # TODO add more freq # https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases case freq when /\A\d+S\z/ secs = freq.to_i dates = (periods + 1).times.map { |i| last_date + i * secs } when "H" hour = 3600 dates = (periods + 1).times.map { |i| last_date + i * hour } when "D" # days have constant length with UTC (no DST or leap seconds) day = 24 * 3600 dates = (periods + 1).times.map { |i| last_date + i * day } when "W" week = 7 * 24 * 3600 dates = (periods + 1).times.map { |i| last_date + i * week } when "MS" dates = [last_date] # TODO reset day from last date, but keep time periods.times do dates << dates.last.to_datetime.next_month.to_time.utc end when "QS" dates = [last_date] # TODO reset day and month from last date, but keep time periods.times do dates << dates.last.to_datetime.next_month.next_month.next_month.to_time.utc end when "YS" dates = [last_date] # TODO reset day and month from last date, but keep time periods.times do dates << dates.last.to_datetime.next_year.to_time.utc end else raise ArgumentError, "Unknown freq: #{freq}" end dates.select! { |d| d > last_date } dates = dates.last(periods) dates = @history_dates.to_numo.concatenate(Numo::NArray.cast(dates)) if include_history Rover::DataFrame.new({"ds" => dates}) end |
#make_holiday_features(dates, holidays) ⇒ Object
326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 |
# File 'lib/prophet/forecaster.rb', line 326

# Builds the holiday indicator features for +dates+.
#
# For every row of +holidays+ and every offset in its
# lower_window..upper_window range, a column named
# "<holiday>_delim_<+/-><offset>" is created and set to 1.0 at the position
# of +dates+ that matches the holiday date plus the offset. A column is
# still created (all zeros) when no date matches or the row has no "ds".
#
# The first call also records the holiday names in +@train_holiday_names+.
#
# @raise [ArgumentError] if the holidays prior scale is not positive or a
#   holiday has inconsistent prior scales
# @return [Array] +[holiday_features, prior_scale_list, holiday_names]+
def make_holiday_features(dates, holidays)
  # One zero-filled column per feature key, created on first access.
  expanded_holidays = Hash.new { |hash, key| hash[key] = Numo::DFloat.zeros(dates.size) }
  prior_scales = {}
  # Makes an index so we can perform `get_loc` below.
  # Strip to just dates. Converted to an Array once, outside the loops.
  row_index = dates.map(&:to_date).to_a

  holidays.each_row do |row|
    dt = row["ds"]
    lw = nil
    uw = nil
    begin
      lw = row["lower_window"].to_i
      uw = row["upper_window"].to_i
    rescue IndexError
      # No window columns: the holiday covers only its own day.
      lw = 0
      uw = 0
    end
    ps = @holidays_prior_scale
    if prior_scales[row["holiday"]] && prior_scales[row["holiday"]] != ps
      raise ArgumentError, "Holiday #{row["holiday"].inspect} does not have consistent prior scale specification."
    end
    raise ArgumentError, "Prior scale must be > 0" if ps <= 0
    prior_scales[row["holiday"]] = ps

    lw.upto(uw).each do |offset|
      occurrence = dt ? dt + offset : nil
      loc = occurrence ? row_index.index(occurrence) : nil
      key = "#{row["holiday"]}_delim_#{offset >= 0 ? "+" : "-"}#{offset.abs}"
      if loc
        expanded_holidays[key][loc] = 1.0
      else
        expanded_holidays[key] # Access key to generate value
      end
    end
  end
  holiday_features = Rover::DataFrame.new(expanded_holidays)
  # Make sure column order is consistent
  holiday_features = holiday_features[holiday_features.vector_names.sort]
  prior_scale_list = holiday_features.vector_names.map { |h| prior_scales[h.split("_delim_")[0]] }
  holiday_names = prior_scales.keys
  # Store holiday names used in fit
  if @train_holiday_names.nil?
    @train_holiday_names = Rover::Vector.new(holiday_names)
  end
  [holiday_features, prior_scale_list, holiday_names]
end
#make_seasonality_features(dates, period, series_order, prefix) ⇒ Object
296 297 298 299 |
# File 'lib/prophet/forecaster.rb', line 296

# Wraps the Fourier terms for +dates+ in a data frame whose columns are
# named "<prefix>_delim_1", "<prefix>_delim_2", ... in series order.
def make_seasonality_features(dates, period, series_order, prefix)
  columns = {}
  fourier_series(dates, period, series_order).each_with_index do |values, idx|
    columns["#{prefix}_delim_#{idx + 1}"] = values
  end
  Rover::DataFrame.new(columns)
end
#parse_seasonality_args(name, arg, auto_disable, default_order) ⇒ Object
540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 |
# File 'lib/prophet/forecaster.rb', line 540

# Resolves a seasonality setting to a Fourier order.
#
# @param name [String] seasonality name, used for lookup and log messages
# @param arg ["auto", true, false, #to_i] the user-supplied setting
# @param auto_disable [Boolean] whether "auto" should disable the seasonality
# @param default_order [Integer] order used when the seasonality is enabled
# @return [Integer] 0 when disabled, otherwise the Fourier order:
#   - "auto": 0 if a custom seasonality with this name exists or
#     +auto_disable+ is truthy (both cases are logged), else +default_order+
#   - true: +default_order+
#   - false: 0
#   - anything else: +arg.to_i+
def parse_seasonality_args(name, arg, auto_disable, default_order)
  case arg
  when "auto"
    if @seasonalities.include?(name)
      logger.info "Found custom seasonality named #{name.inspect}, disabling built-in #{name.inspect} seasonality."
      0
    elsif auto_disable
      logger.info "Disabling #{name} seasonality. Run prophet with #{name}_seasonality: true to override this."
      0
    else
      default_order
    end
  when true
    default_order
  when false
    0
  else
    arg.to_i
  end
end
#piecewise_linear(t, deltas, k, m, changepoint_ts) ⇒ Object
765 766 767 768 769 770 771 772 773 774 775 776 777 |
# File 'lib/prophet/forecaster.rb', line 765

# Evaluates a piecewise-linear trend at times +t+.
#
# Starting from base rate +k+ and offset +m+, each changepoint adds its
# delta to the rate, and a matching intercept change to the offset, for
# all times at or after that changepoint.
def piecewise_linear(t, deltas, k, m, changepoint_ts)
  # Intercept adjustment paired with each rate change.
  intercept_shifts = -changepoint_ts * deltas

  slope = t.new_ones * k
  offset = t.new_ones * m

  changepoint_ts.each_with_index do |change_time, idx|
    active = t >= change_time
    slope[active] = slope[active] + deltas[idx]
    offset[active] = offset[active] + intercept_shifts[idx]
  end

  slope * t + offset
end
#piecewise_logistic(t, cap, deltas, k, m, changepoint_ts) ⇒ Object
779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 |
# File 'lib/prophet/forecaster.rb', line 779 def piecewise_logistic(t, cap, deltas, k, m, changepoint_ts) k_1d = Numo::NArray.asarray(k) k_1d = k_1d.reshape(1) if k_1d.ndim < 1 k_cum = k_1d.concatenate(deltas.cumsum + k) gammas = Numo::DFloat.zeros(changepoint_ts.size) changepoint_ts.each_with_index do |t_s, i| gammas[i] = (t_s - m - gammas.sum) * (1 - k_cum[i] / k_cum[i + 1]) end # Get cumulative rate and offset at each t k_t = t.new_ones * k m_t = t.new_ones * m changepoint_ts.each_with_index do |t_s, s| indx = t >= t_s k_t[indx] += deltas[s] m_t[indx] += gammas[s] end cap.to_numo / (1 + Numo::NMath.exp(-k_t * (t - m_t))) end |
#predict(df = nil) ⇒ Object
737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 |
# File 'lib/prophet/forecaster.rb', line 737

# Produces a forecast for +df+, or for the fitted history when +df+ is nil.
#
# The result combines "ds", "trend" (plus "cap"/"floor" when applicable),
# uncertainty intervals when +@uncertainty_samples+ is truthy, the seasonal
# components, and a "yhat" column computed as
# trend * (1 + multiplicative_terms) + additive_terms.
#
# @raise [Error] if the model has not been fit
# @raise [ArgumentError] if +df+ has no rows
def predict(df = nil)
  raise Error, "Model has not been fit." unless @history

  if df.nil?
    df = @history.dup
  else
    raise ArgumentError, "Dataframe has no rows." if df.shape[0] == 0
    df = setup_dataframe(df.dup)
  end

  df["trend"] = predict_trend(df)
  seasonal_components = predict_seasonal_components(df)
  intervals = @uncertainty_samples ? predict_uncertainty(df) : nil

  # Only ds, trend and (when present) cap/floor survive from the input frame.
  kept = ["ds", "trend"]
  kept << "cap" if df.include?("cap")
  kept << "floor" if @logistic_floor

  forecast = df_concat_axis_one([df[kept], intervals, seasonal_components])
  forecast["yhat"] = forecast["trend"] * (forecast["multiplicative_terms"] + 1) + forecast["additive_terms"]
  forecast
end
#predict_seasonal_components(df) ⇒ Object
821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 |
# File 'lib/prophet/forecaster.rb', line 821 def predict_seasonal_components(df) seasonal_features, _, component_cols, _ = make_all_seasonality_features(df) if @uncertainty_samples lower_p = 100 * (1.0 - @interval_width) / 2 upper_p = 100 * (1.0 + @interval_width) / 2 end x = seasonal_features.to_numo data = {} (component_cols.vector_names - ["col"]).each do |component| beta_c = @params["beta"] * component_cols[component].to_numo comp = x.dot(beta_c.transpose) if @component_modes["additive"].include?(component) comp *= @y_scale end data[component] = comp.mean(axis: 1, nan: true) if @uncertainty_samples data["#{component}_lower"] = comp.percentile(lower_p, axis: 1) data["#{component}_upper"] = comp.percentile(upper_p, axis: 1) end end Rover::DataFrame.new(data) end |
#predict_trend(df) ⇒ Object
803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 |
# File 'lib/prophet/forecaster.rb', line 803

# Computes the trend for +df+ from the mean fitted parameters, selecting
# the trend function by +@growth+, then multiplies by +@y_scale+ and adds
# the "floor" column.
def predict_trend(df)
  rate = @params["k"].mean(nan: true)
  offset = @params["m"].mean(nan: true)
  deltas = @params["delta"].mean(axis: 0, nan: true)
  t = Numo::NArray.asarray(df["t"].to_a)

  scaled_trend =
    case @growth
    when "linear"
      piecewise_linear(t, deltas, rate, offset, @changepoints_t)
    when "logistic"
      piecewise_logistic(t, df["cap_scaled"], deltas, rate, offset, @changepoints_t)
    when "flat"
      flat_trend(t, offset)
    end

  scaled_trend * @y_scale + Numo::NArray.asarray(df["floor"].to_a)
end
#predict_uncertainty(df) ⇒ Object
885 886 887 888 889 890 891 892 893 894 895 896 897 898 |
# File 'lib/prophet/forecaster.rb', line 885

# Computes lower/upper interval bounds for "yhat" and "trend" from
# posterior predictive samples, using percentiles derived from
# +@interval_width+.
#
# @return a data frame with yhat_lower, yhat_upper, trend_lower, trend_upper
def predict_uncertainty(df)
  sim_values = sample_posterior_predictive(df)

  lower_p = 100 * (1.0 - @interval_width) / 2
  upper_p = 100 * (1.0 + @interval_width) / 2

  bounds = ["yhat", "trend"].each_with_object({}) do |key, acc|
    acc["#{key}_lower"] = sim_values[key].percentile(lower_p, axis: 1)
    acc["#{key}_upper"] = sim_values[key].percentile(upper_p, axis: 1)
  end

  Rover::DataFrame.new(bounds)
end
#predictive_samples(df) ⇒ Object
879 880 881 882 883 |
# File 'lib/prophet/forecaster.rb', line 879

# Returns posterior predictive samples for +df+. The frame is copied and
# prepared with +setup_dataframe+ before sampling.
def predictive_samples(df)
  prepared = setup_dataframe(df.dup)
  sample_posterior_predictive(prepared)
end
#regressor_column_matrix(seasonal_features, modes) ⇒ Object
493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 |
# File 'lib/prophet/forecaster.rb', line 493 def regressor_column_matrix(seasonal_features, modes) components = Rover::DataFrame.new( "col" => seasonal_features.shape[1].times.to_a, "component" => seasonal_features.vector_names.map { |x| x.split("_delim_")[0] } ) # Add total for holidays if @train_holiday_names components = add_group_component(components, "holidays", @train_holiday_names.uniq) end # Add totals additive and multiplicative components, and regressors ["additive", "multiplicative"].each do |mode| components = add_group_component(components, "#{mode}_terms", modes[mode]) regressors_by_mode = @extra_regressors.select { |r, props| props[:mode] == mode } .map { |r, props| r } components = add_group_component(components, "extra_regressors_#{mode}", regressors_by_mode) # Add combination components to modes modes[mode] << "#{mode}_terms" modes[mode] << "extra_regressors_#{mode}" end # After all of the additive/multiplicative groups have been added, modes[@seasonality_mode] << "holidays" # Convert to a binary matrix component_cols = components["col"].crosstab(components["component"]) component_cols["col"] = component_cols.delete("_") # Add columns for additive and multiplicative terms, if missing ["additive_terms", "multiplicative_terms"].each do |name| component_cols[name] = 0 unless component_cols.include?(name) end # TODO validation [component_cols, modes] end |
#sample_model(df, seasonal_features, iteration, s_a, s_m) ⇒ Object
900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 |
# File 'lib/prophet/forecaster.rb', line 900
# Simulates one set of "yhat" and "trend" values for a single parameter draw.
#
# @param df frame passed on to sample_predictive_trend; its row count sizes
#   the noise vector
# @param seasonal_features feature matrix (responds to +dot+)
# @param iteration [Integer] index into the @params arrays
# @param s_a multiplier applied to beta for the additive part
# @param s_m multiplier applied to beta for the multiplicative part
# @return [Hash] "yhat" and "trend" entries (a plain Hash rather than a data
#   frame, for performance)
def sample_model(df, seasonal_features, iteration, s_a, s_m)
  # The trend is drawn first, before the observation noise below
  trend = sample_predictive_trend(df, iteration)

  coefficients = @params["beta"][iteration, true]
  additive = seasonal_features.dot(coefficients * s_a) * @y_scale
  multiplicative = seasonal_features.dot(coefficients * s_m)

  noise_scale = @params["sigma_obs"][iteration]
  row_count = df.shape[0]
  noise = Numo::DFloat.new(row_count).rand_norm(0, noise_scale) * @y_scale

  {
    "yhat" => trend * (1 + multiplicative) + additive + noise,
    "trend" => trend
  }
end
#sample_posterior_predictive(df) ⇒ Object
846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 |
# File 'lib/prophet/forecaster.rb', line 846
# Collects simulated "yhat" and "trend" values over all parameter draws.
#
# For every entry along the first axis of @params["k"], sample_model is run
# ceil(@uncertainty_samples / number of entries) times (at least once); the
# per-run results are then column-stacked.
#
# @param df frame passed to make_all_seasonality_features and sample_model
# @return [Hash] "yhat" and "trend", each the column stack of all runs
def sample_posterior_predictive(df)
  iteration_count = @params["k"].shape[0]
  runs_per_iteration = [(@uncertainty_samples / iteration_count.to_f).ceil, 1].max

  # The seasonality features are generated once and shared by every run
  features, _, component_cols, _ = make_all_seasonality_features(df)

  # Numo representations are used for performance
  features = features.to_numo
  additive_terms = component_cols["additive_terms"].to_numo
  multiplicative_terms = component_cols["multiplicative_terms"].to_numo

  runs = {"yhat" => [], "trend" => []}

  iteration_count.times do |iteration|
    runs_per_iteration.times do
      simulated = sample_model(df, features, iteration, additive_terms, multiplicative_terms)
      runs.each { |name, collected| collected << simulated[name] }
    end
  end

  runs.transform_values { |collected| Numo::NArray.column_stack(collected) }
end
#sample_predictive_trend(df, iteration) ⇒ Object
917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 |
# File 'lib/prophet/forecaster.rb', line 917
# Simulates a trend for one parameter draw, including randomly sampled
# changepoints beyond t = 1.
#
# @param df frame providing "t" and "floor" (and "cap_scaled" for logistic
#   growth)
# @param iteration [Integer] index into the @params arrays
# @return the trend multiplied by @y_scale, plus the "floor" column
def sample_predictive_trend(df, iteration)
  k = @params["k"][iteration]
  m = @params["m"][iteration]
  deltas = @params["delta"][iteration, true]

  t = Numo::NArray.asarray(df["t"].to_a)
  upper_t = t.max

  # New changepoints from a Poisson process with rate S on [1, T]
  n_changes =
    if upper_t > 1
      poisson(@changepoints_t.size * (upper_t - 1))
    else
      0
    end

  if n_changes > 0
    # Numo's sort hands back a sorted copy instead of reordering the receiver,
    # so the sorted array has to be the one that is kept.
    changepoint_ts_new = (1 + Numo::DFloat.new(n_changes).rand * (upper_t - 1)).sort
  else
    changepoint_ts_new = []
  end

  # Get the empirical scale of the deltas, plus epsilon to avoid NaNs.
  lambda_ = deltas.abs.mean + 1e-8

  # Sample deltas
  deltas_new = laplace(0, lambda_, n_changes)

  # Times and deltas from the history come first, followed by the new ones
  changepoint_ts = @changepoints_t.concatenate(changepoint_ts_new)
  deltas = deltas.concatenate(deltas_new)

  trend =
    case @growth
    when "linear"
      piecewise_linear(t, deltas, k, m, changepoint_ts)
    when "logistic"
      piecewise_logistic(t, df["cap_scaled"], deltas, k, m, changepoint_ts)
    when "flat"
      flat_trend(t, m)
    end

  trend * @y_scale + Numo::NArray.asarray(df["floor"].to_a)
end
#set_auto_seasonalities ⇒ Object
561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 |
# File 'lib/prophet/forecaster.rb', line 561
# Registers the built-in yearly, weekly and daily seasonalities.
#
# For each one, parse_seasonality_args is asked for a Fourier order, given
# the user's setting, a flag saying whether the history argues for disabling
# it, and a default order. A seasonality is stored in @seasonalities only
# when that order is positive.
#
# @return [void]
def set_auto_seasonalities
  ds = @history["ds"]
  first = ds.min
  last = ds.max
  min_dt = ds.diff.min

  # 86400 is presumably the number of seconds in a day, making the thresholds
  # below day counts -- confirm against the type of the "ds" column
  days = 86400

  # name, user setting, period, default Fourier order, disable check.
  # The checks are lambdas so each is evaluated only when its row is reached.
  candidates = [
    ["yearly", @yearly_seasonality, 365.25, 10,
     -> { last - first < 730 * days }],
    ["weekly", @weekly_seasonality, 7, 3,
     -> { last - first < 14 * days || min_dt >= 7 * days }],
    ["daily", @daily_seasonality, 1, 4,
     -> { last - first < 2 * days || min_dt >= 1 * days }]
  ]

  candidates.each do |name, setting, period, default_order, disable_check|
    fourier_order = parse_seasonality_args(name, setting, disable_check.call, default_order)
    next unless fourier_order > 0

    @seasonalities[name] = {
      period: period,
      fourier_order: fourier_order,
      prior_scale: @seasonality_prior_scale,
      mode: @seasonality_mode,
      condition_name: nil
    }
  end
end
#set_changepoints ⇒ Object
252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 |
# File 'lib/prophet/forecaster.rb', line 252
# Determines @changepoints and their scaled counterpart @changepoints_t.
#
# Changepoints that are already set are only range-checked against the
# history. Otherwise @n_changepoints evenly spaced "ds" values are picked
# from the leading @changepoint_range share of the history rows.
#
# @raise [ArgumentError] if a preset changepoint lies outside the history
# @return [void]
def set_changepoints
  if @changepoints
    if @changepoints.size > 0
      history_ds = @history["ds"]
      before_start = @changepoints.min < history_ds.min
      after_end = @changepoints.max > history_ds.max
      raise ArgumentError, "Changepoints must fall within training data." if before_start || after_end
    end
  else
    hist_size = (@history.shape[0] * @changepoint_range).floor

    # Cap the requested count by what the usable rows allow
    if @n_changepoints + 1 > hist_size
      @n_changepoints = hist_size - 1
      logger.info "n_changepoints greater than number of observations. Using #{@n_changepoints}"
    end

    @changepoints =
      if @n_changepoints > 0
        step = (hist_size - 1) / @n_changepoints.to_f
        cp_indexes = Array.new(@n_changepoints + 1) { |i| (i * step).round }
        picked = Rover::Vector.new(@history["ds"].to_a.values_at(*cp_indexes))
        # tail(-1) presumably drops the first picked entry -- confirm against Rover
        picked.tail(-1)
      else
        []
      end
  end

  @changepoints_t =
    if @changepoints.size > 0
      sorted_points = @changepoints.map(&:to_i).sort.to_numo.cast_to(Numo::DFloat)
      (sorted_points - @start.to_i) / @t_scale.to_f
    else
      # Placeholder single changepoint at 0
      Numo::NArray.asarray([0])
    end
end
#setup_dataframe(df, initialize_scales: false) ⇒ Object
156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
# File 'lib/prophet/forecaster.rb', line 156
# Validates a frame and adds the scaled columns derived from it.
#
# The "y", "ds" and regressor columns are converted on the frame that was
# passed in; the frame is then sorted by "ds" (the sorted frame is what gets
# the remaining columns and is returned). Columns added or rewritten after
# sorting: "floor" (when no logistic floor is used), "cap_scaled" (logistic
# growth), "t", "y_scaled" (when "y" is present) and each extra regressor,
# which is standardized with its stored :mu and :std.
#
# @param df the input frame; must contain "ds"
# @param initialize_scales [Boolean] forwarded to the initialize_scales method
# @raise [ArgumentError] on non-finite "y", nil "ds", a missing regressor,
#   condition, "floor" or "cap" column, nil/NaN regressor values, non-boolean
#   condition values, or a cap that does not exceed the floor
# @return the sorted frame with the derived columns
def setup_dataframe(df, initialize_scales: false)
  if df.include?("y")
    df["y"] = df["y"].map(&:to_f)
    raise ArgumentError, "Found infinity in column y." unless df["y"].all?(&:finite?)
  end
  # TODO support integers

  df["ds"] = to_datetime(df["ds"])

  raise ArgumentError, "Found NaN in column ds." if df["ds"].any?(&:nil?)

  @extra_regressors.each_key do |name|
    unless df.include?(name)
      raise ArgumentError, "Regressor #{name.inspect} missing from dataframe"
    end

    # nil.to_f is 0.0, so nils must be spotted before the conversion; NaN
    # survives the conversion and is checked afterwards.
    had_nil = df[name].any?(&:nil?)
    df[name] = df[name].map(&:to_f)
    if had_nil || df[name].any? { |value| value.is_a?(Float) && value.nan? }
      raise ArgumentError, "Found NaN in column #{name.inspect}"
    end
  end

  @seasonalities.each_value do |props|
    condition_name = props[:condition_name]
    next unless condition_name

    unless df.include?(condition_name)
      raise ArgumentError, "Condition #{condition_name.inspect} missing from dataframe"
    end
    if df.where(!df[condition_name].in([true, false])).any?
      raise ArgumentError, "Found non-boolean in column #{condition_name.inspect}"
    end
  end

  df = df.sort_by { |r| r["ds"] }

  # Method call (with arguments); the keyword of the same name is its first argument
  initialize_scales(initialize_scales, df)

  if @logistic_floor
    unless df.include?("floor")
      raise ArgumentError, "Expected column \"floor\"."
    end
  elsif @scaling == "absmax"
    df["floor"] = 0
  elsif @scaling == "minmax"
    df["floor"] = @y_min
  end

  if @growth == "logistic"
    unless df.include?("cap")
      raise ArgumentError, "Capacities must be supplied for logistic growth in column \"cap\""
    end
    if df[df["cap"] <= df["floor"]].size > 0
      raise ArgumentError, "cap must be greater than floor (which defaults to 0)."
    end
    df["cap_scaled"] = (df["cap"] - df["floor"]) / @y_scale.to_f
  end

  df["t"] = (df["ds"] - @start) / @t_scale.to_f
  if df.include?("y")
    df["y_scaled"] = (df["y"] - df["floor"]) / @y_scale.to_f
  end

  @extra_regressors.each do |name, props|
    df[name] = (df[name] - props[:mu]) / props[:std].to_f
  end

  df
end
#to_json ⇒ Object
1007 1008 1009 1010 1011 |
# File 'lib/prophet/forecaster.rb', line 1007
# Serializes the result of as_json to a JSON string.
#
# Optional arguments are accepted so the method can also be invoked as
# +to_json(state)+ without raising ArgumentError; the first argument, if any,
# is passed to JSON.generate as its options/state.
#
# @param args optional generator state or options (only the first is used)
# @return [String] the JSON document
def to_json(*args)
  # Loaded lazily, as in the original implementation
  require "json"

  JSON.generate(as_json, args.first)
end
#validate_column_name(name, check_holidays: true, check_seasonalities: true, check_regressors: true) ⇒ Object
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
# File 'lib/prophet/forecaster.rb', line 125
# Checks that +name+ may be used as a column name.
#
# A name is rejected when it contains "_delim_", is one of the reserved
# names (including their "_lower"/"_upper" forms), or -- depending on the
# enabled checks -- is already used by a holiday, a seasonality or an added
# regressor.
#
# @param name [String] the candidate name
# @param check_holidays [Boolean] compare against @holidays and the holidays
#   of @country_holidays
# @param check_seasonalities [Boolean] compare against @seasonalities
# @param check_regressors [Boolean] compare against @extra_regressors
# @raise [ArgumentError] if the name is not allowed
# @return [nil]
def validate_column_name(name, check_holidays: true, check_seasonalities: true, check_regressors: true)
  raise ArgumentError, "Name cannot contain \"_delim_\"" if name.include?("_delim_")

  component_names = %w[
    trend additive_terms daily weekly yearly holidays zeros
    extra_regressors_additive yhat extra_regressors_multiplicative
    multiplicative_terms
  ]
  bound_names = %w[lower upper].flat_map do |suffix|
    component_names.map { |component| "#{component}_#{suffix}" }
  end
  reserved_names = component_names + bound_names + %w[ds y cap floor y_scaled cap_scaled]

  raise ArgumentError, "Name #{name.inspect} is reserved." if reserved_names.include?(name)

  if check_holidays
    if @holidays && @holidays["holiday"].uniq.to_a.include?(name)
      raise ArgumentError, "Name #{name.inspect} already used for a holiday."
    end
    if @country_holidays && get_holiday_names(@country_holidays).to_a.include?(name)
      raise ArgumentError, "Name #{name.inspect} is a holiday name in #{@country_holidays.inspect}."
    end
  end

  if check_seasonalities && @seasonalities[name]
    raise ArgumentError, "Name #{name.inspect} already used for a seasonality."
  end

  raise ArgumentError, "Name #{name.inspect} already used for an added regressor." if check_regressors && @extra_regressors[name]
end
#validate_inputs ⇒ Object
90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
# File 'lib/prophet/forecaster.rb', line 90
# Validates the configuration held in the instance variables.
#
# Checks @growth, @changepoint_range, the structure and window columns of
# @holidays (when present) and @seasonality_mode. When holidays are given,
# their "ds" column is replaced by its to_datetime conversion and every
# distinct holiday name is run through validate_column_name.
#
# @raise [ArgumentError] on the first invalid setting found
# @return [nil]
def validate_inputs
  unless %w[linear logistic flat].include?(@growth)
    raise ArgumentError, "Parameter \"growth\" should be \"linear\", \"logistic\", or \"flat\"."
  end

  if @changepoint_range < 0 || @changepoint_range > 1
    raise ArgumentError, "Parameter \"changepoint_range\" must be in [0, 1]"
  end

  if @holidays
    well_formed = @holidays.is_a?(Rover::DataFrame) && @holidays.include?("ds") && @holidays.include?("holiday")
    unless well_formed
      raise ArgumentError, "holidays must be a DataFrame with \"ds\" and \"holiday\" columns."
    end

    @holidays["ds"] = to_datetime(@holidays["ds"])

    has_lower = @holidays.include?("lower_window")
    has_upper = @holidays.include?("upper_window")
    # The two window columns are only valid as a pair (xor detects a lone one)
    if has_lower ^ has_upper
      raise ArgumentError, "Holidays must have both lower_window and upper_window, or neither"
    end

    if has_lower
      raise ArgumentError, "Holiday lower_window should be <= 0" if @holidays["lower_window"].max > 0
      raise ArgumentError, "Holiday upper_window should be >= 0" if @holidays["upper_window"].min < 0
    end

    # Holiday names are not checked against the holidays themselves
    @holidays["holiday"].uniq.each do |holiday_name|
      validate_column_name(holiday_name, check_holidays: false)
    end
  end

  unless %w[additive multiplicative].include?(@seasonality_mode)
    raise ArgumentError, "seasonality_mode must be \"additive\" or \"multiplicative\""
  end
end