Module: Prophet::Diagnostics
- Defined in:
- lib/prophet/diagnostics.rb
Class Method Summary
- .coverage(df, w) ⇒ Object
- .cross_validation(model, horizon:, period: nil, initial: nil, cutoffs: nil) ⇒ Object
- .deepcopy(value) ⇒ Object
- .generate_cutoffs(df, horizon, initial, period) ⇒ Object
- .mae(df, w) ⇒ Object
- .mape(df, w) ⇒ Object
- .mdape(df, w) ⇒ Object
- .mse(df, w) ⇒ Object
- .performance_metrics(df, metrics: nil, rolling_window: 0.1, monthly: false) ⇒ Object
- .prophet_copy(m, cutoff = nil) ⇒ Object
- .rmse(df, w) ⇒ Object
- .rolling_mean_by_h(x, h, w, name) ⇒ Object
- .rolling_median_by_h(x, h, w, name) ⇒ Object
- .single_cutoff_forecast(df, model, cutoff, horizon, predict_columns) ⇒ Object
- .smape(df, w) ⇒ Object
- .timedelta(value) ⇒ Object
Class Method Details
.coverage(df, w) ⇒ Object
341 342 343 344 345 346 347 |
# File 'lib/prophet/diagnostics.rb', line 341

# Coverage of the uncertainty interval: flags, per row, whether "y" lies
# within ["yhat_lower", "yhat_upper"] (both bounds inclusive).
#
# @param df frame providing "y", "yhat_lower", "yhat_upper" and "horizon" columns
# @param w [Integer] rolling window size; a negative value disables aggregation
# @return with w < 0, a Rover::DataFrame of "horizon" and the raw per-row
#   flags; otherwise whatever rolling_mean_by_h returns for the flags
#   converted to floats
def self.coverage(df, w)
  observed = df["y"]
  inside_interval = (observed >= df["yhat_lower"]) & (observed <= df["yhat_upper"])
  horizons = df["horizon"]

  # Negative window: hand back the unaggregated flags
  return Rover::DataFrame.new({"horizon" => horizons, "coverage" => inside_interval}) if w < 0

  rolling_mean_by_h(inside_interval.to(:float), horizons, w, "coverage")
end
.cross_validation(model, horizon:, period: nil, initial: nil, cutoffs: nil) ⇒ Object
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
# File 'lib/prophet/diagnostics.rb', line 31

# Cross-validation for a fitted model: produces one forecast per cutoff and
# concatenates them into a single frame.
#
# When +cutoffs+ is nil they are generated from +horizon+, +period+ and
# +initial+ (via generate_cutoffs); otherwise the supplied cutoffs are
# validated against the model history.
#
# @param model fitted model; its history, seasonalities and
#   uncertainty_samples are read here
# @param horizon forecast horizon, anything accepted by timedelta
# @param period spacing between cutoffs; defaults to half the horizon
# @param initial initial training window; defaults to the larger of three
#   horizons and the longest seasonality period
# @param cutoffs explicit cutoffs; when given, +period+ and +initial+ are ignored
# @return the per-cutoff forecasts concatenated onto a new Rover::DataFrame
# @raise [Error] if the model has no history, or the supplied cutoffs fall
#   outside the allowed range
def self.cross_validation(model, horizon:, period: nil, initial: nil, cutoffs: nil)
  if model.history.nil?
    raise Error, "Model has not been fit. Fitting the model provides contextual parameters for cross validation."
  end

  history = model.history.dup
  horizon = timedelta(horizon)

  predict_columns = ["ds", "yhat"]
  predict_columns.concat(["yhat_lower", "yhat_upper"]) if model.uncertainty_samples

  # Longest seasonality period (in days) among the model's seasonalities
  period_max = model.seasonalities.reduce(0.0) do |largest, (_, props)|
    [largest, props[:period]].max
  end
  seasonality_dt = timedelta("#{period_max} days")

  if cutoffs.nil?
    # Fill in defaults for anything the caller left out, then derive cutoffs
    period = period.nil? ? 0.5 * horizon : timedelta(period)
    initial = initial.nil? ? [3 * horizon, seasonality_dt].max : timedelta(initial)
    cutoffs = generate_cutoffs(history, horizon, initial, period)
  else
    # The earliest cutoff must be strictly after the first date in the history
    earliest_date = history["ds"].min
    if cutoffs.min <= earliest_date
      raise Error, "Minimum cutoff value is not strictly greater than min date in history"
    end

    # The latest cutoff must leave at least one horizon of data after it
    end_date_minus_horizon = history["ds"].max - horizon
    if cutoffs.max > end_date_minus_horizon
      raise Error, "Maximum cutoff value is greater than end date minus horizon, no value for cross-validation remaining"
    end

    initial = cutoffs[0] - earliest_date
  end

  # The initial window (time between the start of the history and the first
  # cutoff) should not be shorter than the longest seasonality period
  if initial < seasonality_dt
    msg = "Seasonality has period of #{period_max} days " \
          "which is larger than initial window. " \
          "Consider increasing initial."
    # logger.warn(msg)
  end

  forecasts = cutoffs.map do |cutoff|
    single_cutoff_forecast(history, model, cutoff, horizon, predict_columns)
  end

  # Stack every per-cutoff frame into one result
  forecasts.reduce(Rover::DataFrame.new) { |combined, forecast| combined.concat(forecast) }
end
.deepcopy(value) ⇒ Object
167 168 169 170 171 172 173 174 175 |
# File 'lib/prophet/diagnostics.rb', line 167

# Recursively duplicates a value.
#
# Hashes are rebuilt with deep-copied keys and values, arrays with
# deep-copied elements; anything else is duplicated with #dup.
#
# @param value [Object] value to copy
# @return [Object] the copy
def self.deepcopy(value)
  return value.to_h { |key, item| [deepcopy(key), deepcopy(item)] } if value.is_a?(Hash)
  return value.map { |item| deepcopy(item) } if value.is_a?(Array)

  value.dup
end
.generate_cutoffs(df, horizon, initial, period) ⇒ Object
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/prophet/diagnostics.rb', line 3 def self.generate_cutoffs(df, horizon, initial, period) # Last cutoff is 'latest date in data - horizon' date cutoff = df["ds"].max - horizon if cutoff < df["ds"].min raise Error, "Less data than horizon." end result = [cutoff] while result[-1] >= df["ds"].min + initial cutoff -= period # If data does not exist in data range (cutoff, cutoff + horizon] if !(((df["ds"] > cutoff) & (df["ds"] <= cutoff + horizon)).any?) # Next cutoff point is 'last date before cutoff in data - horizon' if cutoff > df["ds"].min closest_date = df[df["ds"] <= cutoff].max["ds"] cutoff = closest_date - horizon end # else no data left, leave cutoff as is, it will be dropped. end result << cutoff end result = result[0...-1] if result.length == 0 raise Error, "Less data than horizon after initial window. Make horizon or initial shorter." end # logger.info("Making #{result.length} forecasts with cutoffs between #{result[-1]} and #{result[0]}") result.reverse end |
.mae(df, w) ⇒ Object
309 310 311 312 313 314 315 |
# File 'lib/prophet/diagnostics.rb', line 309

# Mean absolute error, based on |y - yhat|.
#
# @param df frame providing "y", "yhat" and "horizon" columns
# @param w [Integer] rolling window size; a negative value disables aggregation
# @return with w < 0, a Rover::DataFrame of "horizon" and the raw absolute
#   errors; otherwise whatever rolling_mean_by_h returns for them
def self.mae(df, w)
  residual = df["y"] - df["yhat"]
  absolute_error = residual.abs
  horizons = df["horizon"]

  # Negative window: hand back the unaggregated errors
  return Rover::DataFrame.new({"horizon" => horizons, "mae" => absolute_error}) if w < 0

  rolling_mean_by_h(absolute_error, horizons, w, "mae")
end
.mape(df, w) ⇒ Object
317 318 319 320 321 322 323 |
# File 'lib/prophet/diagnostics.rb', line 317

# Mean absolute percentage error, based on |(y - yhat) / y|.
#
# @param df frame providing "y", "yhat" and "horizon" columns
# @param w [Integer] rolling window size; a negative value disables aggregation
# @return with w < 0, a Rover::DataFrame of "horizon" and the raw absolute
#   percentage errors; otherwise whatever rolling_mean_by_h returns for them
def self.mape(df, w)
  observed = df["y"]
  relative_error = (observed - df["yhat"]) / observed
  absolute_pct_error = relative_error.abs
  horizons = df["horizon"]

  # Negative window: hand back the unaggregated errors
  return Rover::DataFrame.new({"horizon" => horizons, "mape" => absolute_pct_error}) if w < 0

  rolling_mean_by_h(absolute_pct_error, horizons, w, "mape")
end
.mdape(df, w) ⇒ Object
325 326 327 328 329 330 331 |
# File 'lib/prophet/diagnostics.rb', line 325

# Median absolute percentage error, based on |(y - yhat) / y|.
#
# @param df frame providing "y", "yhat" and "horizon" columns
# @param w [Integer] rolling window size; a negative value disables aggregation
# @return with w < 0, a Rover::DataFrame of "horizon" and the raw absolute
#   percentage errors; otherwise whatever rolling_median_by_h returns for them
def self.mdape(df, w)
  observed = df["y"]
  relative_error = (observed - df["yhat"]) / observed
  absolute_pct_error = relative_error.abs
  horizons = df["horizon"]

  # Negative window: hand back the unaggregated errors
  return Rover::DataFrame.new({"horizon" => horizons, "mdape" => absolute_pct_error}) if w < 0

  rolling_median_by_h(absolute_pct_error, horizons, w, "mdape")
end
.mse(df, w) ⇒ Object
295 296 297 298 299 300 301 |
# File 'lib/prophet/diagnostics.rb', line 295

# Mean squared error, based on (y - yhat) ** 2.
#
# @param df frame providing "y", "yhat" and "horizon" columns
# @param w [Integer] rolling window size; a negative value disables aggregation
# @return with w < 0, a Rover::DataFrame of "horizon" and the raw squared
#   errors; otherwise whatever rolling_mean_by_h returns for them
def self.mse(df, w)
  residual = df["y"] - df["yhat"]
  squared_error = residual ** 2
  horizons = df["horizon"]

  # Negative window: hand back the unaggregated errors
  return Rover::DataFrame.new({"horizon" => horizons, "mse" => squared_error}) if w < 0

  rolling_mean_by_h(squared_error, horizons, w, "mse")
end
.performance_metrics(df, metrics: nil, rolling_window: 0.1, monthly: false) ⇒ Object
177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
# File 'lib/prophet/diagnostics.rb', line 177

# Computes performance metrics from cross-validation results.
#
# The horizon of every row is derived as "ds" - "cutoff", rows are sorted by
# horizon, and each requested metric is computed by calling the class method
# of the same name with the sorted frame and the window size.
#
# The +metrics+ argument is never modified: pruning ("coverage" when the
# interval columns are absent, "mape" when some |y| is below 1e-8) is applied
# to a private copy, so a caller can reuse its array across calls.
#
# @param df cross-validation results with "ds", "cutoff", "y" and "yhat"
#   columns ("yhat_lower"/"yhat_upper" are needed for "coverage")
# @param metrics [Array<String>, nil] metric names; nil selects every valid metric
# @param rolling_window [Numeric] fraction of the rows used as the rolling
#   window; a value that yields a negative window disables aggregation in
#   the metric methods
# @param monthly [Boolean] monthly horizons; not implemented
# @return the frame returned for the first metric, with one column per
#   computed metric added, or nil if no metric is left to compute
# @raise [ArgumentError] if +metrics+ contains duplicates or unknown names
# @raise [Error] if +monthly+ is requested
def self.performance_metrics(df, metrics: nil, rolling_window: 0.1, monthly: false)
  valid_metrics = ["mse", "rmse", "mae", "mape", "mdape", "smape", "coverage"]

  # Copy before pruning so neither the caller's array nor valid_metrics
  # (which is interpolated into the error message below) is mutated.
  metrics = metrics.nil? ? valid_metrics.dup : metrics.dup

  if (!df.include?("yhat_lower") || !df.include?("yhat_upper")) && metrics.include?("coverage")
    metrics.delete("coverage")
  end
  if metrics.uniq.length != metrics.length
    raise ArgumentError, "Input metrics must be a list of unique values"
  end
  unless (metrics - valid_metrics).empty?
    raise ArgumentError, "Valid values for metrics are: #{valid_metrics}"
  end

  df_m = df.dup
  if monthly
    raise Error, "Not implemented yet"
    # df_m["horizon"] = df_m["ds"].dt.to_period("M").astype(int) - df_m["cutoff"].dt.to_period("M").astype(int)
  else
    df_m["horizon"] = df_m["ds"] - df_m["cutoff"]
  end
  df_m.sort_by! { |r| r["horizon"] }

  if metrics.include?("mape") && df_m["y"].abs.min < 1e-8
    # logger.info("Skipping MAPE because y close to 0")
    metrics.delete("mape")
  end
  return nil if metrics.empty?

  # Window size in rows; when non-negative it is kept within [1, number of rows]
  w = (rolling_window * df_m.shape[0]).to_i
  if w >= 0
    w = [w, 1].max
    w = [w, df_m.shape[0]].min
  end

  # Compute all metrics; names were validated against valid_metrics above,
  # so the dynamic dispatch can only reach the metric methods.
  dfs = {}
  metrics.each do |metric|
    dfs[metric] = send(metric, df_m, w)
  end

  # Use the first metric's frame as the base and attach every metric column
  res = dfs[metrics[0]]
  metrics.each do |metric|
    res[metric] = dfs[metric][metric]
  end
  res
end
.prophet_copy(m, cutoff = nil) ⇒ Object
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
# File 'lib/prophet/diagnostics.rb', line 115

# Creates a new, unfitted model of the same class as +m+, configured with the
# same options.
#
# User-specified changepoints are carried over; when a cutoff is given, only
# changepoints strictly before the last history date at or before the cutoff
# are kept. Extra regressors, seasonalities and country holidays are
# deep-copied onto the new model.
#
# @param m fitted model to copy
# @param cutoff optional cutoff used to filter specified changepoints
# @return a new instance of m.class
# @raise [Error] if +m+ has no history (has not been fitted)
def self.prophet_copy(m, cutoff = nil)
  raise Error, "This is for copying a fitted Prophet object." if m.history.nil?

  changepoints = nil
  if m.specified_changepoints
    changepoints = m.changepoints
    unless cutoff.nil?
      # Keep only changepoints before the last observed date within the cutoff
      history_dates = m.history["ds"]
      last_history_date = history_dates[history_dates <= cutoff].max
      changepoints = changepoints[changepoints < last_history_date]
    end
  end

  # Automatic seasonalities are switched off because the seasonalities that
  # were actually used are copied over from m.seasonalities below.
  options = {
    growth: m.growth,
    n_changepoints: m.n_changepoints,
    changepoint_range: m.changepoint_range,
    changepoints: changepoints,
    yearly_seasonality: false,
    weekly_seasonality: false,
    daily_seasonality: false,
    holidays: m.holidays,
    seasonality_mode: m.seasonality_mode,
    seasonality_prior_scale: m.seasonality_prior_scale,
    changepoint_prior_scale: m.changepoint_prior_scale,
    holidays_prior_scale: m.holidays_prior_scale,
    mcmc_samples: m.mcmc_samples,
    interval_width: m.interval_width,
    uncertainty_samples: m.uncertainty_samples
  }

  m.class.new(**options).tap do |copy|
    copy.extra_regressors = deepcopy(m.extra_regressors)
    copy.seasonalities = deepcopy(m.seasonalities)
    copy.country_holidays = deepcopy(m.country_holidays)
  end
end
.rmse(df, w) ⇒ Object
303 304 305 306 307 |
# File 'lib/prophet/diagnostics.rb', line 303

# Root mean squared error: takes the result of mse and replaces its "mse"
# column with an "rmse" column holding the square root of each value.
#
# @param df frame passed through to mse
# @param w [Integer] rolling window size passed through to mse
# @return the frame returned by mse, with "mse" swapped for "rmse"
def self.rmse(df, w)
  result = mse(df, w)
  squared_errors = result.delete("mse")
  result["rmse"] = squared_errors.map { |value| Math.sqrt(value) }
  result
end
.rolling_mean_by_h(x, h, w, name) ⇒ Object
224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 |
# File 'lib/prophet/diagnostics.rb', line 224 def self.rolling_mean_by_h(x, h, w, name) # Aggregate over h df = Rover::DataFrame.new({"x" => x, "h" => h}) df2 = df.group("h").sum("x").inner_join(df.group("h").count).sort_by { |r| r["h"] } xs = df2["sum_x"] ns = df2["count"] hs = df2["h"] trailing_i = df2.length - 1 x_sum = 0 n_sum = 0 # We don't know output size but it is bounded by len(df2) res_x = [nil] * df2.length # Start from the right and work backwards (df2.length - 1).downto(0) do |i| x_sum += xs[i] n_sum += ns[i] while n_sum >= w # Include points from the previous horizon. All of them if still # less than w, otherwise weight the mean by the difference excess_n = n_sum - w excess_x = excess_n * xs[i] / ns[i] res_x[trailing_i] = (x_sum - excess_x) / w x_sum -= xs[trailing_i] n_sum -= ns[trailing_i] trailing_i -= 1 end end res_h = hs[(trailing_i + 1)..-1] res_x = res_x[(trailing_i + 1)..-1] Rover::DataFrame.new({"horizon" => res_h, name => res_x}) end |
.rolling_median_by_h(x, h, w, name) ⇒ Object
260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 |
# File 'lib/prophet/diagnostics.rb', line 260 def self.rolling_median_by_h(x, h, w, name) # Aggregate over h df = Rover::DataFrame.new({"x" => x, "h" => h}) grouped = df.group("h") df2 = grouped.count.sort_by { |r| r["h"] } hs = df2["h"] res_h = [] res_x = [] # Start from the right and work backwards i = hs.length - 1 while i >= 0 h_i = hs[i] xs = df[df["h"] == h_i]["x"].to_a next_idx_to_add = (h == h_i).to_numo.cast_to(Numo::UInt8).argmax - 1 while xs.length < w && next_idx_to_add >= 0 # Include points from the previous horizon. All of them if still # less than w, otherwise just enough to get to w. xs << x[next_idx_to_add] next_idx_to_add -= 1 end if xs.length < w # Ran out of points before getting enough. break end res_h << hs[i] res_x << Rover::Vector.new(xs).median i -= 1 end res_h.reverse! res_x.reverse! Rover::DataFrame.new({"horizon" => res_h, name => res_x}) end |
.single_cutoff_forecast(df, model, cutoff, horizon, predict_columns) ⇒ Object
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
# File 'lib/prophet/diagnostics.rb', line 89

# Forecast for a single cutoff: fits a copy of +model+ on the rows up to and
# including the cutoff and predicts the rows in (cutoff, cutoff + horizon].
#
# @param df full history with "ds" and "y" columns, plus any columns the
#   model needs for prediction
# @param model fitted model whose configuration and fit_kwargs are reused
# @param cutoff last date included in the training data
# @param horizon length of the forecast window after the cutoff
# @param predict_columns [Array<String>] prediction columns to keep
# @return the selected prediction columns merged with the matching "y"
#   values and a constant "cutoff" column
# @raise [Error] if fewer than two rows lie at or before the cutoff
def self.single_cutoff_forecast(df, model, cutoff, horizon, predict_columns)
  # Fresh model carrying the same fitting options
  fresh_model = prophet_copy(model, cutoff)

  # Train on everything up to and including the cutoff
  training = df[df["ds"] <= cutoff]
  raise Error, "Less than two datapoints before cutoff. Increase initial window." if training.shape[0] < 2

  fresh_model.fit(training, **model.fit_kwargs)

  # Rows to predict: strictly after the cutoff, within one horizon
  dates = df["ds"]
  in_horizon = (dates > cutoff) & (dates <= cutoff + horizon)

  # Columns the future frame has to carry
  future_columns = ["ds"]
  if fresh_model.growth == "logistic"
    future_columns << "cap"
    future_columns << "floor" if fresh_model.logistic_floor
  end
  future_columns.concat(fresh_model.extra_regressors.keys)
  condition_names = fresh_model.seasonalities.map { |_, props| props[:condition_name] }.compact
  future_columns.concat(condition_names)

  forecast = fresh_model.predict(df[in_horizon][future_columns])

  # Combine predictions, observed values and the cutoff
  observed = df[in_horizon][["y"]]
  cutoff_column = Rover::DataFrame.new({"cutoff" => [cutoff] * forecast.length})
  forecast[predict_columns].merge(observed).merge(cutoff_column)
end
.smape(df, w) ⇒ Object
333 334 335 336 337 338 339 |
# File 'lib/prophet/diagnostics.rb', line 333

# Symmetric mean absolute percentage error, based on
# |y - yhat| / ((|y| + |yhat|) / 2).
#
# @param df frame providing "y", "yhat" and "horizon" columns
# @param w [Integer] rolling window size; a negative value disables aggregation
# @return with w < 0, a Rover::DataFrame of "horizon" and the raw symmetric
#   absolute percentage errors; otherwise whatever rolling_mean_by_h returns
#   for them
def self.smape(df, w)
  observed = df["y"]
  predicted = df["yhat"]
  absolute_error = (observed - predicted).abs
  mean_magnitude = (observed.abs + predicted.abs) / 2
  symmetric_error = absolute_error / mean_magnitude
  horizons = df["horizon"]

  # Negative window: hand back the unaggregated errors
  return Rover::DataFrame.new({"horizon" => horizons, "smape" => symmetric_error}) if w < 0

  rolling_mean_by_h(symmetric_error, horizons, w, "smape")
end
.timedelta(value) ⇒ Object
156 157 158 159 160 161 162 163 164 165 |
# File 'lib/prophet/diagnostics.rb', line 156

# Normalizes a time delta.
#
# Numeric values are returned unchanged. Strings of the form "<number> days"
# (integer or decimal) are converted to a Float number of seconds
# (days * 86400).
#
# @param value [Numeric, String] the delta to normalize
# @return [Numeric] the numeric input itself, or seconds as a Float
# @raise [Error] if +value+ is neither numeric nor an "<number> days" string
def self.timedelta(value)
  # ActiveSupport::Duration is a numeric, so it passes straight through
  return value if value.is_a?(Numeric)

  days = /\A(\d+(\.\d+)?) days\z/.match(value)
  raise Error, "Unknown time delta" unless days

  days[1].to_f * 86400
end