Step 5 - Moving Block Bootstrap (MBB) for ITS confidence intervals¶

Goal: understand how MBB turns model residuals into confidence intervals around the counterfactual.

Sections:

5a. Inspect residuals and their autocorrelation (the reason we need blocks).
5b. Single simulation in slow motion - block resampling made visible.
5c. Build the pred_matrix manually - watch the fan of counterfactuals accumulate.
5d. Compare CI methods: quantile vs symmetric_sd.
5e. Block length sensitivity - how block_length affects CI width.

In [1]:

Copied!





%matplotlib inline

from IPython.display import display
import logging
import math
import warnings
from pathlib import Path

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

warnings.filterwarnings("ignore")
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s  %(levelname)s  %(name)s - %(message)s",
    datefmt="%H:%M:%S",
)
logging.getLogger("cmdstanpy").setLevel(logging.WARNING)
logging.getLogger("its2s").setLevel(logging.WARNING)

OUT_DIR = Path.cwd() / "figures"
OUT_DIR.mkdir(exist_ok=True)
INTERVENTION = "2022-03-15"
TEST_DAYS    = 365
HOLDOUT_DAYS = 42
%matplotlib inline

from IPython.display import display
import logging
import math
import warnings
from pathlib import Path

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

warnings.filterwarnings("ignore")
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s  %(levelname)s  %(name)s - %(message)s",
    datefmt="%H:%M:%S",
)
logging.getLogger("cmdstanpy").setLevel(logging.WARNING)
logging.getLogger("its2s").setLevel(logging.WARNING)

OUT_DIR = Path.cwd() / "figures"
OUT_DIR.mkdir(exist_ok=True)
INTERVENTION = "2022-03-15"
TEST_DAYS    = 365
HOLDOUT_DAYS = 42

Setup - load data, split, and fit the same Prophet+XGB model as step 4¶

In [2]:

Copied!





from its2s.data_prep import prepare_splits
from its2s.models.prophet_xgb import ProphetXGBHybridModel
from its2s.settings import get_model_config, load_config

df = pd.read_csv("data/dummy_data.csv", parse_dates=["ds"])
config = load_config()
splits = prepare_splits(df, INTERVENTION, test_days=TEST_DAYS, holdout_days=HOLDOUT_DAYS)

print("Fitting Prophet+XGB on training data ...")
model = ProphetXGBHybridModel(params=get_model_config(config, "prophet_xgb"))
fit_result = model.fit(splits.train_df, target_col="y", date_col="ds")
print("Fit complete.")
from its2s.data_prep import prepare_splits
from its2s.models.prophet_xgb import ProphetXGBHybridModel
from its2s.settings import get_model_config, load_config

df = pd.read_csv("data/dummy_data.csv", parse_dates=["ds"])
config = load_config()
splits = prepare_splits(df, INTERVENTION, test_days=TEST_DAYS, holdout_days=HOLDOUT_DAYS)

print("Fitting Prophet+XGB on training data ...")
model = ProphetXGBHybridModel(params=get_model_config(config, "prophet_xgb"))
fit_result = model.fit(splits.train_df, target_col="y", date_col="ds")
print("Fit complete.")

Fitting Prophet+XGB on training data ...

Fit complete.

5a. Raw residuals - the raw material for MBB¶

MBB resamples residuals in contiguous blocks to preserve autocorrelation. Non-zero autocorrelation is exactly why we can't use iid resampling.

In [3]:

Copied!





residuals     = fit_result.residuals
fitted_values = fit_result.fitted_values
train_dates   = splits.train_df["ds"]

print(f"  n residuals : {len(residuals)}")
print(f"  mean        : {residuals.mean():.4f}  (should be near 0)")
print(f"  std         : {residuals.std():.4f}")
print(f"  min / max   : {residuals.min():.3f} / {residuals.max():.3f}")

lag1_acf  = np.corrcoef(residuals[1:],  residuals[:-1])[0, 1]
lag7_acf  = np.corrcoef(residuals[7:],  residuals[:-7])[0, 1]
lag14_acf = np.corrcoef(residuals[14:], residuals[:-14])[0, 1]
print(f"\n  Autocorrelation of residuals:")
print(f"    lag-1  : {lag1_acf:.3f}")
print(f"    lag-7  : {lag7_acf:.3f}")
print(f"    lag-14 : {lag14_acf:.3f}")
residuals     = fit_result.residuals
fitted_values = fit_result.fitted_values
train_dates   = splits.train_df["ds"]

print(f"  n residuals : {len(residuals)}")
print(f"  mean        : {residuals.mean():.4f}  (should be near 0)")
print(f"  std         : {residuals.std():.4f}")
print(f"  min / max   : {residuals.min():.3f} / {residuals.max():.3f}")

lag1_acf  = np.corrcoef(residuals[1:],  residuals[:-1])[0, 1]
lag7_acf  = np.corrcoef(residuals[7:],  residuals[:-7])[0, 1]
lag14_acf = np.corrcoef(residuals[14:], residuals[:-14])[0, 1]
print(f"\n  Autocorrelation of residuals:")
print(f"    lag-1  : {lag1_acf:.3f}")
print(f"    lag-7  : {lag7_acf:.3f}")
print(f"    lag-14 : {lag14_acf:.3f}")

  n residuals : 1227
  mean        : -0.0001  (should be near 0)
  std         : 1.5196
  min / max   : -5.353 / 4.784

  Autocorrelation of residuals:
    lag-1  : 0.008
    lag-7  : -0.004
    lag-14 : -0.073

In [4]:

Copied!





fig, axes = plt.subplots(2, 1, figsize=(13, 5), sharex=True)
axes[0].plot(train_dates, splits.train_df["y"].values, color="#333333",
             linewidth=0.6, alpha=0.8, label="Observed y")
axes[0].plot(train_dates, fitted_values, color="#2166AC",
             linewidth=0.9, alpha=0.8, label="Fitted values")
axes[0].set_ylabel("y")
axes[0].legend(fontsize=8)
axes[0].set_title("3a. Fitted values vs observed (training period)", fontsize=10)

axes[1].plot(train_dates, residuals, color="#B2182B", linewidth=0.6, alpha=0.8)
axes[1].axhline(0, color="black", linewidth=0.8, linestyle="--")
axes[1].set_ylabel("Residual")
axes[1].set_title(f"Residuals  (std={residuals.std():.3f},  lag-1 ACF={lag1_acf:.3f})", fontsize=10)
axes[1].xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
plt.tight_layout()
plt.savefig(OUT_DIR / "step5a_residuals.png", dpi=150)
display(fig)
fig, axes = plt.subplots(2, 1, figsize=(13, 5), sharex=True)
axes[0].plot(train_dates, splits.train_df["y"].values, color="#333333",
             linewidth=0.6, alpha=0.8, label="Observed y")
axes[0].plot(train_dates, fitted_values, color="#2166AC",
             linewidth=0.9, alpha=0.8, label="Fitted values")
axes[0].set_ylabel("y")
axes[0].legend(fontsize=8)
axes[0].set_title("3a. Fitted values vs observed (training period)", fontsize=10)

axes[1].plot(train_dates, residuals, color="#B2182B", linewidth=0.6, alpha=0.8)
axes[1].axhline(0, color="black", linewidth=0.8, linestyle="--")
axes[1].set_ylabel("Residual")
axes[1].set_title(f"Residuals  (std={residuals.std():.3f},  lag-1 ACF={lag1_acf:.3f})", fontsize=10)
axes[1].xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
plt.tight_layout()
plt.savefig(OUT_DIR / "step5a_residuals.png", dpi=150)
display(fig)

No description has been provided for this image

5b. Single MBB simulation in slow motion¶

One bootstrap world = a fresh set of residuals built by stitching contiguous blocks, then added back to the fitted values to form a plausible alternative history.

In [5]:

Copied!





from its2s.bootstrap.mbb import _resample_blocks

BLOCK_LENGTH = 14
sim_rng = np.random.default_rng(0)
resampled = _resample_blocks(residuals, BLOCK_LENGTH, sim_rng)

# Recover block start indices (deterministic with same seed)
viz_rng = np.random.default_rng(0)
n = len(residuals)
n_blocks = math.ceil(n / BLOCK_LENGTH)
max_start = n - BLOCK_LENGTH
starts = viz_rng.integers(0, max_start + 1, size=n_blocks)

print(f"  block_length : {BLOCK_LENGTH}")
print(f"  n_blocks     : {n_blocks}  (to cover {n} residuals)")
print(f"  First 5 block start indices: {starts[:5].tolist()}")

perturbed_y = fitted_values + resampled
print(f"\n  Original training y std  : {splits.train_df["y"].values.std():.3f}")
print(f"  Perturbed training y std : {perturbed_y.std():.3f}")
from its2s.bootstrap.mbb import _resample_blocks

BLOCK_LENGTH = 14
sim_rng = np.random.default_rng(0)
resampled = _resample_blocks(residuals, BLOCK_LENGTH, sim_rng)

# Recover block start indices (deterministic with same seed)
viz_rng = np.random.default_rng(0)
n = len(residuals)
n_blocks = math.ceil(n / BLOCK_LENGTH)
max_start = n - BLOCK_LENGTH
starts = viz_rng.integers(0, max_start + 1, size=n_blocks)

print(f"  block_length : {BLOCK_LENGTH}")
print(f"  n_blocks     : {n_blocks}  (to cover {n} residuals)")
print(f"  First 5 block start indices: {starts[:5].tolist()}")

perturbed_y = fitted_values + resampled
print(f"\n  Original training y std  : {splits.train_df["y"].values.std():.3f}")
print(f"  Perturbed training y std : {perturbed_y.std():.3f}")

  block_length : 14
  n_blocks     : 88  (to cover 1227 residuals)
  First 5 block start indices: [1032, 773, 620, 327, 373]

  Original training y std  : 7.393
  Perturbed training y std : 7.324

In [6]:

Copied!





fig, axes = plt.subplots(2, 1, figsize=(13, 5), sharex=True)

colors_cycle = ["#4C72B0", "#DD8452", "#55A868", "#C44E52", "#8172B3"]
for b_idx, start in enumerate(starts[:8]):
    x_start = train_dates.iloc[start]
    x_end   = train_dates.iloc[min(start + BLOCK_LENGTH - 1, n - 1)]
    axes[0].axvspan(x_start, x_end,
                    color=colors_cycle[b_idx % len(colors_cycle)],
                    alpha=0.25)
axes[0].plot(train_dates, residuals, color="#333333", linewidth=0.7, label="Original residuals")
axes[0].plot(train_dates, resampled, color="#B2182B", linewidth=0.7, alpha=0.7, label="Resampled residuals")
axes[0].axhline(0, color="black", linewidth=0.6, linestyle="--")
axes[0].set_ylabel("Residual")
axes[0].legend(fontsize=8)
axes[0].set_title(
    f"3b. Original vs resampled residuals  (block_length={BLOCK_LENGTH}, shaded = first 8 blocks picked)",
    fontsize=10,
)

axes[1].plot(train_dates, splits.train_df["y"].values, color="#333333",
             linewidth=0.6, label="Original training y")
axes[1].plot(train_dates, perturbed_y, color="#2166AC",
             linewidth=0.6, alpha=0.7, label="Perturbed training y")
axes[1].set_ylabel("y")
axes[1].legend(fontsize=8)
axes[1].set_title("Perturbed training series (fitted + resampled residuals)", fontsize=10)
axes[1].xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
plt.tight_layout()
plt.savefig(OUT_DIR / "step5b_block_resampling.png", dpi=150)
display(fig)
fig, axes = plt.subplots(2, 1, figsize=(13, 5), sharex=True)

colors_cycle = ["#4C72B0", "#DD8452", "#55A868", "#C44E52", "#8172B3"]
for b_idx, start in enumerate(starts[:8]):
    x_start = train_dates.iloc[start]
    x_end   = train_dates.iloc[min(start + BLOCK_LENGTH - 1, n - 1)]
    axes[0].axvspan(x_start, x_end,
                    color=colors_cycle[b_idx % len(colors_cycle)],
                    alpha=0.25)
axes[0].plot(train_dates, residuals, color="#333333", linewidth=0.7, label="Original residuals")
axes[0].plot(train_dates, resampled, color="#B2182B", linewidth=0.7, alpha=0.7, label="Resampled residuals")
axes[0].axhline(0, color="black", linewidth=0.6, linestyle="--")
axes[0].set_ylabel("Residual")
axes[0].legend(fontsize=8)
axes[0].set_title(
    f"3b. Original vs resampled residuals  (block_length={BLOCK_LENGTH}, shaded = first 8 blocks picked)",
    fontsize=10,
)

axes[1].plot(train_dates, splits.train_df["y"].values, color="#333333",
             linewidth=0.6, label="Original training y")
axes[1].plot(train_dates, perturbed_y, color="#2166AC",
             linewidth=0.6, alpha=0.7, label="Perturbed training y")
axes[1].set_ylabel("y")
axes[1].legend(fontsize=8)
axes[1].set_title("Perturbed training series (fitted + resampled residuals)", fontsize=10)
axes[1].xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
plt.tight_layout()
plt.savefig(OUT_DIR / "step5b_block_resampling.png", dpi=150)
display(fig)

5c. Build `pred_matrix` manually - the fan of counterfactual worlds¶

Each simulation refits the model on a perturbed training series and forecasts over full_predict_df. Stacking N simulations gives an (n_dates x n_sims) matrix - the empirical distribution of counterfactual trajectories.

In [7]:

Copied!





from its2s.bootstrap.mbb import _single_mbb_sim

N_SIM = 100
base_rng = np.random.default_rng(42)
sim_seeds = base_rng.integers(0, 2**31, size=N_SIM)

n_dates = len(splits.full_predict_df)
pred_matrix = np.full((n_dates, N_SIM), np.nan)

print(f"  Running {N_SIM} simulations ...")
for i in range(N_SIM):
    preds = _single_mbb_sim(
        i, model, splits.train_df, splits.full_predict_df,
        fitted_values, residuals,
        BLOCK_LENGTH, "y", "ds", None, int(sim_seeds[i]),
    )
    pred_matrix[:, i] = preds

print(f"  pred_matrix shape : {pred_matrix.shape}  (n_dates x n_sims)")
print(f"  Spread at day 0   : std = {np.nanstd(pred_matrix[0, :]):.3f}")
mid = min(180, n_dates - 1)
print(f"  Spread at day {mid:>3}: std = {np.nanstd(pred_matrix[mid, :]):.3f}")
print("  -> uncertainty typically grows further from the training period")
from its2s.bootstrap.mbb import _single_mbb_sim

N_SIM = 100
base_rng = np.random.default_rng(42)
sim_seeds = base_rng.integers(0, 2**31, size=N_SIM)

n_dates = len(splits.full_predict_df)
pred_matrix = np.full((n_dates, N_SIM), np.nan)

print(f"  Running {N_SIM} simulations ...")
for i in range(N_SIM):
    preds = _single_mbb_sim(
        i, model, splits.train_df, splits.full_predict_df,
        fitted_values, residuals,
        BLOCK_LENGTH, "y", "ds", None, int(sim_seeds[i]),
    )
    pred_matrix[:, i] = preds

print(f"  pred_matrix shape : {pred_matrix.shape}  (n_dates x n_sims)")
print(f"  Spread at day 0   : std = {np.nanstd(pred_matrix[0, :]):.3f}")
mid = min(180, n_dates - 1)
print(f"  Spread at day {mid:>3}: std = {np.nanstd(pred_matrix[mid, :]):.3f}")
print("  -> uncertainty typically grows further from the training period")

  Running 100 simulations ...

  pred_matrix shape : (349, 100)  (n_dates x n_sims)
  Spread at day 0   : std = 0.525
  Spread at day 180: std = 0.579
  -> uncertainty typically grows further from the training period

In [8]:

Copied!





pred_dates = pd.to_datetime(splits.full_predict_df["ds"])
intervention_ts = pd.Timestamp(INTERVENTION)

fig, ax = plt.subplots(figsize=(13, 5))

for i in range(N_SIM):
    ax.plot(pred_dates, pred_matrix[:, i], color="#B2182B", linewidth=0.3, alpha=0.12)

conf_lo = np.nanpercentile(pred_matrix, 2.5,  axis=1)
conf_hi = np.nanpercentile(pred_matrix, 97.5, axis=1)
point_pred = model.predict(splits.full_predict_df, target_col="y", date_col="ds")

ax.fill_between(pred_dates, conf_lo, conf_hi,
                color="#B2182B", alpha=0.25, label="95% CI (quantile)")
ax.plot(pred_dates, point_pred.predicted, color="#B2182B", linewidth=1.4,
        label="Point prediction")

for part in [splits.train_df, splits.test_df, splits.holdout_df]:
    ax.plot(part["ds"], part["y"], color="#333333", linewidth=0.6, alpha=0.7)
ax.plot([], [], color="#333333", linewidth=0.6, label="Observed")

ax.axvline(intervention_ts, color="#4DAF4A", linestyle="--", linewidth=1.2, label="Intervention")
ax.set_xlabel("Date")
ax.set_ylabel("y")
ax.set_title(
    f"3c. The bootstrap fan - {N_SIM} counterfactual trajectories  (each faint red line = one simulation)",
    fontsize=10,
)
ax.legend(fontsize=8, loc="upper left")
ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
plt.tight_layout()
plt.savefig(OUT_DIR / "step5c_bootstrap_fan.png", dpi=150)
display(fig)
pred_dates = pd.to_datetime(splits.full_predict_df["ds"])
intervention_ts = pd.Timestamp(INTERVENTION)

fig, ax = plt.subplots(figsize=(13, 5))

for i in range(N_SIM):
    ax.plot(pred_dates, pred_matrix[:, i], color="#B2182B", linewidth=0.3, alpha=0.12)

conf_lo = np.nanpercentile(pred_matrix, 2.5,  axis=1)
conf_hi = np.nanpercentile(pred_matrix, 97.5, axis=1)
point_pred = model.predict(splits.full_predict_df, target_col="y", date_col="ds")

ax.fill_between(pred_dates, conf_lo, conf_hi,
                color="#B2182B", alpha=0.25, label="95% CI (quantile)")
ax.plot(pred_dates, point_pred.predicted, color="#B2182B", linewidth=1.4,
        label="Point prediction")

for part in [splits.train_df, splits.test_df, splits.holdout_df]:
    ax.plot(part["ds"], part["y"], color="#333333", linewidth=0.6, alpha=0.7)
ax.plot([], [], color="#333333", linewidth=0.6, label="Observed")

ax.axvline(intervention_ts, color="#4DAF4A", linestyle="--", linewidth=1.2, label="Intervention")
ax.set_xlabel("Date")
ax.set_ylabel("y")
ax.set_title(
    f"3c. The bootstrap fan - {N_SIM} counterfactual trajectories  (each faint red line = one simulation)",
    fontsize=10,
)
ax.legend(fontsize=8, loc="upper left")
ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
plt.tight_layout()
plt.savefig(OUT_DIR / "step5c_bootstrap_fan.png", dpi=150)
display(fig)

5d. CI method comparison: quantile vs symmetric_sd¶

quantile: take the 2.5th/97.5th percentiles of pred_matrix at each date - adapts to asymmetry.
symmetric_sd: point prediction +/- z * sd(pred_matrix) - simpler, but assumes symmetry.

If the bootstrap distribution is symmetric, both agree. If skewed, quantile CIs are more honest.

In [9]:

Copied!





from its2s.bootstrap.base import BaseBootstrap

ci_lo_q,  ci_hi_q  = BaseBootstrap.calculate_ci(pred_matrix, point_pred.predicted,
                                                method="quantile",     level=0.95)
ci_lo_sd, ci_hi_sd = BaseBootstrap.calculate_ci(pred_matrix, point_pred.predicted,
                                                method="symmetric_sd", level=0.95)

width_q  = (ci_hi_q  - ci_lo_q ).mean()
width_sd = (ci_hi_sd - ci_lo_sd).mean()
print(f"  Mean CI width - quantile     : {width_q:.3f}")
print(f"  Mean CI width - symmetric_sd : {width_sd:.3f}")

holdout_col_idx = (pred_dates >= intervention_ts).argmax()
boot_dist = pred_matrix[holdout_col_idx, :]
skew = float(pd.Series(boot_dist).skew())
print(f"  Bootstrap distribution skewness at first holdout date: {skew:.3f}")
from its2s.bootstrap.base import BaseBootstrap

ci_lo_q,  ci_hi_q  = BaseBootstrap.calculate_ci(pred_matrix, point_pred.predicted,
                                                method="quantile",     level=0.95)
ci_lo_sd, ci_hi_sd = BaseBootstrap.calculate_ci(pred_matrix, point_pred.predicted,
                                                method="symmetric_sd", level=0.95)

width_q  = (ci_hi_q  - ci_lo_q ).mean()
width_sd = (ci_hi_sd - ci_lo_sd).mean()
print(f"  Mean CI width - quantile     : {width_q:.3f}")
print(f"  Mean CI width - symmetric_sd : {width_sd:.3f}")

holdout_col_idx = (pred_dates >= intervention_ts).argmax()
boot_dist = pred_matrix[holdout_col_idx, :]
skew = float(pd.Series(boot_dist).skew())
print(f"  Bootstrap distribution skewness at first holdout date: {skew:.3f}")

  Mean CI width - quantile     : 2.316
  Mean CI width - symmetric_sd : 2.382
  Bootstrap distribution skewness at first holdout date: 0.129

In [10]:

Copied!





fig, ax = plt.subplots(figsize=(13, 4))
ax.plot(pred_dates, point_pred.predicted, color="#333333", linewidth=1.2,
        label="Point prediction", zorder=3)
ax.fill_between(pred_dates, ci_lo_q, ci_hi_q,
                color="#4C72B0", alpha=0.30, label=f"Quantile CI (width={width_q:.2f})")
ax.fill_between(pred_dates, ci_lo_sd, ci_hi_sd,
                color="#DD8452", alpha=0.30, label=f"Symmetric SD CI (width={width_sd:.2f})")
ax.axvline(intervention_ts, color="#4DAF4A", linestyle="--", linewidth=1.2, label="Intervention")
ax.set_xlabel("Date")
ax.set_ylabel("y")
ax.set_title("3d. Quantile CI vs symmetric SD CI", fontsize=10)
ax.legend(fontsize=8, loc="upper left")
ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
plt.tight_layout()
plt.savefig(OUT_DIR / "step5d_ci_methods.png", dpi=150)
display(fig)
fig, ax = plt.subplots(figsize=(13, 4))
ax.plot(pred_dates, point_pred.predicted, color="#333333", linewidth=1.2,
        label="Point prediction", zorder=3)
ax.fill_between(pred_dates, ci_lo_q, ci_hi_q,
                color="#4C72B0", alpha=0.30, label=f"Quantile CI (width={width_q:.2f})")
ax.fill_between(pred_dates, ci_lo_sd, ci_hi_sd,
                color="#DD8452", alpha=0.30, label=f"Symmetric SD CI (width={width_sd:.2f})")
ax.axvline(intervention_ts, color="#4DAF4A", linestyle="--", linewidth=1.2, label="Intervention")
ax.set_xlabel("Date")
ax.set_ylabel("y")
ax.set_title("3d. Quantile CI vs symmetric SD CI", fontsize=10)
ax.legend(fontsize=8, loc="upper left")
ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
plt.tight_layout()
plt.savefig(OUT_DIR / "step5d_ci_methods.png", dpi=150)
display(fig)

5e. Block length sensitivity¶

MBB's one tuning knob is block_length:

too short: residual autocorrelation is broken -> CIs under-estimate uncertainty.
too long: too few independent blocks -> quantile estimates become noisy.

Note that block_length is applied to the training residuals (~4 years of daily data here), not the 42-day holdout - so even block_length=28 remains well-defined.

In [11]:

Copied!





BLOCK_LENGTHS = [3, 7, 14, 28]
bl_colors = ["#4C72B0", "#55A868", "#B2182B", "#8172B3"]
ci_results = {}

for bl in BLOCK_LENGTHS:
    print(f"  Running n_sim={N_SIM} with block_length={bl} ...")
    bl_rng = np.random.default_rng(42)
    bl_seeds = bl_rng.integers(0, 2**31, size=N_SIM)
    bl_matrix = np.full((n_dates, N_SIM), np.nan)

    for i in range(N_SIM):
        preds = _single_mbb_sim(
            i, model, splits.train_df, splits.full_predict_df,
            fitted_values, residuals,
            bl, "y", "ds", None, int(bl_seeds[i]),
        )
        bl_matrix[:, i] = preds

    lo, hi = BaseBootstrap.calculate_ci(bl_matrix, point_pred.predicted,
                                        method="quantile", level=0.95)
    ci_results[bl] = (lo, hi, (hi - lo).mean())
    print(f"    mean CI width = {ci_results[bl][2]:.3f}")

print("\n  Summary:")
print(f"  {'block_length':>14}  {'mean CI width':>14}")
for bl, (_, _, w) in ci_results.items():
    print(f"  {bl:>14}  {w:>14.3f}")
BLOCK_LENGTHS = [3, 7, 14, 28]
bl_colors = ["#4C72B0", "#55A868", "#B2182B", "#8172B3"]
ci_results = {}

for bl in BLOCK_LENGTHS:
    print(f"  Running n_sim={N_SIM} with block_length={bl} ...")
    bl_rng = np.random.default_rng(42)
    bl_seeds = bl_rng.integers(0, 2**31, size=N_SIM)
    bl_matrix = np.full((n_dates, N_SIM), np.nan)

    for i in range(N_SIM):
        preds = _single_mbb_sim(
            i, model, splits.train_df, splits.full_predict_df,
            fitted_values, residuals,
            bl, "y", "ds", None, int(bl_seeds[i]),
        )
        bl_matrix[:, i] = preds

    lo, hi = BaseBootstrap.calculate_ci(bl_matrix, point_pred.predicted,
                                        method="quantile", level=0.95)
    ci_results[bl] = (lo, hi, (hi - lo).mean())
    print(f"    mean CI width = {ci_results[bl][2]:.3f}")

print("\n  Summary:")
print(f"  {'block_length':>14}  {'mean CI width':>14}")
for bl, (_, _, w) in ci_results.items():
    print(f"  {bl:>14}  {w:>14.3f}")

  Running n_sim=100 with block_length=3 ...

    mean CI width = 2.359
  Running n_sim=100 with block_length=7 ...

    mean CI width = 2.357
  Running n_sim=100 with block_length=14 ...

    mean CI width = 2.316
  Running n_sim=100 with block_length=28 ...

    mean CI width = 2.337

  Summary:
    block_length   mean CI width
               3           2.359
               7           2.357
              14           2.316
              28           2.337

In [12]:

Copied!





fig, ax = plt.subplots(figsize=(13, 5))

for part in [splits.train_df, splits.test_df, splits.holdout_df]:
    ax.plot(part["ds"], part["y"], color="#333333", linewidth=0.5, alpha=0.6)
ax.plot([], [], color="#333333", linewidth=0.6, label="Observed")

ax.plot(pred_dates, point_pred.predicted, color="#333333", linewidth=1.2,
        linestyle="--", label="Point prediction", zorder=4)

for bl, color in zip(BLOCK_LENGTHS, bl_colors):
    lo, hi, w = ci_results[bl]
    ax.fill_between(pred_dates, lo, hi, color=color, alpha=0.20,
                    label=f"block_length={bl}  (mean width={w:.2f})")

ax.axvline(intervention_ts, color="black", linestyle="--", linewidth=1.0, label="Intervention")
ax.set_xlabel("Date")
ax.set_ylabel("y")
ax.set_title("3e. MBB CI width vs block length  (all at 95%, n_sim=100)", fontsize=10)
ax.legend(fontsize=8, loc="upper left")
ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
plt.tight_layout()
plt.savefig(OUT_DIR / "step5e_block_length_sensitivity.png", dpi=150)
display(fig)
fig, ax = plt.subplots(figsize=(13, 5))

for part in [splits.train_df, splits.test_df, splits.holdout_df]:
    ax.plot(part["ds"], part["y"], color="#333333", linewidth=0.5, alpha=0.6)
ax.plot([], [], color="#333333", linewidth=0.6, label="Observed")

ax.plot(pred_dates, point_pred.predicted, color="#333333", linewidth=1.2,
        linestyle="--", label="Point prediction", zorder=4)

for bl, color in zip(BLOCK_LENGTHS, bl_colors):
    lo, hi, w = ci_results[bl]
    ax.fill_between(pred_dates, lo, hi, color=color, alpha=0.20,
                    label=f"block_length={bl}  (mean width={w:.2f})")

ax.axvline(intervention_ts, color="black", linestyle="--", linewidth=1.0, label="Intervention")
ax.set_xlabel("Date")
ax.set_ylabel("y")
ax.set_title("3e. MBB CI width vs block length  (all at 95%, n_sim=100)", fontsize=10)
ax.legend(fontsize=8, loc="upper left")
ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
plt.tight_layout()
plt.savefig(OUT_DIR / "step5e_block_length_sensitivity.png", dpi=150)
display(fig)

$No description has been provided for this image$

Key takeaways¶

MBB resamples residuals in contiguous blocks (not iid) to preserve the autocorrelation of the time series.
pred_matrix is (n_dates x n_sims) - each column is one counterfactual trajectory from a bootstrap world.
CIs are percentiles of pred_matrix across simulations at each date - no parametric assumptions needed.
quantile is preferable to symmetric_sd when the bootstrap distribution is skewed.
block_length is MBB's one tuning knob (see step 3 for the broader tune_model() framework that tunes the model hyperparameters; block_length itself is tuned separately by sensitivity analysis here in 5e); 14 (two weeks) is a sensible default for daily data with weekly seasonality.