import matplotlib.pyplot as plt
import numpy as np
from crabbymetrics import SyntheticControl, SyntheticDID
np.set_printoptions(precision=4, suppress=True)Synthetic DID Example
Synthetic difference-in-differences combines two adjustments. Unit weights make the treated units look like a weighted donor panel before treatment. Time weights make the post-treatment donor mean look like a weighted pre-treatment donor history. The final contrast is a weighted before-after comparison, not just a post-treatment synthetic-control gap.
This page uses a factor-model donor panel and treated units whose untreated path is a sparse convex combination of high-score donors. That selection makes a raw donor average and a plain DiD contrast visibly biased, while still leaving a good weighted comparison inside the donor pool.
1 Generate A Selected Panel
rng = np.random.default_rng(2031)
n_control = 72
n_treated = 8
n_periods = 30
t_pre = 20
time = np.arange(n_periods)
factors = np.column_stack(
[
np.linspace(-1.2, 1.4, n_periods),
np.sin(np.linspace(0.0, 2.8 * np.pi, n_periods)),
np.cos(np.linspace(0.0, 1.7 * np.pi, n_periods)),
]
)
loadings = rng.normal(size=(n_control, factors.shape[1]))
unit_intercepts = rng.normal(scale=0.5, size=n_control)
unit_trends = rng.normal(scale=0.035, size=n_control)
noise = rng.normal(scale=0.12, size=(n_control, n_periods))
control_panel = (
unit_intercepts[:, None]
+ unit_trends[:, None] * time
+ loadings @ factors.T
+ noise
)
selection_score = loadings[:, 0] + 0.4 * unit_trends / unit_trends.std()
active_donors = np.argsort(selection_score)[-12:]
true_weights = np.zeros(n_control)
true_weights[active_donors] = rng.dirichlet(np.ones(len(active_donors)) * 0.7)
treated_base = true_weights @ control_panel
treated_offsets = np.linspace(-0.25, 0.25, n_treated)
treated_noise = rng.normal(scale=0.04, size=(n_treated, n_periods))
treated_untreated = treated_base + treated_offsets[:, None] + treated_noise
treatment_effect = np.r_[
np.zeros(t_pre),
0.7 + 0.08 * np.arange(n_periods - t_pre),
]
observed_panel = np.vstack([control_panel, treated_untreated + treatment_effect])
control_units = np.arange(n_control)
treated_units = np.arange(n_control, n_control + n_treated)
treated_mean = observed_panel[treated_units].mean(axis=0)
control_mean = observed_panel[control_units].mean(axis=0)
print("treated units:", treated_units)
print("active donor units:", active_donors)
print("true post-period ATT:", round(float(treatment_effect[t_pre:].mean()), 4))treated units: [72 73 74 75 76 77 78 79]
active donor units: [29 39 66 2 20 9 12 68 71 16 31 51]
true post-period ATT: 1.06
2 Fit DID, Synthetic Control, And SDID
def did_estimate(panel, treated_idx, t_pre):
treated = panel[treated_idx]
controls = np.delete(panel, treated_idx, axis=0)
treated_gap = treated[:, t_pre:].mean() - treated[:, :t_pre].mean()
control_gap = controls[:, t_pre:].mean() - controls[:, :t_pre].mean()
return float(treated_gap - control_gap)
did_att = did_estimate(observed_panel, treated_units, t_pre)
donors_pre = observed_panel[control_units, :t_pre].T
treated_pre = observed_panel[treated_units, :t_pre].mean(axis=0)
sc = SyntheticControl(max_iterations=800)
sc.fit(donors_pre, treated_pre)
sc_weights = np.asarray(sc.summary()["weights"])
sc_path = observed_panel[control_units].T @ sc_weights
sc_att = float((treated_mean[t_pre:] - sc_path[t_pre:]).mean())
W = np.zeros_like(observed_panel, dtype=float)
W[treated_units, t_pre:] = 1.0
sdid = SyntheticDID(zeta_omega=1e-8, zeta_lambda=1e-8, max_iterations=1200)
sdid.fit(observed_panel, W)
sdid_summary = sdid.summary()
print("DiD estimate:", round(did_att, 4))
print("Synthetic-control estimate:", round(sc_att, 4))
print("Synthetic DID estimate:", round(float(sdid_summary["att"]), 4))
print("SDID pre RMSE:", round(float(sdid_summary["pre_rmse"]), 4))DiD estimate: 2.9571
Synthetic-control estimate: 1.0935
Synthetic DID estimate: 1.0856
SDID pre RMSE: 0.0057
The SDID estimate is computed from the matrix contrast
[-unit_weights, treated_average]' Y [-time_weights, post_average].
The unit and time weights are both simplex weights. The Rust solver minimizes the same objective as the CVXPY reference:
sum((A w + intercept - b)^2) + zeta^2 * n * sum(w^2)
subject to w >= 0 and sum(w) = 1.
3 Inspect The Weights
unit_weights = np.asarray(sdid_summary["unit_weights"])[0, control_units]
time_weights = np.asarray(sdid_summary["time_weights"])[0, :t_pre]
print("unit weight sum:", round(float(unit_weights.sum()), 8))
print("time weight sum:", round(float(time_weights.sum()), 8))
print("active donor weights:", int((unit_weights > 1e-3).sum()), "of", len(unit_weights))
print("active pre-period weights:", int((time_weights > 1e-3).sum()), "of", len(time_weights))
print("top donor weights:")
for pos in np.argsort(unit_weights)[-6:][::-1]:
print(f" unit {int(control_units[pos])}: {unit_weights[pos]:.4f}")unit weight sum: 1.0
time weight sum: 1.0
active donor weights: 12 of 72
active pre-period weights: 2 of 20
top donor weights:
unit 29: 0.3030
unit 71: 0.2274
unit 16: 0.0724
unit 31: 0.0670
unit 1: 0.0613
unit 9: 0.0521
fig, axes = plt.subplots(1, 2, figsize=(12, 4.8), constrained_layout=True)
axes[0].plot(time + 1, treated_mean, color="black", lw=2.2, label="Treated mean")
axes[0].plot(time + 1, control_mean, color="#d95f02", lw=1.8, label="Donor mean")
axes[0].plot(time + 1, sc_path, color="#1b9e77", lw=2.0, label="Synthetic control")
axes[0].plot(
time + 1,
np.asarray(sdid_summary["counterfactual"])[treated_units].mean(axis=0),
color="#7570b3",
lw=2.0,
label="SDID unit-weighted path",
)
axes[0].axvline(t_pre + 1, color="0.4", ls="--", lw=1)
axes[0].set_title("Outcome Paths")
axes[0].set_xlabel("Period")
axes[0].set_ylabel("Outcome")
axes[0].legend(frameon=False)
event_time = np.arange(n_periods) - t_pre
axes[1].plot(
event_time,
treated_mean - control_mean,
color="#d95f02",
lw=1.8,
label="Treated minus donor mean",
)
axes[1].plot(
event_time,
treated_mean - sc_path,
color="#1b9e77",
lw=2.0,
label="Treated minus synthetic control",
)
axes[1].plot(
event_time,
np.asarray(sdid_summary["treatment_effect"])[treated_units].mean(axis=0),
color="#7570b3",
lw=2.0,
label="Treated minus SDID unit path",
)
axes[1].axhline(0.0, color="0.5", lw=1)
axes[1].axvline(0, color="0.4", ls="--", lw=1)
axes[1].set_title("Gaps")
axes[1].set_xlabel("Period Relative To Treatment")
axes[1].set_ylabel("Difference")
axes[1].legend(frameon=False)
plt.show()Plain DiD uses uniform units and uniform time. Synthetic control improves the pre-treatment unit match but averages the post gap directly. SDID keeps the unit match and adds time weights, so the final estimate is anchored by both donor units and pre-treatment periods.