pyfector
pyfector — Fast counterfactual estimators for panel data in Python.
A high-performance Python reimplementation of the R fect package,
featuring randomized SVD, GPU acceleration (CuPy), parallel computing
(joblib), Polars data ingestion, and seeded reproducibility.
Usage::
import pyfector
result = pyfector.fect(
data=df,
Y="outcome", D="treat",
index=("unit", "year"),
method="ife",
r=(0, 5),
se=True,
seed=42,
)
result.summary()
result.plot()
1""" 2pyfector — Fast counterfactual estimators for panel data in Python. 3 4A high-performance Python reimplementation of the R ``fect`` package, 5featuring randomized SVD, GPU acceleration (CuPy), parallel computing 6(joblib), Polars data ingestion, and seeded reproducibility. 7 8Usage:: 9 10 import pyfector 11 12 result = pyfector.fect( 13 data=df, 14 Y="outcome", D="treat", 15 index=("unit", "year"), 16 method="ife", 17 r=(0, 5), 18 se=True, 19 seed=42, 20 ) 21 result.summary() 22 result.plot() 23 24""" 25 26__version__ = "0.1.6" 27 28from .fect import fect, FectResult 29from .backend import set_device, get_device 30from .diagnostics import run_diagnostics, DiagnosticResult 31from .plotting import plot 32 33__all__ = [ 34 "fect", 35 "FectResult", 36 "set_device", 37 "get_device", 38 "run_diagnostics", 39 "DiagnosticResult", 40 "plot", 41]
185def fect( 186 data, 187 Y: str, 188 D: str, 189 index: tuple[str, str], 190 X: list[str] | None = None, 191 W: str | None = None, 192 group: str | None = None, 193 method: Literal["fe", "ife", "mc", "cfe", "both"] = "ife", 194 force: Literal["none", "unit", "time", "two-way"] = "two-way", 195 r: int | tuple[int, int] = 0, 196 lam: float | None = None, 197 nlambda: int = 10, 198 CV: bool = True, 199 k: int = 10, 200 cv_prop: float = 0.1, 201 cv_nobs: int = 3, 202 cv_treat: bool = True, 203 cv_donut: int = 0, 204 criterion: str = "mspe", 205 cv_rule: Literal["min", "onepct"] = "min", 206 se: bool = False, 207 vartype: Literal["bootstrap", "jackknife"] = "bootstrap", 208 nboots: int = 200, 209 alpha: float = 0.05, 210 tol: float = 1e-7, 211 max_iter: int = 5000, 212 min_T0: int = 1, 213 min_T0_strict: bool = False, 214 max_missing: float = 1.0, 215 normalize: bool = False, 216 # CFE-specific 217 Z: list[str] | None = None, 218 Q: list[str] | None = None, 219 # Performance 220 device: Literal["cpu", "gpu"] = "cpu", 221 n_jobs: int | None = -1, 222 seed: int | None = None, 223) -> FectResult: 224 """Estimate counterfactual treatment effects for panel data. 225 226 This is the main Python entry point for the counterfactual estimator 227 workflow. Where the paper and the historical R package differ, 228 pyfector defaults to the paper's statistical definition and exposes 229 R-package-style behavior through explicit options. 230 231 Missing outcome policy 232 ---------------------- 233 pyfector distinguishes raw missing outcomes from counterfactual 234 missingness caused by treatment. Observed untreated cells 235 (``D == 0`` and non-missing ``Y``) fit the response surface. Observed 236 treated cells (``D == 1`` and non-missing ``Y``) contribute to ATT as 237 ``Y - Y_ct``. If a treated outcome is missing in the input data, the 238 model can still produce a counterfactual ``Y_ct`` for that cell, but 239 the cell is not counted in ``att_avg`` or ``att_on`` because the 240 treated potential outcome was not observed. 241 242 By default, ``min_T0`` is enforced only for treated and reversal 243 units. Sparse controls are retained if they have at least one 244 observed outcome, because they may still inform the low-rank response 245 surface. Set ``min_T0_strict=True`` to require controls to satisfy 246 ``min_T0`` too, matching the more conservative R fect sparse-panel 247 behavior. 248 249 Parameters 250 ---------- 251 data : polars.DataFrame, pandas.DataFrame 252 Long-format panel data. 253 Y, D : str 254 Column names for outcome and binary treatment indicator. 255 index : (str, str) 256 Column names for (unit_id, time_period). 257 X : list of str, optional 258 Time-varying covariates. 259 W : str, optional 260 Observation weight column. 261 group : str, optional 262 Reserved for grouped estimation. Currently raises 263 ``NotImplementedError`` when supplied. 264 method : {"fe", "ife", "mc", "cfe", "both"} 265 Estimation method. 266 force : {"none", "unit", "time", "two-way"} 267 Fixed effects specification. 268 r : int or (int, int) 269 Number of factors. If tuple, CV selects from range. 270 lam : float, optional 271 Nuclear norm penalty for MC. If None with CV=True, auto-selected. 272 nlambda : int 273 Number of automatically generated lambda candidates for MC CV. 274 CV : bool 275 If True, cross-validate over ``r`` for IFE when ``r`` is a tuple, 276 or over ``lam`` for MC when ``lam`` is None. 277 k : int 278 Number of CV folds. 279 cv_prop : float 280 Fraction of eligible observed control cells masked per CV fold. 281 cv_nobs : int 282 Number of consecutive within-unit observations to mask as a block. 283 cv_treat : bool 284 If True, restrict CV masks to pre-treatment cells of ever-treated 285 units. If False, use all observed control cells. 286 cv_donut : int 287 Exclude this many periods around treatment onset from CV evaluation. 288 criterion : {"mspe", "gmspe", "mad"} 289 Cross-validation loss. 290 cv_rule : {"min", "onepct"} 291 CV selection rule. ``"min"`` chooses the strict minimum-score 292 candidate and is the paper-faithful default. ``"onepct"`` chooses 293 the simplest candidate within 1% of the best score (lower ``r`` for 294 IFE, higher ``lam`` for MC). 295 se : bool 296 Compute standard errors via bootstrap/jackknife. 297 vartype : {"bootstrap", "jackknife"} 298 Inference method when ``se=True``. 299 nboots : int 300 Number of bootstrap replications. Ignored for jackknife. 301 alpha : float 302 Significance level for confidence intervals and tests. 303 tol : float 304 EM convergence tolerance for final point estimation. 305 max_iter : int 306 Maximum EM iterations. 307 min_T0 : int 308 Minimum untreated/pre-treatment observed periods. By default this is 309 enforced only for treated and treatment-reversal units. 310 min_T0_strict : bool 311 If True, enforce ``min_T0`` on all units, including controls. This 312 matches R fect's conservative handling of sparse control rows. 313 max_missing : float 314 Maximum missing-outcome fraction per unit, in ``[0, 1]``. Units with 315 no observed outcomes are always dropped, regardless of this threshold, 316 because they provide neither fitting information nor observed treated 317 effects. 318 normalize : bool 319 If True, estimate on an outcome standardized by its observed standard 320 deviation, then transform effects back to the original scale. 321 Z, Q : list of str, optional 322 Reserved CFE interaction arguments. Currently raise 323 ``NotImplementedError`` when supplied. 324 device : {"cpu", "gpu"} 325 Compute device. 326 n_jobs : int, optional 327 Parallel workers for CV and bootstrap. ``-1`` or ``None`` uses 328 all available CPUs. 329 seed : int, optional 330 Random seed for full reproducibility. 331 """ 332 # Set device 333 set_device(device) 334 xp = get_backend() 335 n_jobs = _resolve_n_jobs(n_jobs) 336 if device == "gpu": 337 n_jobs = 1 338 339 if group is not None: 340 raise NotImplementedError("The `group` argument is not implemented yet.") 341 if Z is not None or Q is not None: 342 raise NotImplementedError("The `Z` and `Q` CFE interaction arguments are not implemented yet.") 343 if criterion not in {"mspe", "gmspe", "mad"}: 344 raise ValueError("criterion must be 'mspe', 'gmspe', or 'mad'") 345 if cv_rule not in {"min", "onepct"}: 346 raise ValueError("cv_rule must be 'min' or 'onepct'") 347 if min_T0 < 0: 348 raise ValueError("min_T0 must be non-negative") 349 if not 0.0 <= max_missing <= 1.0: 350 raise ValueError("max_missing must be between 0 and 1") 351 352 # Map force string to int 353 force_map = {"none": 0, "unit": 1, "time": 2, "two-way": 3} 354 force_int = force_map[force] 355 356 # Prepare panel data 357 panel = prepare_panel( 358 data, Y=Y, D=D, index=index, X=X, W=W, 359 group=group, min_T0=min_T0, min_T0_strict=min_T0_strict, 360 max_missing=max_missing, 361 ) 362 363 # Move to device 364 Y_mat = to_device(panel.Y) 365 D_mat = to_device(panel.D) 366 I_mat = to_device(panel.I) 367 II_mat = to_device(panel.II) 368 X_mat = to_device(panel.X) if panel.X is not None else None 369 W_mat = to_device(panel.W) if panel.W is not None else None 370 371 # Normalize 372 norm_factor = 1.0 373 if normalize: 374 sd_y = float(xp.std(Y_mat[I_mat > 0])) 375 if sd_y > 0: 376 Y_mat = Y_mat / sd_y 377 norm_factor = sd_y 378 379 # Initial fit 380 Y0, beta0 = initial_fit(Y_mat, X_mat, II_mat, force_int) 381 382 # Determine r and lambda 383 r_cv = None 384 lambda_cv = None 385 cv_result = None 386 387 if method == "ife": 388 if isinstance(r, tuple) and CV: 389 cv_result = cv_ife( 390 Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0, 391 force=force_int, r_range=r, k=k, cv_prop=cv_prop, 392 cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut, 393 criterion=criterion, cv_rule=cv_rule, 394 tol=tol, max_iter=max_iter, 395 n_jobs=n_jobs, seed=seed, 396 ) 397 r_cv = cv_result.best_r 398 else: 399 r_cv = r if isinstance(r, int) else r[0] 400 401 elif method == "mc": 402 if lam is None and CV: 403 cv_result = cv_mc( 404 Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0, 405 force=force_int, nlambda=nlambda, k=k, cv_prop=cv_prop, 406 cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut, 407 criterion=criterion, cv_rule=cv_rule, 408 tol=tol, max_iter=max_iter, 409 n_jobs=n_jobs, seed=seed, 410 ) 411 lambda_cv = cv_result.best_lambda 412 else: 413 lambda_cv = lam if lam is not None else 0.0 414 415 elif method == "fe": 416 r_cv = 0 417 418 elif method == "cfe": 419 r_cv = r if isinstance(r, int) else r[0] 420 421 # Point estimation 422 if method in ("fe", "ife"): 423 est = estimate_ife( 424 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 425 r=r_cv, force=force_int, tol=tol, max_iter=max_iter, 426 ) 427 elif method == "mc": 428 est = estimate_mc( 429 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 430 lam=lambda_cv, force=force_int, tol=tol, max_iter=max_iter, 431 ) 432 elif method == "cfe": 433 est = estimate_cfe( 434 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 435 r=r_cv, force=force_int, tol=tol, max_iter=max_iter, 436 ) 437 elif method == "both": 438 # Run both IFE and MC, return IFE results with MC comparison 439 if isinstance(r, tuple) and CV: 440 cv_result = cv_ife( 441 Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0, 442 force=force_int, r_range=r, k=k, cv_prop=cv_prop, 443 cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut, 444 criterion=criterion, cv_rule=cv_rule, 445 tol=tol, max_iter=max_iter, 446 n_jobs=n_jobs, seed=seed, 447 ) 448 r_cv = cv_result.best_r 449 else: 450 r_cv = r if isinstance(r, int) else r[0] 451 est = estimate_ife( 452 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 453 r=r_cv, force=force_int, tol=tol, max_iter=max_iter, 454 ) 455 else: 456 raise ValueError(f"Unknown method: {method}") 457 458 # Compute effects 459 eff = Y_mat - est.fit 460 Y_ct = est.fit 461 462 # Denormalize 463 if normalize and norm_factor != 1.0: 464 eff = eff * norm_factor 465 Y_ct = Y_ct * norm_factor 466 Y_mat = Y_mat * norm_factor 467 if est.beta is not None: 468 est = est._replace(beta=est.beta * norm_factor) 469 470 # ATT computation 471 T_on = to_device(panel.T_on) 472 att_avg, att_on, time_on, count_on, att_avg_unit = _compute_effects( 473 to_numpy(eff), to_numpy(D_mat), to_numpy(panel.T_on), to_numpy(I_mat), 474 ) 475 476 # Build result 477 result = FectResult( 478 method=method, 479 r_cv=r_cv, 480 lambda_cv=lambda_cv, 481 att_avg=att_avg, 482 att_avg_unit=att_avg_unit, 483 att_on=att_on, 484 time_on=time_on, 485 count_on=count_on, 486 beta=to_numpy(est.beta) if est.beta is not None else None, 487 covariate_names=panel.covariate_names, 488 mu=est.mu, 489 alpha=to_numpy(est.alpha) if est.alpha is not None else None, 490 xi=to_numpy(est.xi) if est.xi is not None else None, 491 factors=to_numpy(est.factors) if est.factors is not None else None, 492 loadings=to_numpy(est.loadings) if est.loadings is not None else None, 493 Y_ct=to_numpy(Y_ct), 494 eff=to_numpy(eff), 495 residuals=to_numpy(est.residuals), 496 sigma2=est.sigma2, 497 IC=est.IC, 498 PC=est.PC, 499 niter=est.niter, 500 converged=est.converged, 501 cv_result=cv_result, 502 panel=panel, 503 seed=seed, 504 ) 505 506 # Inference 507 if se: 508 result.inference = _run_inference( 509 result, panel, Y_mat, X_mat, W_mat, beta0, Y0, 510 method=method, r_cv=r_cv, lambda_cv=lambda_cv, 511 force_int=force_int, tol=tol, max_iter=max_iter, 512 vartype=vartype, nboots=nboots, alpha=alpha, 513 n_jobs=n_jobs, seed=seed, normalize=normalize, 514 norm_factor=norm_factor, 515 ) 516 517 return result
Estimate counterfactual treatment effects for panel data.
This is the main Python entry point for the counterfactual estimator workflow. Where the paper and the historical R package differ, pyfector defaults to the paper's statistical definition and exposes R-package-style behavior through explicit options.
Missing outcome policy
pyfector distinguishes raw missing outcomes from counterfactual
missingness caused by treatment. Observed untreated cells
(D == 0 and non-missing Y) fit the response surface. Observed
treated cells (D == 1 and non-missing Y) contribute to ATT as
Y - Y_ct. If a treated outcome is missing in the input data, the
model can still produce a counterfactual Y_ct for that cell, but
the cell is not counted in att_avg or att_on because the
treated potential outcome was not observed.
By default, min_T0 is enforced only for treated and reversal
units. Sparse controls are retained if they have at least one
observed outcome, because they may still inform the low-rank response
surface. Set min_T0_strict=True to require controls to satisfy
min_T0 too, matching the more conservative R fect sparse-panel
behavior.
Parameters
data : polars.DataFrame, pandas.DataFrame
Long-format panel data.
Y, D : str
Column names for outcome and binary treatment indicator.
index : (str, str)
Column names for (unit_id, time_period).
X : list of str, optional
Time-varying covariates.
W : str, optional
Observation weight column.
group : str, optional
Reserved for grouped estimation. Currently raises
NotImplementedError when supplied.
method : {"fe", "ife", "mc", "cfe", "both"}
Estimation method.
force : {"none", "unit", "time", "two-way"}
Fixed effects specification.
r : int or (int, int)
Number of factors. If tuple, CV selects from range.
lam : float, optional
Nuclear norm penalty for MC. If None with CV=True, auto-selected.
nlambda : int
Number of automatically generated lambda candidates for MC CV.
CV : bool
If True, cross-validate over r for IFE when r is a tuple,
or over lam for MC when lam is None.
k : int
Number of CV folds.
cv_prop : float
Fraction of eligible observed control cells masked per CV fold.
cv_nobs : int
Number of consecutive within-unit observations to mask as a block.
cv_treat : bool
If True, restrict CV masks to pre-treatment cells of ever-treated
units. If False, use all observed control cells.
cv_donut : int
Exclude this many periods around treatment onset from CV evaluation.
criterion : {"mspe", "gmspe", "mad"}
Cross-validation loss.
cv_rule : {"min", "onepct"}
CV selection rule. "min" chooses the strict minimum-score
candidate and is the paper-faithful default. "onepct" chooses
the simplest candidate within 1% of the best score (lower r for
IFE, higher lam for MC).
se : bool
Compute standard errors via bootstrap/jackknife.
vartype : {"bootstrap", "jackknife"}
Inference method when se=True.
nboots : int
Number of bootstrap replications. Ignored for jackknife.
alpha : float
Significance level for confidence intervals and tests.
tol : float
EM convergence tolerance for final point estimation.
max_iter : int
Maximum EM iterations.
min_T0 : int
Minimum untreated/pre-treatment observed periods. By default this is
enforced only for treated and treatment-reversal units.
min_T0_strict : bool
If True, enforce min_T0 on all units, including controls. This
matches R fect's conservative handling of sparse control rows.
max_missing : float
Maximum missing-outcome fraction per unit, in [0, 1]. Units with
no observed outcomes are always dropped, regardless of this threshold,
because they provide neither fitting information nor observed treated
effects.
normalize : bool
If True, estimate on an outcome standardized by its observed standard
deviation, then transform effects back to the original scale.
Z, Q : list of str, optional
Reserved CFE interaction arguments. Currently raise
NotImplementedError when supplied.
device : {"cpu", "gpu"}
Compute device.
n_jobs : int, optional
Parallel workers for CV and bootstrap. -1 or None uses
all available CPUs.
seed : int, optional
Random seed for full reproducibility.
68@dataclass 69class FectResult: 70 """Container for all fect estimation results.""" 71 # Method info 72 method: str 73 r_cv: int | None = None 74 lambda_cv: float | None = None 75 76 # Point estimates 77 att_avg: float = 0.0 78 att_avg_unit: float = 0.0 79 80 # Dynamic effects 81 att_on: np.ndarray | None = None 82 time_on: np.ndarray | None = None 83 count_on: np.ndarray | None = None 84 85 # Exit effects (treatment reversal) 86 att_off: np.ndarray | None = None 87 time_off: np.ndarray | None = None 88 89 # Coefficients 90 beta: np.ndarray | None = None 91 covariate_names: list[str] = field(default_factory=list) 92 93 # Fixed effects 94 mu: float = 0.0 95 alpha: np.ndarray | None = None # unit FE 96 xi: np.ndarray | None = None # time FE 97 factors: np.ndarray | None = None 98 loadings: np.ndarray | None = None 99 100 # Counterfactual and effects matrices 101 Y_ct: np.ndarray | None = None # T×N counterfactual 102 eff: np.ndarray | None = None # T×N treatment effects 103 residuals: np.ndarray | None = None 104 105 # Model fit 106 sigma2: float = 0.0 107 IC: float = 0.0 108 PC: float = 0.0 109 rmse: float = 0.0 110 niter: int = 0 111 converged: bool = False 112 113 # Inference 114 inference: InferenceResult | None = None 115 116 # CV 117 cv_result: CVResult | None = None 118 119 # Panel metadata 120 panel: PanelData | None = None 121 122 # Reproducibility 123 seed: int | None = None 124 125 def summary(self) -> str: 126 """Print summary table of results.""" 127 lines = [] 128 lines.append(f"pyfector estimation results") 129 lines.append(f"{'='*60}") 130 lines.append(f"Method: {self.method}") 131 if self.r_cv is not None: 132 lines.append(f"Number of factors (CV): {self.r_cv}") 133 if self.lambda_cv is not None: 134 lines.append(f"Lambda (CV): {self.lambda_cv:.6f}") 135 lines.append(f"Converged: {self.converged} (iter={self.niter})") 136 lines.append(f"Sigma^2: {self.sigma2:.6f}") 137 lines.append(f"") 138 lines.append(f"ATT (average): {self.att_avg:.6f}") 139 if self.inference is not None: 140 inf = self.inference 141 lines.append(f" SE: {inf.att_avg_se:.6f}") 142 lines.append(f" CI: [{inf.att_avg_ci[0]:.6f}, {inf.att_avg_ci[1]:.6f}]") 143 lines.append(f" p-val: {inf.att_avg_pval:.4f}") 144 145 if self.beta is not None and len(self.beta) > 0: 146 lines.append(f"") 147 lines.append(f"Coefficients:") 148 for i, name in enumerate(self.covariate_names): 149 lines.append(f" {name}: {self.beta[i]:.6f}") 150 151 if self.att_on is not None and self.time_on is not None: 152 lines.append(f"") 153 lines.append(f"Dynamic effects (ATT by relative time):") 154 lines.append(f" {'Time':>6s} {'ATT':>10s} {'Count':>6s}", ) 155 for i, t in enumerate(self.time_on): 156 count = self.count_on[i] if self.count_on is not None else "" 157 att = self.att_on[i] 158 if self.inference is not None: 159 se = self.inference.att_on_se[i] 160 lines.append(f" {t:>6.0f} {att:>10.4f} ({se:.4f}) {count}") 161 else: 162 lines.append(f" {t:>6.0f} {att:>10.4f} {count}") 163 164 lines.append(f"{'='*60}") 165 if self.panel is not None: 166 lines.append(f"N={self.panel.N}, T={self.panel.T}") 167 if self.seed is not None: 168 lines.append(f"Seed: {self.seed}") 169 return "\n".join(lines) 170 171 def __repr__(self): 172 return self.summary() 173 174 def plot(self, kind="gap", **kwargs): 175 """Plot results. Shortcut for ``pyfector.plot(self, kind, ...)``.""" 176 from .plotting import plot as _plot 177 return _plot(self, kind=kind, **kwargs) 178 179 def diagnose(self, **kwargs): 180 """Run diagnostic tests. Shortcut for ``pyfector.run_diagnostics(self, ...)``.""" 181 from .diagnostics import run_diagnostics 182 return run_diagnostics(self, **kwargs)
Container for all fect estimation results.
125 def summary(self) -> str: 126 """Print summary table of results.""" 127 lines = [] 128 lines.append(f"pyfector estimation results") 129 lines.append(f"{'='*60}") 130 lines.append(f"Method: {self.method}") 131 if self.r_cv is not None: 132 lines.append(f"Number of factors (CV): {self.r_cv}") 133 if self.lambda_cv is not None: 134 lines.append(f"Lambda (CV): {self.lambda_cv:.6f}") 135 lines.append(f"Converged: {self.converged} (iter={self.niter})") 136 lines.append(f"Sigma^2: {self.sigma2:.6f}") 137 lines.append(f"") 138 lines.append(f"ATT (average): {self.att_avg:.6f}") 139 if self.inference is not None: 140 inf = self.inference 141 lines.append(f" SE: {inf.att_avg_se:.6f}") 142 lines.append(f" CI: [{inf.att_avg_ci[0]:.6f}, {inf.att_avg_ci[1]:.6f}]") 143 lines.append(f" p-val: {inf.att_avg_pval:.4f}") 144 145 if self.beta is not None and len(self.beta) > 0: 146 lines.append(f"") 147 lines.append(f"Coefficients:") 148 for i, name in enumerate(self.covariate_names): 149 lines.append(f" {name}: {self.beta[i]:.6f}") 150 151 if self.att_on is not None and self.time_on is not None: 152 lines.append(f"") 153 lines.append(f"Dynamic effects (ATT by relative time):") 154 lines.append(f" {'Time':>6s} {'ATT':>10s} {'Count':>6s}", ) 155 for i, t in enumerate(self.time_on): 156 count = self.count_on[i] if self.count_on is not None else "" 157 att = self.att_on[i] 158 if self.inference is not None: 159 se = self.inference.att_on_se[i] 160 lines.append(f" {t:>6.0f} {att:>10.4f} ({se:.4f}) {count}") 161 else: 162 lines.append(f" {t:>6.0f} {att:>10.4f} {count}") 163 164 lines.append(f"{'='*60}") 165 if self.panel is not None: 166 lines.append(f"N={self.panel.N}, T={self.panel.T}") 167 if self.seed is not None: 168 lines.append(f"Seed: {self.seed}") 169 return "\n".join(lines)
Print summary table of results.
174 def plot(self, kind="gap", **kwargs): 175 """Plot results. Shortcut for ``pyfector.plot(self, kind, ...)``.""" 176 from .plotting import plot as _plot 177 return _plot(self, kind=kind, **kwargs)
Plot results. Shortcut for pyfector.plot(self, kind, ...).
179 def diagnose(self, **kwargs): 180 """Run diagnostic tests. Shortcut for ``pyfector.run_diagnostics(self, ...)``.""" 181 from .diagnostics import run_diagnostics 182 return run_diagnostics(self, **kwargs)
Run diagnostic tests. Shortcut for pyfector.run_diagnostics(self, ...).
35def set_device(device: Literal["cpu", "gpu"]) -> None: 36 """Set the compute device globally.""" 37 global _DEVICE 38 if device == "gpu" and not _check_cupy(): 39 raise ImportError( 40 "CuPy is required for GPU support. Install it with: " 41 "pip install cupy-cuda12x (adjust for your CUDA version)" 42 ) 43 _DEVICE = device
Set the compute device globally.
Return the current device.
92def run_diagnostics( 93 result, 94 f_threshold: float = 0.5, 95 tost_threshold: float = 0.36, 96 placebo_period: tuple[int, int] | None = None, 97 carryover_period: tuple[int, int] | None = None, 98 loo: bool = False, 99) -> DiagnosticResult: 100 """Run diagnostic tests on a FectResult. 101 102 Parameters 103 ---------- 104 result : FectResult 105 Must have inference results (``se=True``). 106 f_threshold : float 107 Non-centrality parameter for equivalence F-test. 108 tost_threshold : float 109 Equivalence bound for TOST. 110 placebo_period : (start, end), optional 111 Relative time window for placebo test. 112 """ 113 from scipy import stats 114 115 diag = DiagnosticResult() 116 117 if result.inference is None: 118 return diag 119 120 inf = result.inference 121 time_on = result.time_on 122 att_on = result.att_on 123 124 if time_on is None or att_on is None: 125 return diag 126 127 # Pre-treatment periods 128 pre_mask = time_on < 0 129 if not np.any(pre_mask): 130 return diag 131 132 pre_idx = np.where(pre_mask)[0] 133 k = len(pre_idx) 134 att_pre = att_on[pre_idx] 135 136 # F-test from bootstrap covariance 137 if inf.att_on_boot is not None and inf.att_on_boot.shape[1] > k: 138 boot_pre = inf.att_on_boot[pre_idx, :] # (k, nboots) 139 S = np.cov(boot_pre) # (k, k) 140 if k == 1: 141 S = S.reshape(1, 1) 142 143 n_boot = boot_pre.shape[1] 144 try: 145 cond = np.linalg.cond(S) 146 if cond > 1e12: 147 S_inv = np.linalg.pinv(S) 148 else: 149 S_inv = np.linalg.inv(S) 150 F_stat = float(att_pre @ S_inv @ att_pre) 151 scale = (n_boot - k) / ((n_boot - 1) * k) 152 F_stat_scaled = F_stat * scale 153 154 if np.isfinite(F_stat_scaled) and F_stat_scaled >= 0: 155 diag.f_stat = F_stat_scaled 156 diag.f_df1 = k 157 diag.f_df2 = n_boot - k 158 diag.f_pval = float(1 - stats.f.cdf(F_stat_scaled, k, n_boot - k)) 159 160 # Equivalence F-test (non-central F) 161 ncp = n_boot * f_threshold 162 diag.equiv_f_pval = float(stats.ncf.cdf(F_stat_scaled, k, n_boot - k, ncp)) 163 diag.equiv_threshold = f_threshold 164 except np.linalg.LinAlgError: 165 pass 166 167 # TOST 168 if inf.att_on_se is not None: 169 se_pre = inf.att_on_se[pre_idx] 170 n_boot = inf.att_on_boot.shape[1] if inf.att_on_boot is not None else 200 171 df = n_boot - 1 172 173 tost_pvals = np.full(k, np.nan) 174 for i in range(k): 175 if se_pre[i] > 0: 176 t_upper = (att_pre[i] - tost_threshold) / se_pre[i] 177 t_lower = (att_pre[i] + tost_threshold) / se_pre[i] 178 p_upper = float(stats.t.cdf(t_upper, df)) 179 p_lower = float(1 - stats.t.cdf(t_lower, df)) 180 tost_pvals[i] = max(p_upper, p_lower) 181 182 diag.tost_pvals = tost_pvals 183 diag.tost_periods = time_on[pre_idx] 184 diag.tost_threshold = tost_threshold 185 186 # Placebo test 187 if placebo_period is not None: 188 p_start, p_end = placebo_period 189 placebo_mask = (time_on >= p_start) & (time_on <= p_end) & pre_mask 190 if np.any(placebo_mask): 191 p_idx = np.where(placebo_mask)[0] 192 diag.placebo_att = float(np.mean(att_on[p_idx])) 193 if inf.att_on_boot is not None: 194 boot_placebo = np.mean(inf.att_on_boot[p_idx, :], axis=0) 195 diag.placebo_pval = min( 196 2 * np.mean(boot_placebo >= 0), 197 2 * np.mean(boot_placebo <= 0), 198 1.0, 199 ) 200 201 # Carryover test (for treatment reversals) 202 if carryover_period is not None and hasattr(result, "att_off") and result.att_off is not None: 203 c_start, c_end = carryover_period 204 time_off = getattr(result, "time_off", None) 205 if time_off is not None: 206 off_mask = (time_off >= c_start) & (time_off <= c_end) 207 if np.any(off_mask): 208 diag.carryover_att = float(np.mean(result.att_off[off_mask])) 209 210 # Leave-one-period-out test 211 if loo and att_on is not None and time_on is not None: 212 post_mask = time_on >= 0 213 post_idx = np.where(post_mask)[0] 214 if len(post_idx) > 1: 215 full_att = result.att_avg 216 loo_atts = [] 217 loo_periods = [] 218 for drop_i in post_idx: 219 remaining = np.delete(post_idx, np.where(post_idx == drop_i)) 220 if len(remaining) > 0: 221 loo_att = float(np.mean(att_on[remaining])) 222 loo_atts.append(loo_att) 223 loo_periods.append(time_on[drop_i]) 224 diag.loo_atts = np.array(loo_atts) 225 diag.loo_periods = np.array(loo_periods) 226 diag.loo_max_change = float(np.max(np.abs(diag.loo_atts - full_att))) 227 228 return diag
Run diagnostic tests on a FectResult.
Parameters
result : FectResult
Must have inference results (se=True).
f_threshold : float
Non-centrality parameter for equivalence F-test.
tost_threshold : float
Equivalence bound for TOST.
placebo_period : (start, end), optional
Relative time window for placebo test.
35@dataclass 36class DiagnosticResult: 37 """Container for diagnostic test results.""" 38 # F-test for pre-trends 39 f_stat: float | None = None 40 f_pval: float | None = None 41 f_df1: int | None = None 42 f_df2: int | None = None 43 44 # Equivalence F-test 45 equiv_f_pval: float | None = None 46 equiv_threshold: float | None = None 47 48 # TOST per pre-period 49 tost_pvals: np.ndarray | None = None 50 tost_periods: np.ndarray | None = None 51 tost_threshold: float | None = None 52 53 # Placebo 54 placebo_att: float | None = None 55 placebo_pval: float | None = None 56 57 # Carryover 58 carryover_att: float | None = None 59 carryover_pval: float | None = None 60 61 # Leave-one-out 62 loo_atts: np.ndarray | None = None # ATT when each pre-period is dropped 63 loo_periods: np.ndarray | None = None 64 loo_max_change: float | None = None # max |ATT_full - ATT_loo| 65 66 def summary(self) -> str: 67 lines = ["Diagnostic Tests", "=" * 50] 68 if self.f_stat is not None: 69 lines.append(f"Pre-trend F-test:") 70 lines.append(f" F({self.f_df1},{self.f_df2}) = {self.f_stat:.4f}, p = {self.f_pval:.4f}") 71 if self.equiv_f_pval is not None: 72 lines.append(f"Equivalence F-test:") 73 lines.append(f" p = {self.equiv_f_pval:.4f} (threshold={self.equiv_threshold})") 74 if self.tost_pvals is not None: 75 lines.append("TOST per pre-period:") 76 for i, t in enumerate(self.tost_periods): 77 p = self.tost_pvals[i] 78 status = "PASS" if p < 0.05 else "fail" 79 lines.append(f" t={t:+.0f}: p={p:.4f} [{status}]") 80 if self.placebo_pval is not None: 81 lines.append(f"Placebo test:") 82 lines.append(f" ATT={self.placebo_att:.4f}, p={self.placebo_pval:.4f}") 83 if self.carryover_pval is not None: 84 lines.append(f"Carryover test:") 85 lines.append(f" ATT={self.carryover_att:.4f}, p={self.carryover_pval:.4f}") 86 if self.loo_max_change is not None: 87 lines.append(f"Leave-one-out:") 88 lines.append(f" Max ATT change = {self.loo_max_change:.6f}") 89 return "\n".join(lines)
Container for diagnostic test results.
66 def summary(self) -> str: 67 lines = ["Diagnostic Tests", "=" * 50] 68 if self.f_stat is not None: 69 lines.append(f"Pre-trend F-test:") 70 lines.append(f" F({self.f_df1},{self.f_df2}) = {self.f_stat:.4f}, p = {self.f_pval:.4f}") 71 if self.equiv_f_pval is not None: 72 lines.append(f"Equivalence F-test:") 73 lines.append(f" p = {self.equiv_f_pval:.4f} (threshold={self.equiv_threshold})") 74 if self.tost_pvals is not None: 75 lines.append("TOST per pre-period:") 76 for i, t in enumerate(self.tost_periods): 77 p = self.tost_pvals[i] 78 status = "PASS" if p < 0.05 else "fail" 79 lines.append(f" t={t:+.0f}: p={p:.4f} [{status}]") 80 if self.placebo_pval is not None: 81 lines.append(f"Placebo test:") 82 lines.append(f" ATT={self.placebo_att:.4f}, p={self.placebo_pval:.4f}") 83 if self.carryover_pval is not None: 84 lines.append(f"Carryover test:") 85 lines.append(f" ATT={self.carryover_att:.4f}, p={self.carryover_pval:.4f}") 86 if self.loo_max_change is not None: 87 lines.append(f"Leave-one-out:") 88 lines.append(f" Max ATT change = {self.loo_max_change:.6f}") 89 return "\n".join(lines)
20def plot( 21 result, 22 kind: Literal["gap", "status", "factors", "counterfactual", "equiv", "calendar"] = "gap", 23 units: list | None = None, 24 show_ci: bool = True, 25 title: str | None = None, 26 figsize: tuple[float, float] = (10, 6), 27 ax=None, 28 **kwargs, 29): 30 """Plot fect results. 31 32 Parameters 33 ---------- 34 result : FectResult 35 Output from ``pyfector.fect()``. 36 kind : str 37 Plot type. 38 units : list, optional 39 Unit IDs for counterfactual plot. 40 show_ci : bool 41 Show confidence intervals (requires ``se=True`` in estimation). 42 """ 43 import matplotlib.pyplot as plt 44 45 if ax is None: 46 fig, ax = plt.subplots(figsize=figsize) 47 else: 48 fig = ax.get_figure() 49 50 if kind == "gap": 51 _plot_gap(result, ax, show_ci, **kwargs) 52 elif kind == "status": 53 _plot_status(result, ax, **kwargs) 54 elif kind == "factors": 55 _plot_factors(result, ax, **kwargs) 56 elif kind == "counterfactual": 57 _plot_counterfactual(result, ax, units, **kwargs) 58 elif kind == "equiv": 59 _plot_equiv(result, ax, **kwargs) 60 elif kind == "calendar": 61 _plot_calendar(result, ax, show_ci, **kwargs) 62 else: 63 raise ValueError(f"Unknown plot kind: {kind}") 64 65 if title: 66 ax.set_title(title) 67 68 fig.tight_layout() 69 return fig
Plot fect results.
Parameters
result : FectResult
Output from pyfector.fect.
kind : str
Plot type.
units : list, optional
Unit IDs for counterfactual plot.
show_ci : bool
Show confidence intervals (requires se=True in estimation).