pyfector.fect
Public entry point for pyfector.
This module exposes fect(), the single function most users call,
and the FectResult dataclass that packages its output.
fect() orchestrates the full pipeline:
pyfector.panel.prepare_panel()converts the input DataFrame to dense(T, N)matrices with treatment timing and unit classification.pyfector.panel.initial_fit()computes the starting fitY0and initial covariate coefficients on the control subsample.- If the user passes a range for
r(IFE) or leaveslam=None(MC),pyfector.cvchooses the hyperparameter by cross-validation. pyfector.estimatorsiterates the EM loop for the chosen method (fe,ife,mc, orcfe).- Treatment effects are computed from
eff = Y - Y_ct; the overall ATT and dynamic ATT by relative event time are derived in_compute_effects()vianumpy.bincount(). - When
se=Truethe requested inference routine (bootstraporjackknife) frompyfector.inferenceis called with a closure that re-runs the EM loop on resampled units.
Example
::
import pyfector
result = pyfector.fect(
data=df,
Y="outcome", D="treat",
index=("unit", "year"),
X=["gdp", "pop"],
method="ife",
r=(0, 5), # CV over 0..5 factors
se=True,
nboots=200,
device="cpu", # or "gpu"
n_jobs=4,
seed=42,
)
result.summary()
result.plot(kind="gap")
1""" 2Public entry point for pyfector. 3 4This module exposes :func:`fect`, the single function most users call, 5and the :class:`FectResult` dataclass that packages its output. 6 7:func:`fect` orchestrates the full pipeline: 8 91. :func:`pyfector.panel.prepare_panel` converts the input DataFrame to 10 dense ``(T, N)`` matrices with treatment timing and unit 11 classification. 122. :func:`pyfector.panel.initial_fit` computes the starting fit ``Y0`` 13 and initial covariate coefficients on the control subsample. 143. If the user passes a range for ``r`` (IFE) or leaves ``lam=None`` 15 (MC), :mod:`pyfector.cv` chooses the hyperparameter by 16 cross-validation. 174. :mod:`pyfector.estimators` iterates the EM loop for the chosen 18 method (``fe``, ``ife``, ``mc``, or ``cfe``). 195. Treatment effects are computed from ``eff = Y - Y_ct``; the overall 20 ATT and dynamic ATT by relative event time are derived in 21 :func:`_compute_effects` via :func:`numpy.bincount`. 226. When ``se=True`` the requested inference routine 23 (``bootstrap`` or ``jackknife``) from :mod:`pyfector.inference` is 24 called with a closure that re-runs the EM loop on resampled units. 25 26Example 27------- 28:: 29 30 import pyfector 31 32 result = pyfector.fect( 33 data=df, 34 Y="outcome", D="treat", 35 index=("unit", "year"), 36 X=["gdp", "pop"], 37 method="ife", 38 r=(0, 5), # CV over 0..5 factors 39 se=True, 40 nboots=200, 41 device="cpu", # or "gpu" 42 n_jobs=4, 43 seed=42, 44 ) 45 result.summary() 46 result.plot(kind="gap") 47""" 48 49from __future__ import annotations 50 51from dataclasses import dataclass, field 52from typing import Literal 53import os 54 55import numpy as np 56 57from .backend import set_device, get_backend, to_numpy, to_device, make_rng 58from .panel import PanelData, prepare_panel, initial_fit 59from .estimators import estimate_ife, estimate_mc, estimate_cfe, EstimationResult 60from .cv import cv_ife, cv_mc, CVResult 61from .inference import bootstrap, jackknife, InferenceResult 62 63 64@dataclass 65class FectResult: 66 """Container for all fect estimation results.""" 67 # Method info 68 method: str 69 r_cv: int | None = None 70 lambda_cv: float | None = None 71 72 # Point estimates 73 att_avg: float = 0.0 74 att_avg_unit: float = 0.0 75 76 # Dynamic effects 77 att_on: np.ndarray | None = None 78 time_on: np.ndarray | None = None 79 count_on: np.ndarray | None = None 80 81 # Exit effects (treatment reversal) 82 att_off: np.ndarray | None = None 83 time_off: np.ndarray | None = None 84 85 # Coefficients 86 beta: np.ndarray | None = None 87 covariate_names: list[str] = field(default_factory=list) 88 89 # Fixed effects 90 mu: float = 0.0 91 alpha: np.ndarray | None = None # unit FE 92 xi: np.ndarray | None = None # time FE 93 factors: np.ndarray | None = None 94 loadings: np.ndarray | None = None 95 96 # Counterfactual and effects matrices 97 Y_ct: np.ndarray | None = None # T×N counterfactual 98 eff: np.ndarray | None = None # T×N treatment effects 99 residuals: np.ndarray | None = None 100 101 # Model fit 102 sigma2: float = 0.0 103 IC: float = 0.0 104 PC: float = 0.0 105 rmse: float = 0.0 106 niter: int = 0 107 converged: bool = False 108 109 # Inference 110 inference: InferenceResult | None = None 111 112 # CV 113 cv_result: CVResult | None = None 114 115 # Panel metadata 116 panel: PanelData | None = None 117 118 # Reproducibility 119 seed: int | None = None 120 121 def summary(self) -> str: 122 """Print summary table of results.""" 123 lines = [] 124 lines.append(f"pyfector estimation results") 125 lines.append(f"{'='*60}") 126 lines.append(f"Method: {self.method}") 127 if self.r_cv is not None: 128 lines.append(f"Number of factors (CV): {self.r_cv}") 129 if self.lambda_cv is not None: 130 lines.append(f"Lambda (CV): {self.lambda_cv:.6f}") 131 lines.append(f"Converged: {self.converged} (iter={self.niter})") 132 lines.append(f"Sigma^2: {self.sigma2:.6f}") 133 lines.append(f"") 134 lines.append(f"ATT (average): {self.att_avg:.6f}") 135 if self.inference is not None: 136 inf = self.inference 137 lines.append(f" SE: {inf.att_avg_se:.6f}") 138 lines.append(f" CI: [{inf.att_avg_ci[0]:.6f}, {inf.att_avg_ci[1]:.6f}]") 139 lines.append(f" p-val: {inf.att_avg_pval:.4f}") 140 141 if self.beta is not None and len(self.beta) > 0: 142 lines.append(f"") 143 lines.append(f"Coefficients:") 144 for i, name in enumerate(self.covariate_names): 145 lines.append(f" {name}: {self.beta[i]:.6f}") 146 147 if self.att_on is not None and self.time_on is not None: 148 lines.append(f"") 149 lines.append(f"Dynamic effects (ATT by relative time):") 150 lines.append(f" {'Time':>6s} {'ATT':>10s} {'Count':>6s}", ) 151 for i, t in enumerate(self.time_on): 152 count = self.count_on[i] if self.count_on is not None else "" 153 att = self.att_on[i] 154 if self.inference is not None: 155 se = self.inference.att_on_se[i] 156 lines.append(f" {t:>6.0f} {att:>10.4f} ({se:.4f}) {count}") 157 else: 158 lines.append(f" {t:>6.0f} {att:>10.4f} {count}") 159 160 lines.append(f"{'='*60}") 161 if self.panel is not None: 162 lines.append(f"N={self.panel.N}, T={self.panel.T}") 163 if self.seed is not None: 164 lines.append(f"Seed: {self.seed}") 165 return "\n".join(lines) 166 167 def __repr__(self): 168 return self.summary() 169 170 def plot(self, kind="gap", **kwargs): 171 """Plot results. Shortcut for ``pyfector.plot(self, kind, ...)``.""" 172 from .plotting import plot as _plot 173 return _plot(self, kind=kind, **kwargs) 174 175 def diagnose(self, **kwargs): 176 """Run diagnostic tests. Shortcut for ``pyfector.run_diagnostics(self, ...)``.""" 177 from .diagnostics import run_diagnostics 178 return run_diagnostics(self, **kwargs) 179 180 181def fect( 182 data, 183 Y: str, 184 D: str, 185 index: tuple[str, str], 186 X: list[str] | None = None, 187 W: str | None = None, 188 group: str | None = None, 189 method: Literal["fe", "ife", "mc", "cfe", "both"] = "ife", 190 force: Literal["none", "unit", "time", "two-way"] = "two-way", 191 r: int | tuple[int, int] = 0, 192 lam: float | None = None, 193 nlambda: int = 10, 194 CV: bool = True, 195 k: int = 10, 196 cv_prop: float = 0.1, 197 cv_nobs: int = 3, 198 cv_treat: bool = True, 199 cv_donut: int = 0, 200 criterion: str = "mspe", 201 se: bool = False, 202 vartype: Literal["bootstrap", "jackknife"] = "bootstrap", 203 nboots: int = 200, 204 alpha: float = 0.05, 205 tol: float = 1e-7, 206 max_iter: int = 5000, 207 min_T0: int = 1, 208 max_missing: float = 1.0, 209 normalize: bool = False, 210 # CFE-specific 211 Z: list[str] | None = None, 212 Q: list[str] | None = None, 213 # Performance 214 device: Literal["cpu", "gpu"] = "cpu", 215 n_jobs: int | None = -1, 216 seed: int | None = None, 217) -> FectResult: 218 """Estimate counterfactual treatment effects for panel data. 219 220 This is the main entry point — equivalent to R fect's ``fect()`` function. 221 222 Parameters 223 ---------- 224 data : polars.DataFrame, pandas.DataFrame 225 Long-format panel data. 226 Y, D : str 227 Column names for outcome and binary treatment indicator. 228 index : (str, str) 229 Column names for (unit_id, time_period). 230 X : list of str, optional 231 Time-varying covariates. 232 method : {"fe", "ife", "mc", "cfe", "both"} 233 Estimation method. 234 force : {"none", "unit", "time", "two-way"} 235 Fixed effects specification. 236 r : int or (int, int) 237 Number of factors. If tuple, CV selects from range. 238 lam : float, optional 239 Nuclear norm penalty for MC. If None with CV=True, auto-selected. 240 se : bool 241 Compute standard errors via bootstrap/jackknife. 242 device : {"cpu", "gpu"} 243 Compute device. 244 n_jobs : int, optional 245 Parallel workers for CV and bootstrap. ``-1`` or ``None`` uses 246 all available CPUs. 247 seed : int, optional 248 Random seed for full reproducibility. 249 """ 250 # Set device 251 set_device(device) 252 xp = get_backend() 253 n_jobs = _resolve_n_jobs(n_jobs) 254 255 if group is not None: 256 raise NotImplementedError("The `group` argument is not implemented yet.") 257 if Z is not None or Q is not None: 258 raise NotImplementedError("The `Z` and `Q` CFE interaction arguments are not implemented yet.") 259 260 # Map force string to int 261 force_map = {"none": 0, "unit": 1, "time": 2, "two-way": 3} 262 force_int = force_map[force] 263 264 # Prepare panel data 265 panel = prepare_panel( 266 data, Y=Y, D=D, index=index, X=X, W=W, 267 group=group, min_T0=min_T0, max_missing=max_missing, 268 ) 269 270 # Move to device 271 Y_mat = to_device(panel.Y) 272 D_mat = to_device(panel.D) 273 I_mat = to_device(panel.I) 274 II_mat = to_device(panel.II) 275 X_mat = to_device(panel.X) if panel.X is not None else None 276 W_mat = to_device(panel.W) if panel.W is not None else None 277 278 # Normalize 279 norm_factor = 1.0 280 if normalize: 281 sd_y = float(xp.std(Y_mat[I_mat > 0])) 282 if sd_y > 0: 283 Y_mat = Y_mat / sd_y 284 norm_factor = sd_y 285 286 # Initial fit 287 Y0, beta0 = initial_fit(Y_mat, X_mat, II_mat, force_int) 288 289 # Determine r and lambda 290 r_cv = None 291 lambda_cv = None 292 cv_result = None 293 294 if method == "ife": 295 if isinstance(r, tuple) and CV: 296 cv_result = cv_ife( 297 Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0, 298 force=force_int, r_range=r, k=k, cv_prop=cv_prop, 299 cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut, 300 criterion=criterion, tol=tol, max_iter=max_iter, 301 n_jobs=n_jobs, seed=seed, 302 ) 303 r_cv = cv_result.best_r 304 else: 305 r_cv = r if isinstance(r, int) else r[0] 306 307 elif method == "mc": 308 if lam is None and CV: 309 cv_result = cv_mc( 310 Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0, 311 force=force_int, nlambda=nlambda, k=k, cv_prop=cv_prop, 312 cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut, 313 criterion=criterion, tol=tol, max_iter=max_iter, 314 n_jobs=n_jobs, seed=seed, 315 ) 316 lambda_cv = cv_result.best_lambda 317 else: 318 lambda_cv = lam if lam is not None else 0.0 319 320 elif method == "fe": 321 r_cv = 0 322 323 elif method == "cfe": 324 r_cv = r if isinstance(r, int) else r[0] 325 326 # Point estimation 327 if method in ("fe", "ife"): 328 est = estimate_ife( 329 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 330 r=r_cv, force=force_int, tol=tol, max_iter=max_iter, 331 ) 332 elif method == "mc": 333 est = estimate_mc( 334 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 335 lam=lambda_cv, force=force_int, tol=tol, max_iter=max_iter, 336 ) 337 elif method == "cfe": 338 est = estimate_cfe( 339 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 340 r=r_cv, force=force_int, tol=tol, max_iter=max_iter, 341 ) 342 elif method == "both": 343 # Run both IFE and MC, return IFE results with MC comparison 344 if isinstance(r, tuple) and CV: 345 cv_result = cv_ife( 346 Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0, 347 force=force_int, r_range=r, k=k, cv_prop=cv_prop, 348 cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut, 349 criterion=criterion, tol=tol, max_iter=max_iter, 350 n_jobs=n_jobs, seed=seed, 351 ) 352 r_cv = cv_result.best_r 353 else: 354 r_cv = r if isinstance(r, int) else r[0] 355 est = estimate_ife( 356 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 357 r=r_cv, force=force_int, tol=tol, max_iter=max_iter, 358 ) 359 else: 360 raise ValueError(f"Unknown method: {method}") 361 362 # Compute effects 363 eff = Y_mat - est.fit 364 Y_ct = est.fit 365 366 # Denormalize 367 if normalize and norm_factor != 1.0: 368 eff = eff * norm_factor 369 Y_ct = Y_ct * norm_factor 370 Y_mat = Y_mat * norm_factor 371 if est.beta is not None: 372 est = est._replace(beta=est.beta * norm_factor) 373 374 # ATT computation 375 T_on = to_device(panel.T_on) 376 att_avg, att_on, time_on, count_on, att_avg_unit = _compute_effects( 377 to_numpy(eff), to_numpy(D_mat), to_numpy(panel.T_on), to_numpy(I_mat), 378 ) 379 380 # Build result 381 result = FectResult( 382 method=method, 383 r_cv=r_cv, 384 lambda_cv=lambda_cv, 385 att_avg=att_avg, 386 att_avg_unit=att_avg_unit, 387 att_on=att_on, 388 time_on=time_on, 389 count_on=count_on, 390 beta=to_numpy(est.beta) if est.beta is not None else None, 391 covariate_names=panel.covariate_names, 392 mu=est.mu, 393 alpha=to_numpy(est.alpha) if est.alpha is not None else None, 394 xi=to_numpy(est.xi) if est.xi is not None else None, 395 factors=to_numpy(est.factors) if est.factors is not None else None, 396 loadings=to_numpy(est.loadings) if est.loadings is not None else None, 397 Y_ct=to_numpy(Y_ct), 398 eff=to_numpy(eff), 399 residuals=to_numpy(est.residuals), 400 sigma2=est.sigma2, 401 IC=est.IC, 402 PC=est.PC, 403 niter=est.niter, 404 converged=est.converged, 405 cv_result=cv_result, 406 panel=panel, 407 seed=seed, 408 ) 409 410 # Inference 411 if se: 412 result.inference = _run_inference( 413 result, panel, Y_mat, X_mat, W_mat, beta0, Y0, 414 method=method, r_cv=r_cv, lambda_cv=lambda_cv, 415 force_int=force_int, tol=tol, max_iter=max_iter, 416 vartype=vartype, nboots=nboots, alpha=alpha, 417 n_jobs=n_jobs, seed=seed, normalize=normalize, 418 norm_factor=norm_factor, 419 ) 420 421 return result 422 423 424def _compute_effects(eff, D, T_on, I): 425 """Compute overall ATT, per-unit ATT, and dynamic ATT by event time. 426 427 Dynamic ATT grouping is done with :func:`numpy.bincount` so the cost 428 is O(n_observed_ever_treated_cells) regardless of the number of 429 distinct relative-time values. Per-unit ATT uses a column-wise 430 masked mean via ``np.add.reduce`` rather than a Python loop. 431 """ 432 treated = (D > 0) & (I > 0) 433 n_treated = int(np.sum(treated)) 434 435 att_avg = float(np.sum(eff[treated]) / max(n_treated, 1)) 436 437 # Per-unit ATT (post-treatment only) — sum then divide by count, 438 # keeping only units that have at least one treated observation. 439 col_counts = treated.sum(axis=0) # (N,) 440 col_sums = (eff * treated).sum(axis=0) # (N,) 441 has_any = col_counts > 0 442 if np.any(has_any): 443 unit_atts = col_sums[has_any] / col_counts[has_any] 444 att_avg_unit = float(unit_atts.mean()) 445 else: 446 att_avg_unit = 0.0 447 448 # Dynamic ATT by relative event time. Include pre-treatment periods 449 # for ever-treated units (counterfactual gaps before onset). 450 ever_treated = np.any(D > 0, axis=0) # (N,) 451 all_periods = (I > 0) & ever_treated[np.newaxis, :] # (T, N) 452 453 T_on_flat = T_on[all_periods] 454 eff_flat = eff[all_periods] 455 valid = ~np.isnan(T_on_flat) 456 T_on_flat = T_on_flat[valid].astype(np.int64) 457 eff_flat = eff_flat[valid] 458 459 if T_on_flat.size == 0: 460 return att_avg, np.array([]), np.array([]), np.array([], dtype=np.int64), att_avg_unit 461 462 # Bincount over the shifted integer indices. 463 offset = int(T_on_flat.min()) 464 idx = T_on_flat - offset 465 minlength = int(T_on_flat.max() - offset + 1) 466 sums = np.bincount(idx, weights=eff_flat, minlength=minlength) 467 counts = np.bincount(idx, minlength=minlength) 468 keep = counts > 0 469 time_on = (offset + np.arange(minlength))[keep].astype(np.float64) 470 att_on = (sums[keep] / counts[keep]).astype(np.float64) 471 count_on = counts[keep].astype(np.int64) 472 473 return att_avg, att_on, time_on, count_on, att_avg_unit 474 475 476def _run_inference( 477 result, panel, Y_mat, X_mat, W_mat, beta0, Y0, 478 method, r_cv, lambda_cv, force_int, tol, max_iter, 479 vartype, nboots, alpha, n_jobs, seed, normalize, norm_factor, 480): 481 """Run bootstrap or jackknife inference. 482 483 Bootstrap replicates use a relaxed convergence tolerance 484 (``max(tol, 1e-3)``) because bootstrap SEs converge to 3–4 485 significant digits regardless of inner precision. The full-sample 486 fit is used as the warm-start initialiser for each replicate, 487 which cuts EM iterations by ~30-60 %. 488 """ 489 xp = get_backend() 490 491 # Relaxed tolerance: bootstrap SEs don't benefit from tight inner tol. 492 boot_tol = max(tol, 1e-3) 493 494 # Pre-move panel data to device once (avoids CPU→GPU copy per bootstrap rep) 495 II_dev = to_device(panel.II) 496 D_dev = to_device(panel.D) 497 I_dev = to_device(panel.I) 498 T_on_np = panel.T_on # keep on CPU for ATT computation 499 500 # Warm-start: use full-sample fitted values as initial imputation 501 # for each replicate instead of recomputing initial_fit from scratch. 502 Y0_full = to_device(result.Y_ct) 503 504 def _estimate_fn(unit_idx): 505 """Re-estimate on a subset of units.""" 506 Y_sub = Y_mat[:, unit_idx] 507 II_sub = II_dev[:, unit_idx] 508 D_sub = D_dev[:, unit_idx] 509 I_sub = I_dev[:, unit_idx] 510 X_sub = X_mat[:, unit_idx, :] if X_mat is not None else None 511 W_sub = W_mat[:, unit_idx] if W_mat is not None else None 512 T_on_sub = T_on_np[:, unit_idx] 513 beta0_sub = beta0 514 515 # Warm-start from full-sample fit (columns for resampled units). 516 # This is much closer to the replicate's solution than a cold 517 # initial_fit, cutting EM iterations significantly. 518 Y0_sub = Y0_full[:, unit_idx].copy() 519 520 if method in ("fe", "ife"): 521 est = estimate_ife( 522 Y_sub, Y0_sub, X_sub, II_sub, W_sub, beta0_sub, 523 r=r_cv, force=force_int, tol=boot_tol, max_iter=max_iter, 524 ) 525 elif method == "mc": 526 est = estimate_mc( 527 Y_sub, Y0_sub, X_sub, II_sub, W_sub, beta0_sub, 528 lam=lambda_cv, force=force_int, tol=boot_tol, max_iter=max_iter, 529 ) 530 else: 531 est = estimate_ife( 532 Y_sub, Y0_sub, X_sub, II_sub, W_sub, beta0_sub, 533 r=r_cv or 0, force=force_int, tol=boot_tol, max_iter=max_iter, 534 ) 535 536 eff = to_numpy(Y_sub - est.fit) 537 if normalize and norm_factor != 1.0: 538 eff = eff * norm_factor 539 540 return eff, to_numpy(D_sub), T_on_sub, to_numpy(I_sub) 541 542 if vartype == "bootstrap": 543 return bootstrap( 544 _estimate_fn, to_numpy(Y_mat), to_numpy(panel.D), 545 to_numpy(panel.I), panel.T_on, panel.unit_type, 546 nboots=nboots, alpha=alpha, n_jobs=n_jobs, seed=seed, 547 ) 548 else: 549 return jackknife( 550 _estimate_fn, to_numpy(Y_mat), to_numpy(panel.D), 551 to_numpy(panel.I), panel.T_on, panel.unit_type, 552 alpha=alpha, n_jobs=n_jobs, 553 ) 554 555 556def _resolve_n_jobs(n_jobs: int | None) -> int: 557 """Normalize public n_jobs values before passing them to joblib.""" 558 if n_jobs is None or n_jobs == -1: 559 return os.cpu_count() or 1 560 if n_jobs == 0 or n_jobs < -1: 561 raise ValueError("n_jobs must be a positive integer, -1, or None") 562 return int(n_jobs)
65@dataclass 66class FectResult: 67 """Container for all fect estimation results.""" 68 # Method info 69 method: str 70 r_cv: int | None = None 71 lambda_cv: float | None = None 72 73 # Point estimates 74 att_avg: float = 0.0 75 att_avg_unit: float = 0.0 76 77 # Dynamic effects 78 att_on: np.ndarray | None = None 79 time_on: np.ndarray | None = None 80 count_on: np.ndarray | None = None 81 82 # Exit effects (treatment reversal) 83 att_off: np.ndarray | None = None 84 time_off: np.ndarray | None = None 85 86 # Coefficients 87 beta: np.ndarray | None = None 88 covariate_names: list[str] = field(default_factory=list) 89 90 # Fixed effects 91 mu: float = 0.0 92 alpha: np.ndarray | None = None # unit FE 93 xi: np.ndarray | None = None # time FE 94 factors: np.ndarray | None = None 95 loadings: np.ndarray | None = None 96 97 # Counterfactual and effects matrices 98 Y_ct: np.ndarray | None = None # T×N counterfactual 99 eff: np.ndarray | None = None # T×N treatment effects 100 residuals: np.ndarray | None = None 101 102 # Model fit 103 sigma2: float = 0.0 104 IC: float = 0.0 105 PC: float = 0.0 106 rmse: float = 0.0 107 niter: int = 0 108 converged: bool = False 109 110 # Inference 111 inference: InferenceResult | None = None 112 113 # CV 114 cv_result: CVResult | None = None 115 116 # Panel metadata 117 panel: PanelData | None = None 118 119 # Reproducibility 120 seed: int | None = None 121 122 def summary(self) -> str: 123 """Print summary table of results.""" 124 lines = [] 125 lines.append(f"pyfector estimation results") 126 lines.append(f"{'='*60}") 127 lines.append(f"Method: {self.method}") 128 if self.r_cv is not None: 129 lines.append(f"Number of factors (CV): {self.r_cv}") 130 if self.lambda_cv is not None: 131 lines.append(f"Lambda (CV): {self.lambda_cv:.6f}") 132 lines.append(f"Converged: {self.converged} (iter={self.niter})") 133 lines.append(f"Sigma^2: {self.sigma2:.6f}") 134 lines.append(f"") 135 lines.append(f"ATT (average): {self.att_avg:.6f}") 136 if self.inference is not None: 137 inf = self.inference 138 lines.append(f" SE: {inf.att_avg_se:.6f}") 139 lines.append(f" CI: [{inf.att_avg_ci[0]:.6f}, {inf.att_avg_ci[1]:.6f}]") 140 lines.append(f" p-val: {inf.att_avg_pval:.4f}") 141 142 if self.beta is not None and len(self.beta) > 0: 143 lines.append(f"") 144 lines.append(f"Coefficients:") 145 for i, name in enumerate(self.covariate_names): 146 lines.append(f" {name}: {self.beta[i]:.6f}") 147 148 if self.att_on is not None and self.time_on is not None: 149 lines.append(f"") 150 lines.append(f"Dynamic effects (ATT by relative time):") 151 lines.append(f" {'Time':>6s} {'ATT':>10s} {'Count':>6s}", ) 152 for i, t in enumerate(self.time_on): 153 count = self.count_on[i] if self.count_on is not None else "" 154 att = self.att_on[i] 155 if self.inference is not None: 156 se = self.inference.att_on_se[i] 157 lines.append(f" {t:>6.0f} {att:>10.4f} ({se:.4f}) {count}") 158 else: 159 lines.append(f" {t:>6.0f} {att:>10.4f} {count}") 160 161 lines.append(f"{'='*60}") 162 if self.panel is not None: 163 lines.append(f"N={self.panel.N}, T={self.panel.T}") 164 if self.seed is not None: 165 lines.append(f"Seed: {self.seed}") 166 return "\n".join(lines) 167 168 def __repr__(self): 169 return self.summary() 170 171 def plot(self, kind="gap", **kwargs): 172 """Plot results. Shortcut for ``pyfector.plot(self, kind, ...)``.""" 173 from .plotting import plot as _plot 174 return _plot(self, kind=kind, **kwargs) 175 176 def diagnose(self, **kwargs): 177 """Run diagnostic tests. Shortcut for ``pyfector.run_diagnostics(self, ...)``.""" 178 from .diagnostics import run_diagnostics 179 return run_diagnostics(self, **kwargs)
Container for all fect estimation results.
122 def summary(self) -> str: 123 """Print summary table of results.""" 124 lines = [] 125 lines.append(f"pyfector estimation results") 126 lines.append(f"{'='*60}") 127 lines.append(f"Method: {self.method}") 128 if self.r_cv is not None: 129 lines.append(f"Number of factors (CV): {self.r_cv}") 130 if self.lambda_cv is not None: 131 lines.append(f"Lambda (CV): {self.lambda_cv:.6f}") 132 lines.append(f"Converged: {self.converged} (iter={self.niter})") 133 lines.append(f"Sigma^2: {self.sigma2:.6f}") 134 lines.append(f"") 135 lines.append(f"ATT (average): {self.att_avg:.6f}") 136 if self.inference is not None: 137 inf = self.inference 138 lines.append(f" SE: {inf.att_avg_se:.6f}") 139 lines.append(f" CI: [{inf.att_avg_ci[0]:.6f}, {inf.att_avg_ci[1]:.6f}]") 140 lines.append(f" p-val: {inf.att_avg_pval:.4f}") 141 142 if self.beta is not None and len(self.beta) > 0: 143 lines.append(f"") 144 lines.append(f"Coefficients:") 145 for i, name in enumerate(self.covariate_names): 146 lines.append(f" {name}: {self.beta[i]:.6f}") 147 148 if self.att_on is not None and self.time_on is not None: 149 lines.append(f"") 150 lines.append(f"Dynamic effects (ATT by relative time):") 151 lines.append(f" {'Time':>6s} {'ATT':>10s} {'Count':>6s}", ) 152 for i, t in enumerate(self.time_on): 153 count = self.count_on[i] if self.count_on is not None else "" 154 att = self.att_on[i] 155 if self.inference is not None: 156 se = self.inference.att_on_se[i] 157 lines.append(f" {t:>6.0f} {att:>10.4f} ({se:.4f}) {count}") 158 else: 159 lines.append(f" {t:>6.0f} {att:>10.4f} {count}") 160 161 lines.append(f"{'='*60}") 162 if self.panel is not None: 163 lines.append(f"N={self.panel.N}, T={self.panel.T}") 164 if self.seed is not None: 165 lines.append(f"Seed: {self.seed}") 166 return "\n".join(lines)
Print summary table of results.
171 def plot(self, kind="gap", **kwargs): 172 """Plot results. Shortcut for ``pyfector.plot(self, kind, ...)``.""" 173 from .plotting import plot as _plot 174 return _plot(self, kind=kind, **kwargs)
Plot results. Shortcut for pyfector.plot(self, kind, ...).
176 def diagnose(self, **kwargs): 177 """Run diagnostic tests. Shortcut for ``pyfector.run_diagnostics(self, ...)``.""" 178 from .diagnostics import run_diagnostics 179 return run_diagnostics(self, **kwargs)
Run diagnostic tests. Shortcut for pyfector.run_diagnostics(self, ...).
182def fect( 183 data, 184 Y: str, 185 D: str, 186 index: tuple[str, str], 187 X: list[str] | None = None, 188 W: str | None = None, 189 group: str | None = None, 190 method: Literal["fe", "ife", "mc", "cfe", "both"] = "ife", 191 force: Literal["none", "unit", "time", "two-way"] = "two-way", 192 r: int | tuple[int, int] = 0, 193 lam: float | None = None, 194 nlambda: int = 10, 195 CV: bool = True, 196 k: int = 10, 197 cv_prop: float = 0.1, 198 cv_nobs: int = 3, 199 cv_treat: bool = True, 200 cv_donut: int = 0, 201 criterion: str = "mspe", 202 se: bool = False, 203 vartype: Literal["bootstrap", "jackknife"] = "bootstrap", 204 nboots: int = 200, 205 alpha: float = 0.05, 206 tol: float = 1e-7, 207 max_iter: int = 5000, 208 min_T0: int = 1, 209 max_missing: float = 1.0, 210 normalize: bool = False, 211 # CFE-specific 212 Z: list[str] | None = None, 213 Q: list[str] | None = None, 214 # Performance 215 device: Literal["cpu", "gpu"] = "cpu", 216 n_jobs: int | None = -1, 217 seed: int | None = None, 218) -> FectResult: 219 """Estimate counterfactual treatment effects for panel data. 220 221 This is the main entry point — equivalent to R fect's ``fect()`` function. 222 223 Parameters 224 ---------- 225 data : polars.DataFrame, pandas.DataFrame 226 Long-format panel data. 227 Y, D : str 228 Column names for outcome and binary treatment indicator. 229 index : (str, str) 230 Column names for (unit_id, time_period). 231 X : list of str, optional 232 Time-varying covariates. 233 method : {"fe", "ife", "mc", "cfe", "both"} 234 Estimation method. 235 force : {"none", "unit", "time", "two-way"} 236 Fixed effects specification. 237 r : int or (int, int) 238 Number of factors. If tuple, CV selects from range. 239 lam : float, optional 240 Nuclear norm penalty for MC. If None with CV=True, auto-selected. 241 se : bool 242 Compute standard errors via bootstrap/jackknife. 243 device : {"cpu", "gpu"} 244 Compute device. 245 n_jobs : int, optional 246 Parallel workers for CV and bootstrap. ``-1`` or ``None`` uses 247 all available CPUs. 248 seed : int, optional 249 Random seed for full reproducibility. 250 """ 251 # Set device 252 set_device(device) 253 xp = get_backend() 254 n_jobs = _resolve_n_jobs(n_jobs) 255 256 if group is not None: 257 raise NotImplementedError("The `group` argument is not implemented yet.") 258 if Z is not None or Q is not None: 259 raise NotImplementedError("The `Z` and `Q` CFE interaction arguments are not implemented yet.") 260 261 # Map force string to int 262 force_map = {"none": 0, "unit": 1, "time": 2, "two-way": 3} 263 force_int = force_map[force] 264 265 # Prepare panel data 266 panel = prepare_panel( 267 data, Y=Y, D=D, index=index, X=X, W=W, 268 group=group, min_T0=min_T0, max_missing=max_missing, 269 ) 270 271 # Move to device 272 Y_mat = to_device(panel.Y) 273 D_mat = to_device(panel.D) 274 I_mat = to_device(panel.I) 275 II_mat = to_device(panel.II) 276 X_mat = to_device(panel.X) if panel.X is not None else None 277 W_mat = to_device(panel.W) if panel.W is not None else None 278 279 # Normalize 280 norm_factor = 1.0 281 if normalize: 282 sd_y = float(xp.std(Y_mat[I_mat > 0])) 283 if sd_y > 0: 284 Y_mat = Y_mat / sd_y 285 norm_factor = sd_y 286 287 # Initial fit 288 Y0, beta0 = initial_fit(Y_mat, X_mat, II_mat, force_int) 289 290 # Determine r and lambda 291 r_cv = None 292 lambda_cv = None 293 cv_result = None 294 295 if method == "ife": 296 if isinstance(r, tuple) and CV: 297 cv_result = cv_ife( 298 Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0, 299 force=force_int, r_range=r, k=k, cv_prop=cv_prop, 300 cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut, 301 criterion=criterion, tol=tol, max_iter=max_iter, 302 n_jobs=n_jobs, seed=seed, 303 ) 304 r_cv = cv_result.best_r 305 else: 306 r_cv = r if isinstance(r, int) else r[0] 307 308 elif method == "mc": 309 if lam is None and CV: 310 cv_result = cv_mc( 311 Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0, 312 force=force_int, nlambda=nlambda, k=k, cv_prop=cv_prop, 313 cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut, 314 criterion=criterion, tol=tol, max_iter=max_iter, 315 n_jobs=n_jobs, seed=seed, 316 ) 317 lambda_cv = cv_result.best_lambda 318 else: 319 lambda_cv = lam if lam is not None else 0.0 320 321 elif method == "fe": 322 r_cv = 0 323 324 elif method == "cfe": 325 r_cv = r if isinstance(r, int) else r[0] 326 327 # Point estimation 328 if method in ("fe", "ife"): 329 est = estimate_ife( 330 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 331 r=r_cv, force=force_int, tol=tol, max_iter=max_iter, 332 ) 333 elif method == "mc": 334 est = estimate_mc( 335 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 336 lam=lambda_cv, force=force_int, tol=tol, max_iter=max_iter, 337 ) 338 elif method == "cfe": 339 est = estimate_cfe( 340 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 341 r=r_cv, force=force_int, tol=tol, max_iter=max_iter, 342 ) 343 elif method == "both": 344 # Run both IFE and MC, return IFE results with MC comparison 345 if isinstance(r, tuple) and CV: 346 cv_result = cv_ife( 347 Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0, 348 force=force_int, r_range=r, k=k, cv_prop=cv_prop, 349 cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut, 350 criterion=criterion, tol=tol, max_iter=max_iter, 351 n_jobs=n_jobs, seed=seed, 352 ) 353 r_cv = cv_result.best_r 354 else: 355 r_cv = r if isinstance(r, int) else r[0] 356 est = estimate_ife( 357 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 358 r=r_cv, force=force_int, tol=tol, max_iter=max_iter, 359 ) 360 else: 361 raise ValueError(f"Unknown method: {method}") 362 363 # Compute effects 364 eff = Y_mat - est.fit 365 Y_ct = est.fit 366 367 # Denormalize 368 if normalize and norm_factor != 1.0: 369 eff = eff * norm_factor 370 Y_ct = Y_ct * norm_factor 371 Y_mat = Y_mat * norm_factor 372 if est.beta is not None: 373 est = est._replace(beta=est.beta * norm_factor) 374 375 # ATT computation 376 T_on = to_device(panel.T_on) 377 att_avg, att_on, time_on, count_on, att_avg_unit = _compute_effects( 378 to_numpy(eff), to_numpy(D_mat), to_numpy(panel.T_on), to_numpy(I_mat), 379 ) 380 381 # Build result 382 result = FectResult( 383 method=method, 384 r_cv=r_cv, 385 lambda_cv=lambda_cv, 386 att_avg=att_avg, 387 att_avg_unit=att_avg_unit, 388 att_on=att_on, 389 time_on=time_on, 390 count_on=count_on, 391 beta=to_numpy(est.beta) if est.beta is not None else None, 392 covariate_names=panel.covariate_names, 393 mu=est.mu, 394 alpha=to_numpy(est.alpha) if est.alpha is not None else None, 395 xi=to_numpy(est.xi) if est.xi is not None else None, 396 factors=to_numpy(est.factors) if est.factors is not None else None, 397 loadings=to_numpy(est.loadings) if est.loadings is not None else None, 398 Y_ct=to_numpy(Y_ct), 399 eff=to_numpy(eff), 400 residuals=to_numpy(est.residuals), 401 sigma2=est.sigma2, 402 IC=est.IC, 403 PC=est.PC, 404 niter=est.niter, 405 converged=est.converged, 406 cv_result=cv_result, 407 panel=panel, 408 seed=seed, 409 ) 410 411 # Inference 412 if se: 413 result.inference = _run_inference( 414 result, panel, Y_mat, X_mat, W_mat, beta0, Y0, 415 method=method, r_cv=r_cv, lambda_cv=lambda_cv, 416 force_int=force_int, tol=tol, max_iter=max_iter, 417 vartype=vartype, nboots=nboots, alpha=alpha, 418 n_jobs=n_jobs, seed=seed, normalize=normalize, 419 norm_factor=norm_factor, 420 ) 421 422 return result
Estimate counterfactual treatment effects for panel data.
This is the main entry point — equivalent to R fect's fect() function.
Parameters
data : polars.DataFrame, pandas.DataFrame
Long-format panel data.
Y, D : str
Column names for outcome and binary treatment indicator.
index : (str, str)
Column names for (unit_id, time_period).
X : list of str, optional
Time-varying covariates.
method : {"fe", "ife", "mc", "cfe", "both"}
Estimation method.
force : {"none", "unit", "time", "two-way"}
Fixed effects specification.
r : int or (int, int)
Number of factors. If tuple, CV selects from range.
lam : float, optional
Nuclear norm penalty for MC. If None with CV=True, auto-selected.
se : bool
Compute standard errors via bootstrap/jackknife.
device : {"cpu", "gpu"}
Compute device.
n_jobs : int, optional
Parallel workers for CV and bootstrap. -1 or None uses
all available CPUs.
seed : int, optional
Random seed for full reproducibility.