pyfector.fect
Public entry point for pyfector.
This module exposes fect(), the single function most users call,
and the FectResult dataclass that packages its output.
fect() orchestrates the full pipeline:
pyfector.panel.prepare_panel()converts the input DataFrame to dense(T, N)matrices with treatment timing and unit classification.pyfector.panel.initial_fit()computes the starting fitY0and initial covariate coefficients on the control subsample.- If the user passes a range for
r(IFE) or leaveslam=None(MC),pyfector.cvchooses the hyperparameter by cross-validation. pyfector.estimatorsiterates the EM loop for the chosen method (fe,ife,mc, orcfe).- Treatment effects are computed from
eff = Y - Y_ct; the overall ATT and dynamic ATT by relative event time are derived in_compute_effects()vianumpy.bincount(). - When
se=Truethe requested inference routine (bootstraporjackknife) frompyfector.inferenceis called with a closure that re-runs the EM loop on resampled units.
Example
::
import pyfector
result = pyfector.fect(
data=df,
Y="outcome", D="treat",
index=("unit", "year"),
X=["gdp", "pop"],
method="ife",
r=(0, 5), # CV over 0..5 factors
se=True,
nboots=200,
device="cpu", # or "gpu"
n_jobs=4,
seed=42,
)
result.summary()
result.plot(kind="gap")
1""" 2Public entry point for pyfector. 3 4This module exposes :func:`fect`, the single function most users call, 5and the :class:`FectResult` dataclass that packages its output. 6 7:func:`fect` orchestrates the full pipeline: 8 91. :func:`pyfector.panel.prepare_panel` converts the input DataFrame to 10 dense ``(T, N)`` matrices with treatment timing and unit 11 classification. 122. :func:`pyfector.panel.initial_fit` computes the starting fit ``Y0`` 13 and initial covariate coefficients on the control subsample. 143. If the user passes a range for ``r`` (IFE) or leaves ``lam=None`` 15 (MC), :mod:`pyfector.cv` chooses the hyperparameter by 16 cross-validation. 174. :mod:`pyfector.estimators` iterates the EM loop for the chosen 18 method (``fe``, ``ife``, ``mc``, or ``cfe``). 195. Treatment effects are computed from ``eff = Y - Y_ct``; the overall 20 ATT and dynamic ATT by relative event time are derived in 21 :func:`_compute_effects` via :func:`numpy.bincount`. 226. When ``se=True`` the requested inference routine 23 (``bootstrap`` or ``jackknife``) from :mod:`pyfector.inference` is 24 called with a closure that re-runs the EM loop on resampled units. 25 26Example 27------- 28:: 29 30 import pyfector 31 32 result = pyfector.fect( 33 data=df, 34 Y="outcome", D="treat", 35 index=("unit", "year"), 36 X=["gdp", "pop"], 37 method="ife", 38 r=(0, 5), # CV over 0..5 factors 39 se=True, 40 nboots=200, 41 device="cpu", # or "gpu" 42 n_jobs=4, 43 seed=42, 44 ) 45 result.summary() 46 result.plot(kind="gap") 47""" 48 49from __future__ import annotations 50 51from dataclasses import dataclass, field 52from typing import Literal 53 54import numpy as np 55 56from .backend import set_device, get_backend, to_numpy, to_device, make_rng 57from .panel import PanelData, prepare_panel, initial_fit 58from .estimators import estimate_ife, estimate_mc, estimate_cfe, EstimationResult 59from .cv import cv_ife, cv_mc, CVResult 60from .inference import bootstrap, jackknife, InferenceResult 61 62 63@dataclass 64class FectResult: 65 """Container for all fect estimation results.""" 66 # Method info 67 method: str 68 r_cv: int | None = None 69 lambda_cv: float | None = None 70 71 # Point estimates 72 att_avg: float = 0.0 73 att_avg_unit: float = 0.0 74 75 # Dynamic effects 76 att_on: np.ndarray | None = None 77 time_on: np.ndarray | None = None 78 count_on: np.ndarray | None = None 79 80 # Exit effects (treatment reversal) 81 att_off: np.ndarray | None = None 82 time_off: np.ndarray | None = None 83 84 # Coefficients 85 beta: np.ndarray | None = None 86 covariate_names: list[str] = field(default_factory=list) 87 88 # Fixed effects 89 mu: float = 0.0 90 alpha: np.ndarray | None = None # unit FE 91 xi: np.ndarray | None = None # time FE 92 factors: np.ndarray | None = None 93 loadings: np.ndarray | None = None 94 95 # Counterfactual and effects matrices 96 Y_ct: np.ndarray | None = None # T×N counterfactual 97 eff: np.ndarray | None = None # T×N treatment effects 98 residuals: np.ndarray | None = None 99 100 # Model fit 101 sigma2: float = 0.0 102 IC: float = 0.0 103 PC: float = 0.0 104 rmse: float = 0.0 105 niter: int = 0 106 converged: bool = False 107 108 # Inference 109 inference: InferenceResult | None = None 110 111 # CV 112 cv_result: CVResult | None = None 113 114 # Panel metadata 115 panel: PanelData | None = None 116 117 # Reproducibility 118 seed: int | None = None 119 120 def summary(self) -> str: 121 """Print summary table of results.""" 122 lines = [] 123 lines.append(f"pyfector estimation results") 124 lines.append(f"{'='*60}") 125 lines.append(f"Method: {self.method}") 126 if self.r_cv is not None: 127 lines.append(f"Number of factors (CV): {self.r_cv}") 128 if self.lambda_cv is not None: 129 lines.append(f"Lambda (CV): {self.lambda_cv:.6f}") 130 lines.append(f"Converged: {self.converged} (iter={self.niter})") 131 lines.append(f"Sigma^2: {self.sigma2:.6f}") 132 lines.append(f"") 133 lines.append(f"ATT (average): {self.att_avg:.6f}") 134 if self.inference is not None: 135 inf = self.inference 136 lines.append(f" SE: {inf.att_avg_se:.6f}") 137 lines.append(f" CI: [{inf.att_avg_ci[0]:.6f}, {inf.att_avg_ci[1]:.6f}]") 138 lines.append(f" p-val: {inf.att_avg_pval:.4f}") 139 140 if self.beta is not None and len(self.beta) > 0: 141 lines.append(f"") 142 lines.append(f"Coefficients:") 143 for i, name in enumerate(self.covariate_names): 144 lines.append(f" {name}: {self.beta[i]:.6f}") 145 146 if self.att_on is not None and self.time_on is not None: 147 lines.append(f"") 148 lines.append(f"Dynamic effects (ATT by relative time):") 149 lines.append(f" {'Time':>6s} {'ATT':>10s} {'Count':>6s}", ) 150 for i, t in enumerate(self.time_on): 151 count = self.count_on[i] if self.count_on is not None else "" 152 att = self.att_on[i] 153 if self.inference is not None: 154 se = self.inference.att_on_se[i] 155 lines.append(f" {t:>6.0f} {att:>10.4f} ({se:.4f}) {count}") 156 else: 157 lines.append(f" {t:>6.0f} {att:>10.4f} {count}") 158 159 lines.append(f"{'='*60}") 160 if self.panel is not None: 161 lines.append(f"N={self.panel.N}, T={self.panel.T}") 162 if self.seed is not None: 163 lines.append(f"Seed: {self.seed}") 164 return "\n".join(lines) 165 166 def __repr__(self): 167 return self.summary() 168 169 def plot(self, kind="gap", **kwargs): 170 """Plot results. Shortcut for ``pyfector.plot(self, kind, ...)``.""" 171 from .plotting import plot as _plot 172 return _plot(self, kind=kind, **kwargs) 173 174 def diagnose(self, **kwargs): 175 """Run diagnostic tests. Shortcut for ``pyfector.run_diagnostics(self, ...)``.""" 176 from .diagnostics import run_diagnostics 177 return run_diagnostics(self, **kwargs) 178 179 180def fect( 181 data, 182 Y: str, 183 D: str, 184 index: tuple[str, str], 185 X: list[str] | None = None, 186 W: str | None = None, 187 group: str | None = None, 188 method: Literal["fe", "ife", "mc", "cfe", "both"] = "ife", 189 force: Literal["none", "unit", "time", "two-way"] = "two-way", 190 r: int | tuple[int, int] = 0, 191 lam: float | None = None, 192 nlambda: int = 10, 193 CV: bool = True, 194 k: int = 10, 195 cv_prop: float = 0.1, 196 cv_nobs: int = 3, 197 cv_treat: bool = True, 198 cv_donut: int = 0, 199 criterion: str = "mspe", 200 se: bool = False, 201 vartype: Literal["bootstrap", "jackknife"] = "bootstrap", 202 nboots: int = 200, 203 alpha: float = 0.05, 204 tol: float = 1e-7, 205 max_iter: int = 5000, 206 min_T0: int = 1, 207 max_missing: float = 1.0, 208 normalize: bool = False, 209 # CFE-specific 210 Z: list[str] | None = None, 211 Q: list[str] | None = None, 212 # Performance 213 device: Literal["cpu", "gpu"] = "cpu", 214 n_jobs: int = 1, 215 seed: int | None = None, 216) -> FectResult: 217 """Estimate counterfactual treatment effects for panel data. 218 219 This is the main entry point — equivalent to R fect's ``fect()`` function. 220 221 Parameters 222 ---------- 223 data : polars.DataFrame, pandas.DataFrame 224 Long-format panel data. 225 Y, D : str 226 Column names for outcome and binary treatment indicator. 227 index : (str, str) 228 Column names for (unit_id, time_period). 229 X : list of str, optional 230 Time-varying covariates. 231 method : {"fe", "ife", "mc", "cfe", "both"} 232 Estimation method. 233 force : {"none", "unit", "time", "two-way"} 234 Fixed effects specification. 235 r : int or (int, int) 236 Number of factors. If tuple, CV selects from range. 237 lam : float, optional 238 Nuclear norm penalty for MC. If None with CV=True, auto-selected. 239 se : bool 240 Compute standard errors via bootstrap/jackknife. 241 device : {"cpu", "gpu"} 242 Compute device. 243 n_jobs : int 244 Parallel workers for CV and bootstrap. 245 seed : int, optional 246 Random seed for full reproducibility. 247 """ 248 # Set device 249 set_device(device) 250 xp = get_backend() 251 252 # Map force string to int 253 force_map = {"none": 0, "unit": 1, "time": 2, "two-way": 3} 254 force_int = force_map[force] 255 256 # Prepare panel data 257 panel = prepare_panel( 258 data, Y=Y, D=D, index=index, X=X, W=W, 259 group=group, min_T0=min_T0, max_missing=max_missing, 260 ) 261 262 # Move to device 263 Y_mat = to_device(panel.Y) 264 D_mat = to_device(panel.D) 265 I_mat = to_device(panel.I) 266 II_mat = to_device(panel.II) 267 X_mat = to_device(panel.X) if panel.X is not None else None 268 W_mat = to_device(panel.W) if panel.W is not None else None 269 270 # Normalize 271 norm_factor = 1.0 272 if normalize: 273 sd_y = float(xp.std(Y_mat[I_mat > 0])) 274 if sd_y > 0: 275 Y_mat = Y_mat / sd_y 276 norm_factor = sd_y 277 278 # Initial fit 279 Y0, beta0 = initial_fit(Y_mat, X_mat, II_mat, force_int) 280 281 # Determine r and lambda 282 r_cv = None 283 lambda_cv = None 284 cv_result = None 285 286 if method == "ife": 287 if isinstance(r, tuple) and CV: 288 cv_result = cv_ife( 289 Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0, 290 force=force_int, r_range=r, k=k, cv_prop=cv_prop, 291 cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut, 292 criterion=criterion, tol=tol, max_iter=max_iter, 293 n_jobs=n_jobs, seed=seed, 294 ) 295 r_cv = cv_result.best_r 296 else: 297 r_cv = r if isinstance(r, int) else r[0] 298 299 elif method == "mc": 300 if lam is None and CV: 301 cv_result = cv_mc( 302 Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0, 303 force=force_int, nlambda=nlambda, k=k, cv_prop=cv_prop, 304 cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut, 305 criterion=criterion, tol=tol, max_iter=max_iter, 306 n_jobs=n_jobs, seed=seed, 307 ) 308 lambda_cv = cv_result.best_lambda 309 else: 310 lambda_cv = lam if lam is not None else 0.0 311 312 elif method == "fe": 313 r_cv = 0 314 315 elif method == "cfe": 316 r_cv = r if isinstance(r, int) else r[0] 317 318 # Point estimation 319 if method in ("fe", "ife"): 320 est = estimate_ife( 321 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 322 r=r_cv, force=force_int, tol=tol, max_iter=max_iter, 323 ) 324 elif method == "mc": 325 est = estimate_mc( 326 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 327 lam=lambda_cv, force=force_int, tol=tol, max_iter=max_iter, 328 ) 329 elif method == "cfe": 330 est = estimate_cfe( 331 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 332 r=r_cv, force=force_int, tol=tol, max_iter=max_iter, 333 ) 334 elif method == "both": 335 # Run both IFE and MC, return IFE results with MC comparison 336 if isinstance(r, tuple) and CV: 337 cv_result = cv_ife( 338 Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0, 339 force=force_int, r_range=r, k=k, cv_prop=cv_prop, 340 cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut, 341 criterion=criterion, tol=tol, max_iter=max_iter, 342 n_jobs=n_jobs, seed=seed, 343 ) 344 r_cv = cv_result.best_r 345 else: 346 r_cv = r if isinstance(r, int) else r[0] 347 est = estimate_ife( 348 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 349 r=r_cv, force=force_int, tol=tol, max_iter=max_iter, 350 ) 351 else: 352 raise ValueError(f"Unknown method: {method}") 353 354 # Compute effects 355 eff = Y_mat - est.fit 356 Y_ct = est.fit 357 358 # Denormalize 359 if normalize and norm_factor != 1.0: 360 eff = eff * norm_factor 361 Y_ct = Y_ct * norm_factor 362 Y_mat = Y_mat * norm_factor 363 if est.beta is not None: 364 est = est._replace(beta=est.beta * norm_factor) 365 366 # ATT computation 367 T_on = to_device(panel.T_on) 368 att_avg, att_on, time_on, count_on, att_avg_unit = _compute_effects( 369 to_numpy(eff), to_numpy(D_mat), to_numpy(panel.T_on), to_numpy(I_mat), 370 ) 371 372 # Build result 373 result = FectResult( 374 method=method, 375 r_cv=r_cv, 376 lambda_cv=lambda_cv, 377 att_avg=att_avg, 378 att_avg_unit=att_avg_unit, 379 att_on=att_on, 380 time_on=time_on, 381 count_on=count_on, 382 beta=to_numpy(est.beta) if est.beta is not None else None, 383 covariate_names=panel.covariate_names, 384 mu=est.mu, 385 alpha=to_numpy(est.alpha) if est.alpha is not None else None, 386 xi=to_numpy(est.xi) if est.xi is not None else None, 387 factors=to_numpy(est.factors) if est.factors is not None else None, 388 loadings=to_numpy(est.loadings) if est.loadings is not None else None, 389 Y_ct=to_numpy(Y_ct), 390 eff=to_numpy(eff), 391 residuals=to_numpy(est.residuals), 392 sigma2=est.sigma2, 393 IC=est.IC, 394 PC=est.PC, 395 niter=est.niter, 396 converged=est.converged, 397 cv_result=cv_result, 398 panel=panel, 399 seed=seed, 400 ) 401 402 # Inference 403 if se: 404 result.inference = _run_inference( 405 result, panel, Y_mat, X_mat, W_mat, beta0, Y0, 406 method=method, r_cv=r_cv, lambda_cv=lambda_cv, 407 force_int=force_int, tol=tol, max_iter=max_iter, 408 vartype=vartype, nboots=nboots, alpha=alpha, 409 n_jobs=n_jobs, seed=seed, normalize=normalize, 410 norm_factor=norm_factor, 411 ) 412 413 return result 414 415 416def _compute_effects(eff, D, T_on, I): 417 """Compute overall ATT, per-unit ATT, and dynamic ATT by event time. 418 419 Dynamic ATT grouping is done with :func:`numpy.bincount` so the cost 420 is O(n_observed_ever_treated_cells) regardless of the number of 421 distinct relative-time values. Per-unit ATT uses a column-wise 422 masked mean via ``np.add.reduce`` rather than a Python loop. 423 """ 424 treated = (D > 0) & (I > 0) 425 n_treated = int(np.sum(treated)) 426 427 att_avg = float(np.sum(eff[treated]) / max(n_treated, 1)) 428 429 # Per-unit ATT (post-treatment only) — sum then divide by count, 430 # keeping only units that have at least one treated observation. 431 col_counts = treated.sum(axis=0) # (N,) 432 col_sums = (eff * treated).sum(axis=0) # (N,) 433 has_any = col_counts > 0 434 if np.any(has_any): 435 unit_atts = col_sums[has_any] / col_counts[has_any] 436 att_avg_unit = float(unit_atts.mean()) 437 else: 438 att_avg_unit = 0.0 439 440 # Dynamic ATT by relative event time. Include pre-treatment periods 441 # for ever-treated units (counterfactual gaps before onset). 442 ever_treated = np.any(D > 0, axis=0) # (N,) 443 all_periods = (I > 0) & ever_treated[np.newaxis, :] # (T, N) 444 445 T_on_flat = T_on[all_periods] 446 eff_flat = eff[all_periods] 447 valid = ~np.isnan(T_on_flat) 448 T_on_flat = T_on_flat[valid].astype(np.int64) 449 eff_flat = eff_flat[valid] 450 451 if T_on_flat.size == 0: 452 return att_avg, np.array([]), np.array([]), np.array([], dtype=np.int64), att_avg_unit 453 454 # Bincount over the shifted integer indices. 455 offset = int(T_on_flat.min()) 456 idx = T_on_flat - offset 457 minlength = int(T_on_flat.max() - offset + 1) 458 sums = np.bincount(idx, weights=eff_flat, minlength=minlength) 459 counts = np.bincount(idx, minlength=minlength) 460 keep = counts > 0 461 time_on = (offset + np.arange(minlength))[keep].astype(np.float64) 462 att_on = (sums[keep] / counts[keep]).astype(np.float64) 463 count_on = counts[keep].astype(np.int64) 464 465 return att_avg, att_on, time_on, count_on, att_avg_unit 466 467 468def _run_inference( 469 result, panel, Y_mat, X_mat, W_mat, beta0, Y0, 470 method, r_cv, lambda_cv, force_int, tol, max_iter, 471 vartype, nboots, alpha, n_jobs, seed, normalize, norm_factor, 472): 473 """Run bootstrap or jackknife inference.""" 474 xp = get_backend() 475 476 # Pre-move panel data to device once (avoids CPU→GPU copy per bootstrap rep) 477 II_dev = to_device(panel.II) 478 D_dev = to_device(panel.D) 479 I_dev = to_device(panel.I) 480 T_on_np = panel.T_on # keep on CPU for ATT computation 481 482 def _estimate_fn(unit_idx): 483 """Re-estimate on a subset of units.""" 484 Y_sub = Y_mat[:, unit_idx] 485 II_sub = II_dev[:, unit_idx] 486 D_sub = D_dev[:, unit_idx] 487 I_sub = I_dev[:, unit_idx] 488 X_sub = X_mat[:, unit_idx, :] if X_mat is not None else None 489 W_sub = W_mat[:, unit_idx] if W_mat is not None else None 490 T_on_sub = T_on_np[:, unit_idx] 491 beta0_sub = beta0 492 493 # Re-compute initial fit for this subset 494 Y0_sub, beta0_sub = initial_fit(Y_sub, X_sub, II_sub, force_int) 495 496 if method in ("fe", "ife"): 497 est = estimate_ife( 498 Y_sub, Y0_sub, X_sub, II_sub, W_sub, beta0_sub, 499 r=r_cv, force=force_int, tol=tol, max_iter=max_iter, 500 ) 501 elif method == "mc": 502 est = estimate_mc( 503 Y_sub, Y0_sub, X_sub, II_sub, W_sub, beta0_sub, 504 lam=lambda_cv, force=force_int, tol=tol, max_iter=max_iter, 505 ) 506 else: 507 est = estimate_ife( 508 Y_sub, Y0_sub, X_sub, II_sub, W_sub, beta0_sub, 509 r=r_cv or 0, force=force_int, tol=tol, max_iter=max_iter, 510 ) 511 512 eff = to_numpy(Y_sub - est.fit) 513 if normalize and norm_factor != 1.0: 514 eff = eff * norm_factor 515 516 return eff, to_numpy(D_sub), T_on_sub 517 518 if vartype == "bootstrap": 519 return bootstrap( 520 _estimate_fn, to_numpy(Y_mat), to_numpy(panel.D), 521 to_numpy(panel.I), panel.T_on, panel.unit_type, 522 nboots=nboots, alpha=alpha, n_jobs=n_jobs, seed=seed, 523 ) 524 else: 525 return jackknife( 526 _estimate_fn, to_numpy(Y_mat), to_numpy(panel.D), 527 to_numpy(panel.I), panel.T_on, panel.unit_type, 528 alpha=alpha, n_jobs=n_jobs, 529 )
64@dataclass 65class FectResult: 66 """Container for all fect estimation results.""" 67 # Method info 68 method: str 69 r_cv: int | None = None 70 lambda_cv: float | None = None 71 72 # Point estimates 73 att_avg: float = 0.0 74 att_avg_unit: float = 0.0 75 76 # Dynamic effects 77 att_on: np.ndarray | None = None 78 time_on: np.ndarray | None = None 79 count_on: np.ndarray | None = None 80 81 # Exit effects (treatment reversal) 82 att_off: np.ndarray | None = None 83 time_off: np.ndarray | None = None 84 85 # Coefficients 86 beta: np.ndarray | None = None 87 covariate_names: list[str] = field(default_factory=list) 88 89 # Fixed effects 90 mu: float = 0.0 91 alpha: np.ndarray | None = None # unit FE 92 xi: np.ndarray | None = None # time FE 93 factors: np.ndarray | None = None 94 loadings: np.ndarray | None = None 95 96 # Counterfactual and effects matrices 97 Y_ct: np.ndarray | None = None # T×N counterfactual 98 eff: np.ndarray | None = None # T×N treatment effects 99 residuals: np.ndarray | None = None 100 101 # Model fit 102 sigma2: float = 0.0 103 IC: float = 0.0 104 PC: float = 0.0 105 rmse: float = 0.0 106 niter: int = 0 107 converged: bool = False 108 109 # Inference 110 inference: InferenceResult | None = None 111 112 # CV 113 cv_result: CVResult | None = None 114 115 # Panel metadata 116 panel: PanelData | None = None 117 118 # Reproducibility 119 seed: int | None = None 120 121 def summary(self) -> str: 122 """Print summary table of results.""" 123 lines = [] 124 lines.append(f"pyfector estimation results") 125 lines.append(f"{'='*60}") 126 lines.append(f"Method: {self.method}") 127 if self.r_cv is not None: 128 lines.append(f"Number of factors (CV): {self.r_cv}") 129 if self.lambda_cv is not None: 130 lines.append(f"Lambda (CV): {self.lambda_cv:.6f}") 131 lines.append(f"Converged: {self.converged} (iter={self.niter})") 132 lines.append(f"Sigma^2: {self.sigma2:.6f}") 133 lines.append(f"") 134 lines.append(f"ATT (average): {self.att_avg:.6f}") 135 if self.inference is not None: 136 inf = self.inference 137 lines.append(f" SE: {inf.att_avg_se:.6f}") 138 lines.append(f" CI: [{inf.att_avg_ci[0]:.6f}, {inf.att_avg_ci[1]:.6f}]") 139 lines.append(f" p-val: {inf.att_avg_pval:.4f}") 140 141 if self.beta is not None and len(self.beta) > 0: 142 lines.append(f"") 143 lines.append(f"Coefficients:") 144 for i, name in enumerate(self.covariate_names): 145 lines.append(f" {name}: {self.beta[i]:.6f}") 146 147 if self.att_on is not None and self.time_on is not None: 148 lines.append(f"") 149 lines.append(f"Dynamic effects (ATT by relative time):") 150 lines.append(f" {'Time':>6s} {'ATT':>10s} {'Count':>6s}", ) 151 for i, t in enumerate(self.time_on): 152 count = self.count_on[i] if self.count_on is not None else "" 153 att = self.att_on[i] 154 if self.inference is not None: 155 se = self.inference.att_on_se[i] 156 lines.append(f" {t:>6.0f} {att:>10.4f} ({se:.4f}) {count}") 157 else: 158 lines.append(f" {t:>6.0f} {att:>10.4f} {count}") 159 160 lines.append(f"{'='*60}") 161 if self.panel is not None: 162 lines.append(f"N={self.panel.N}, T={self.panel.T}") 163 if self.seed is not None: 164 lines.append(f"Seed: {self.seed}") 165 return "\n".join(lines) 166 167 def __repr__(self): 168 return self.summary() 169 170 def plot(self, kind="gap", **kwargs): 171 """Plot results. Shortcut for ``pyfector.plot(self, kind, ...)``.""" 172 from .plotting import plot as _plot 173 return _plot(self, kind=kind, **kwargs) 174 175 def diagnose(self, **kwargs): 176 """Run diagnostic tests. Shortcut for ``pyfector.run_diagnostics(self, ...)``.""" 177 from .diagnostics import run_diagnostics 178 return run_diagnostics(self, **kwargs)
Container for all fect estimation results.
121 def summary(self) -> str: 122 """Print summary table of results.""" 123 lines = [] 124 lines.append(f"pyfector estimation results") 125 lines.append(f"{'='*60}") 126 lines.append(f"Method: {self.method}") 127 if self.r_cv is not None: 128 lines.append(f"Number of factors (CV): {self.r_cv}") 129 if self.lambda_cv is not None: 130 lines.append(f"Lambda (CV): {self.lambda_cv:.6f}") 131 lines.append(f"Converged: {self.converged} (iter={self.niter})") 132 lines.append(f"Sigma^2: {self.sigma2:.6f}") 133 lines.append(f"") 134 lines.append(f"ATT (average): {self.att_avg:.6f}") 135 if self.inference is not None: 136 inf = self.inference 137 lines.append(f" SE: {inf.att_avg_se:.6f}") 138 lines.append(f" CI: [{inf.att_avg_ci[0]:.6f}, {inf.att_avg_ci[1]:.6f}]") 139 lines.append(f" p-val: {inf.att_avg_pval:.4f}") 140 141 if self.beta is not None and len(self.beta) > 0: 142 lines.append(f"") 143 lines.append(f"Coefficients:") 144 for i, name in enumerate(self.covariate_names): 145 lines.append(f" {name}: {self.beta[i]:.6f}") 146 147 if self.att_on is not None and self.time_on is not None: 148 lines.append(f"") 149 lines.append(f"Dynamic effects (ATT by relative time):") 150 lines.append(f" {'Time':>6s} {'ATT':>10s} {'Count':>6s}", ) 151 for i, t in enumerate(self.time_on): 152 count = self.count_on[i] if self.count_on is not None else "" 153 att = self.att_on[i] 154 if self.inference is not None: 155 se = self.inference.att_on_se[i] 156 lines.append(f" {t:>6.0f} {att:>10.4f} ({se:.4f}) {count}") 157 else: 158 lines.append(f" {t:>6.0f} {att:>10.4f} {count}") 159 160 lines.append(f"{'='*60}") 161 if self.panel is not None: 162 lines.append(f"N={self.panel.N}, T={self.panel.T}") 163 if self.seed is not None: 164 lines.append(f"Seed: {self.seed}") 165 return "\n".join(lines)
Print summary table of results.
170 def plot(self, kind="gap", **kwargs): 171 """Plot results. Shortcut for ``pyfector.plot(self, kind, ...)``.""" 172 from .plotting import plot as _plot 173 return _plot(self, kind=kind, **kwargs)
Plot results. Shortcut for pyfector.plot(self, kind, ...).
175 def diagnose(self, **kwargs): 176 """Run diagnostic tests. Shortcut for ``pyfector.run_diagnostics(self, ...)``.""" 177 from .diagnostics import run_diagnostics 178 return run_diagnostics(self, **kwargs)
Run diagnostic tests. Shortcut for pyfector.run_diagnostics(self, ...).
181def fect( 182 data, 183 Y: str, 184 D: str, 185 index: tuple[str, str], 186 X: list[str] | None = None, 187 W: str | None = None, 188 group: str | None = None, 189 method: Literal["fe", "ife", "mc", "cfe", "both"] = "ife", 190 force: Literal["none", "unit", "time", "two-way"] = "two-way", 191 r: int | tuple[int, int] = 0, 192 lam: float | None = None, 193 nlambda: int = 10, 194 CV: bool = True, 195 k: int = 10, 196 cv_prop: float = 0.1, 197 cv_nobs: int = 3, 198 cv_treat: bool = True, 199 cv_donut: int = 0, 200 criterion: str = "mspe", 201 se: bool = False, 202 vartype: Literal["bootstrap", "jackknife"] = "bootstrap", 203 nboots: int = 200, 204 alpha: float = 0.05, 205 tol: float = 1e-7, 206 max_iter: int = 5000, 207 min_T0: int = 1, 208 max_missing: float = 1.0, 209 normalize: bool = False, 210 # CFE-specific 211 Z: list[str] | None = None, 212 Q: list[str] | None = None, 213 # Performance 214 device: Literal["cpu", "gpu"] = "cpu", 215 n_jobs: int = 1, 216 seed: int | None = None, 217) -> FectResult: 218 """Estimate counterfactual treatment effects for panel data. 219 220 This is the main entry point — equivalent to R fect's ``fect()`` function. 221 222 Parameters 223 ---------- 224 data : polars.DataFrame, pandas.DataFrame 225 Long-format panel data. 226 Y, D : str 227 Column names for outcome and binary treatment indicator. 228 index : (str, str) 229 Column names for (unit_id, time_period). 230 X : list of str, optional 231 Time-varying covariates. 232 method : {"fe", "ife", "mc", "cfe", "both"} 233 Estimation method. 234 force : {"none", "unit", "time", "two-way"} 235 Fixed effects specification. 236 r : int or (int, int) 237 Number of factors. If tuple, CV selects from range. 238 lam : float, optional 239 Nuclear norm penalty for MC. If None with CV=True, auto-selected. 240 se : bool 241 Compute standard errors via bootstrap/jackknife. 242 device : {"cpu", "gpu"} 243 Compute device. 244 n_jobs : int 245 Parallel workers for CV and bootstrap. 246 seed : int, optional 247 Random seed for full reproducibility. 248 """ 249 # Set device 250 set_device(device) 251 xp = get_backend() 252 253 # Map force string to int 254 force_map = {"none": 0, "unit": 1, "time": 2, "two-way": 3} 255 force_int = force_map[force] 256 257 # Prepare panel data 258 panel = prepare_panel( 259 data, Y=Y, D=D, index=index, X=X, W=W, 260 group=group, min_T0=min_T0, max_missing=max_missing, 261 ) 262 263 # Move to device 264 Y_mat = to_device(panel.Y) 265 D_mat = to_device(panel.D) 266 I_mat = to_device(panel.I) 267 II_mat = to_device(panel.II) 268 X_mat = to_device(panel.X) if panel.X is not None else None 269 W_mat = to_device(panel.W) if panel.W is not None else None 270 271 # Normalize 272 norm_factor = 1.0 273 if normalize: 274 sd_y = float(xp.std(Y_mat[I_mat > 0])) 275 if sd_y > 0: 276 Y_mat = Y_mat / sd_y 277 norm_factor = sd_y 278 279 # Initial fit 280 Y0, beta0 = initial_fit(Y_mat, X_mat, II_mat, force_int) 281 282 # Determine r and lambda 283 r_cv = None 284 lambda_cv = None 285 cv_result = None 286 287 if method == "ife": 288 if isinstance(r, tuple) and CV: 289 cv_result = cv_ife( 290 Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0, 291 force=force_int, r_range=r, k=k, cv_prop=cv_prop, 292 cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut, 293 criterion=criterion, tol=tol, max_iter=max_iter, 294 n_jobs=n_jobs, seed=seed, 295 ) 296 r_cv = cv_result.best_r 297 else: 298 r_cv = r if isinstance(r, int) else r[0] 299 300 elif method == "mc": 301 if lam is None and CV: 302 cv_result = cv_mc( 303 Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0, 304 force=force_int, nlambda=nlambda, k=k, cv_prop=cv_prop, 305 cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut, 306 criterion=criterion, tol=tol, max_iter=max_iter, 307 n_jobs=n_jobs, seed=seed, 308 ) 309 lambda_cv = cv_result.best_lambda 310 else: 311 lambda_cv = lam if lam is not None else 0.0 312 313 elif method == "fe": 314 r_cv = 0 315 316 elif method == "cfe": 317 r_cv = r if isinstance(r, int) else r[0] 318 319 # Point estimation 320 if method in ("fe", "ife"): 321 est = estimate_ife( 322 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 323 r=r_cv, force=force_int, tol=tol, max_iter=max_iter, 324 ) 325 elif method == "mc": 326 est = estimate_mc( 327 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 328 lam=lambda_cv, force=force_int, tol=tol, max_iter=max_iter, 329 ) 330 elif method == "cfe": 331 est = estimate_cfe( 332 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 333 r=r_cv, force=force_int, tol=tol, max_iter=max_iter, 334 ) 335 elif method == "both": 336 # Run both IFE and MC, return IFE results with MC comparison 337 if isinstance(r, tuple) and CV: 338 cv_result = cv_ife( 339 Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0, 340 force=force_int, r_range=r, k=k, cv_prop=cv_prop, 341 cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut, 342 criterion=criterion, tol=tol, max_iter=max_iter, 343 n_jobs=n_jobs, seed=seed, 344 ) 345 r_cv = cv_result.best_r 346 else: 347 r_cv = r if isinstance(r, int) else r[0] 348 est = estimate_ife( 349 Y_mat, Y0, X_mat, II_mat, W_mat, beta0, 350 r=r_cv, force=force_int, tol=tol, max_iter=max_iter, 351 ) 352 else: 353 raise ValueError(f"Unknown method: {method}") 354 355 # Compute effects 356 eff = Y_mat - est.fit 357 Y_ct = est.fit 358 359 # Denormalize 360 if normalize and norm_factor != 1.0: 361 eff = eff * norm_factor 362 Y_ct = Y_ct * norm_factor 363 Y_mat = Y_mat * norm_factor 364 if est.beta is not None: 365 est = est._replace(beta=est.beta * norm_factor) 366 367 # ATT computation 368 T_on = to_device(panel.T_on) 369 att_avg, att_on, time_on, count_on, att_avg_unit = _compute_effects( 370 to_numpy(eff), to_numpy(D_mat), to_numpy(panel.T_on), to_numpy(I_mat), 371 ) 372 373 # Build result 374 result = FectResult( 375 method=method, 376 r_cv=r_cv, 377 lambda_cv=lambda_cv, 378 att_avg=att_avg, 379 att_avg_unit=att_avg_unit, 380 att_on=att_on, 381 time_on=time_on, 382 count_on=count_on, 383 beta=to_numpy(est.beta) if est.beta is not None else None, 384 covariate_names=panel.covariate_names, 385 mu=est.mu, 386 alpha=to_numpy(est.alpha) if est.alpha is not None else None, 387 xi=to_numpy(est.xi) if est.xi is not None else None, 388 factors=to_numpy(est.factors) if est.factors is not None else None, 389 loadings=to_numpy(est.loadings) if est.loadings is not None else None, 390 Y_ct=to_numpy(Y_ct), 391 eff=to_numpy(eff), 392 residuals=to_numpy(est.residuals), 393 sigma2=est.sigma2, 394 IC=est.IC, 395 PC=est.PC, 396 niter=est.niter, 397 converged=est.converged, 398 cv_result=cv_result, 399 panel=panel, 400 seed=seed, 401 ) 402 403 # Inference 404 if se: 405 result.inference = _run_inference( 406 result, panel, Y_mat, X_mat, W_mat, beta0, Y0, 407 method=method, r_cv=r_cv, lambda_cv=lambda_cv, 408 force_int=force_int, tol=tol, max_iter=max_iter, 409 vartype=vartype, nboots=nboots, alpha=alpha, 410 n_jobs=n_jobs, seed=seed, normalize=normalize, 411 norm_factor=norm_factor, 412 ) 413 414 return result
Estimate counterfactual treatment effects for panel data.
This is the main entry point — equivalent to R fect's fect() function.
Parameters
data : polars.DataFrame, pandas.DataFrame Long-format panel data. Y, D : str Column names for outcome and binary treatment indicator. index : (str, str) Column names for (unit_id, time_period). X : list of str, optional Time-varying covariates. method : {"fe", "ife", "mc", "cfe", "both"} Estimation method. force : {"none", "unit", "time", "two-way"} Fixed effects specification. r : int or (int, int) Number of factors. If tuple, CV selects from range. lam : float, optional Nuclear norm penalty for MC. If None with CV=True, auto-selected. se : bool Compute standard errors via bootstrap/jackknife. device : {"cpu", "gpu"} Compute device. n_jobs : int Parallel workers for CV and bootstrap. seed : int, optional Random seed for full reproducibility.