pyfector

pyfector — Fast counterfactual estimators for panel data in Python.

A high-performance Python reimplementation of the R fect package, featuring randomized SVD, GPU acceleration (CuPy), parallel computing (joblib), Polars data ingestion, and seeded reproducibility.

Usage::

import pyfector

result = pyfector.fect(
    data=df,
    Y="outcome", D="treat",
    index=("unit", "year"),
    method="ife",
    r=(0, 5),
    se=True,
    seed=42,
)
result.summary()
result.plot()
 1"""
 2pyfector — Fast counterfactual estimators for panel data in Python.
 3
 4A high-performance Python reimplementation of the R ``fect`` package,
 5featuring randomized SVD, GPU acceleration (CuPy), parallel computing
 6(joblib), Polars data ingestion, and seeded reproducibility.
 7
 8Usage::
 9
10    import pyfector
11
12    result = pyfector.fect(
13        data=df,
14        Y="outcome", D="treat",
15        index=("unit", "year"),
16        method="ife",
17        r=(0, 5),
18        se=True,
19        seed=42,
20    )
21    result.summary()
22    result.plot()
23
24"""
25
26__version__ = "0.1.6"
27
28from .fect import fect, FectResult
29from .backend import set_device, get_device
30from .diagnostics import run_diagnostics, DiagnosticResult
31from .plotting import plot
32
33__all__ = [
34    "fect",
35    "FectResult",
36    "set_device",
37    "get_device",
38    "run_diagnostics",
39    "DiagnosticResult",
40    "plot",
41]
def fect( data, Y: str, D: str, index: tuple[str, str], X: list[str] | None = None, W: str | None = None, group: str | None = None, method: Literal['fe', 'ife', 'mc', 'cfe', 'both'] = 'ife', force: Literal['none', 'unit', 'time', 'two-way'] = 'two-way', r: int | tuple[int, int] = 0, lam: float | None = None, nlambda: int = 10, CV: bool = True, k: int = 10, cv_prop: float = 0.1, cv_nobs: int = 3, cv_treat: bool = True, cv_donut: int = 0, criterion: str = 'mspe', cv_rule: Literal['min', 'onepct'] = 'min', se: bool = False, vartype: Literal['bootstrap', 'jackknife'] = 'bootstrap', nboots: int = 200, alpha: float = 0.05, tol: float = 1e-07, max_iter: int = 5000, min_T0: int = 1, min_T0_strict: bool = False, max_missing: float = 1.0, normalize: bool = False, Z: list[str] | None = None, Q: list[str] | None = None, device: Literal['cpu', 'gpu'] = 'cpu', n_jobs: int | None = -1, seed: int | None = None) -> FectResult:
185def fect(
186    data,
187    Y: str,
188    D: str,
189    index: tuple[str, str],
190    X: list[str] | None = None,
191    W: str | None = None,
192    group: str | None = None,
193    method: Literal["fe", "ife", "mc", "cfe", "both"] = "ife",
194    force: Literal["none", "unit", "time", "two-way"] = "two-way",
195    r: int | tuple[int, int] = 0,
196    lam: float | None = None,
197    nlambda: int = 10,
198    CV: bool = True,
199    k: int = 10,
200    cv_prop: float = 0.1,
201    cv_nobs: int = 3,
202    cv_treat: bool = True,
203    cv_donut: int = 0,
204    criterion: str = "mspe",
205    cv_rule: Literal["min", "onepct"] = "min",
206    se: bool = False,
207    vartype: Literal["bootstrap", "jackknife"] = "bootstrap",
208    nboots: int = 200,
209    alpha: float = 0.05,
210    tol: float = 1e-7,
211    max_iter: int = 5000,
212    min_T0: int = 1,
213    min_T0_strict: bool = False,
214    max_missing: float = 1.0,
215    normalize: bool = False,
216    # CFE-specific
217    Z: list[str] | None = None,
218    Q: list[str] | None = None,
219    # Performance
220    device: Literal["cpu", "gpu"] = "cpu",
221    n_jobs: int | None = -1,
222    seed: int | None = None,
223) -> FectResult:
224    """Estimate counterfactual treatment effects for panel data.
225
226    This is the main Python entry point for the counterfactual estimator
227    workflow.  Where the paper and the historical R package differ,
228    pyfector defaults to the paper's statistical definition and exposes
229    R-package-style behavior through explicit options.
230
231    Missing outcome policy
232    ----------------------
233    pyfector distinguishes raw missing outcomes from counterfactual
234    missingness caused by treatment.  Observed untreated cells
235    (``D == 0`` and non-missing ``Y``) fit the response surface.  Observed
236    treated cells (``D == 1`` and non-missing ``Y``) contribute to ATT as
237    ``Y - Y_ct``.  If a treated outcome is missing in the input data, the
238    model can still produce a counterfactual ``Y_ct`` for that cell, but
239    the cell is not counted in ``att_avg`` or ``att_on`` because the
240    treated potential outcome was not observed.
241
242    By default, ``min_T0`` is enforced only for treated and reversal
243    units.  Sparse controls are retained if they have at least one
244    observed outcome, because they may still inform the low-rank response
245    surface.  Set ``min_T0_strict=True`` to require controls to satisfy
246    ``min_T0`` too, matching the more conservative R fect sparse-panel
247    behavior.
248
249    Parameters
250    ----------
251    data : polars.DataFrame, pandas.DataFrame
252        Long-format panel data.
253    Y, D : str
254        Column names for outcome and binary treatment indicator.
255    index : (str, str)
256        Column names for (unit_id, time_period).
257    X : list of str, optional
258        Time-varying covariates.
259    W : str, optional
260        Observation weight column.
261    group : str, optional
262        Reserved for grouped estimation. Currently raises
263        ``NotImplementedError`` when supplied.
264    method : {"fe", "ife", "mc", "cfe", "both"}
265        Estimation method.
266    force : {"none", "unit", "time", "two-way"}
267        Fixed effects specification.
268    r : int or (int, int)
269        Number of factors.  If tuple, CV selects from range.
270    lam : float, optional
271        Nuclear norm penalty for MC.  If None with CV=True, auto-selected.
272    nlambda : int
273        Number of automatically generated lambda candidates for MC CV.
274    CV : bool
275        If True, cross-validate over ``r`` for IFE when ``r`` is a tuple,
276        or over ``lam`` for MC when ``lam`` is None.
277    k : int
278        Number of CV folds.
279    cv_prop : float
280        Fraction of eligible observed control cells masked per CV fold.
281    cv_nobs : int
282        Number of consecutive within-unit observations to mask as a block.
283    cv_treat : bool
284        If True, restrict CV masks to pre-treatment cells of ever-treated
285        units. If False, use all observed control cells.
286    cv_donut : int
287        Exclude this many periods around treatment onset from CV evaluation.
288    criterion : {"mspe", "gmspe", "mad"}
289        Cross-validation loss.
290    cv_rule : {"min", "onepct"}
291        CV selection rule. ``"min"`` chooses the strict minimum-score
292        candidate and is the paper-faithful default. ``"onepct"`` chooses
293        the simplest candidate within 1% of the best score (lower ``r`` for
294        IFE, higher ``lam`` for MC).
295    se : bool
296        Compute standard errors via bootstrap/jackknife.
297    vartype : {"bootstrap", "jackknife"}
298        Inference method when ``se=True``.
299    nboots : int
300        Number of bootstrap replications. Ignored for jackknife.
301    alpha : float
302        Significance level for confidence intervals and tests.
303    tol : float
304        EM convergence tolerance for final point estimation.
305    max_iter : int
306        Maximum EM iterations.
307    min_T0 : int
308        Minimum untreated/pre-treatment observed periods. By default this is
309        enforced only for treated and treatment-reversal units.
310    min_T0_strict : bool
311        If True, enforce ``min_T0`` on all units, including controls. This
312        matches R fect's conservative handling of sparse control rows.
313    max_missing : float
314        Maximum missing-outcome fraction per unit, in ``[0, 1]``. Units with
315        no observed outcomes are always dropped, regardless of this threshold,
316        because they provide neither fitting information nor observed treated
317        effects.
318    normalize : bool
319        If True, estimate on an outcome standardized by its observed standard
320        deviation, then transform effects back to the original scale.
321    Z, Q : list of str, optional
322        Reserved CFE interaction arguments. Currently raise
323        ``NotImplementedError`` when supplied.
324    device : {"cpu", "gpu"}
325        Compute device.
326    n_jobs : int, optional
327        Parallel workers for CV and bootstrap. ``-1`` or ``None`` uses
328        all available CPUs.
329    seed : int, optional
330        Random seed for full reproducibility.
331    """
332    # Set device
333    set_device(device)
334    xp = get_backend()
335    n_jobs = _resolve_n_jobs(n_jobs)
336    if device == "gpu":
337        n_jobs = 1
338
339    if group is not None:
340        raise NotImplementedError("The `group` argument is not implemented yet.")
341    if Z is not None or Q is not None:
342        raise NotImplementedError("The `Z` and `Q` CFE interaction arguments are not implemented yet.")
343    if criterion not in {"mspe", "gmspe", "mad"}:
344        raise ValueError("criterion must be 'mspe', 'gmspe', or 'mad'")
345    if cv_rule not in {"min", "onepct"}:
346        raise ValueError("cv_rule must be 'min' or 'onepct'")
347    if min_T0 < 0:
348        raise ValueError("min_T0 must be non-negative")
349    if not 0.0 <= max_missing <= 1.0:
350        raise ValueError("max_missing must be between 0 and 1")
351
352    # Map force string to int
353    force_map = {"none": 0, "unit": 1, "time": 2, "two-way": 3}
354    force_int = force_map[force]
355
356    # Prepare panel data
357    panel = prepare_panel(
358        data, Y=Y, D=D, index=index, X=X, W=W,
359        group=group, min_T0=min_T0, min_T0_strict=min_T0_strict,
360        max_missing=max_missing,
361    )
362
363    # Move to device
364    Y_mat = to_device(panel.Y)
365    D_mat = to_device(panel.D)
366    I_mat = to_device(panel.I)
367    II_mat = to_device(panel.II)
368    X_mat = to_device(panel.X) if panel.X is not None else None
369    W_mat = to_device(panel.W) if panel.W is not None else None
370
371    # Normalize
372    norm_factor = 1.0
373    if normalize:
374        sd_y = float(xp.std(Y_mat[I_mat > 0]))
375        if sd_y > 0:
376            Y_mat = Y_mat / sd_y
377            norm_factor = sd_y
378
379    # Initial fit
380    Y0, beta0 = initial_fit(Y_mat, X_mat, II_mat, force_int)
381
382    # Determine r and lambda
383    r_cv = None
384    lambda_cv = None
385    cv_result = None
386
387    if method == "ife":
388        if isinstance(r, tuple) and CV:
389            cv_result = cv_ife(
390                Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0,
391                force=force_int, r_range=r, k=k, cv_prop=cv_prop,
392                cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut,
393                criterion=criterion, cv_rule=cv_rule,
394                tol=tol, max_iter=max_iter,
395                n_jobs=n_jobs, seed=seed,
396            )
397            r_cv = cv_result.best_r
398        else:
399            r_cv = r if isinstance(r, int) else r[0]
400
401    elif method == "mc":
402        if lam is None and CV:
403            cv_result = cv_mc(
404                Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0,
405                force=force_int, nlambda=nlambda, k=k, cv_prop=cv_prop,
406                cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut,
407                criterion=criterion, cv_rule=cv_rule,
408                tol=tol, max_iter=max_iter,
409                n_jobs=n_jobs, seed=seed,
410            )
411            lambda_cv = cv_result.best_lambda
412        else:
413            lambda_cv = lam if lam is not None else 0.0
414
415    elif method == "fe":
416        r_cv = 0
417
418    elif method == "cfe":
419        r_cv = r if isinstance(r, int) else r[0]
420
421    # Point estimation
422    if method in ("fe", "ife"):
423        est = estimate_ife(
424            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
425            r=r_cv, force=force_int, tol=tol, max_iter=max_iter,
426        )
427    elif method == "mc":
428        est = estimate_mc(
429            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
430            lam=lambda_cv, force=force_int, tol=tol, max_iter=max_iter,
431        )
432    elif method == "cfe":
433        est = estimate_cfe(
434            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
435            r=r_cv, force=force_int, tol=tol, max_iter=max_iter,
436        )
437    elif method == "both":
438        # Run both IFE and MC, return IFE results with MC comparison
439        if isinstance(r, tuple) and CV:
440            cv_result = cv_ife(
441                Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0,
442                force=force_int, r_range=r, k=k, cv_prop=cv_prop,
443                cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut,
444                criterion=criterion, cv_rule=cv_rule,
445                tol=tol, max_iter=max_iter,
446                n_jobs=n_jobs, seed=seed,
447            )
448            r_cv = cv_result.best_r
449        else:
450            r_cv = r if isinstance(r, int) else r[0]
451        est = estimate_ife(
452            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
453            r=r_cv, force=force_int, tol=tol, max_iter=max_iter,
454        )
455    else:
456        raise ValueError(f"Unknown method: {method}")
457
458    # Compute effects
459    eff = Y_mat - est.fit
460    Y_ct = est.fit
461
462    # Denormalize
463    if normalize and norm_factor != 1.0:
464        eff = eff * norm_factor
465        Y_ct = Y_ct * norm_factor
466        Y_mat = Y_mat * norm_factor
467        if est.beta is not None:
468            est = est._replace(beta=est.beta * norm_factor)
469
470    # ATT computation
471    T_on = to_device(panel.T_on)
472    att_avg, att_on, time_on, count_on, att_avg_unit = _compute_effects(
473        to_numpy(eff), to_numpy(D_mat), to_numpy(panel.T_on), to_numpy(I_mat),
474    )
475
476    # Build result
477    result = FectResult(
478        method=method,
479        r_cv=r_cv,
480        lambda_cv=lambda_cv,
481        att_avg=att_avg,
482        att_avg_unit=att_avg_unit,
483        att_on=att_on,
484        time_on=time_on,
485        count_on=count_on,
486        beta=to_numpy(est.beta) if est.beta is not None else None,
487        covariate_names=panel.covariate_names,
488        mu=est.mu,
489        alpha=to_numpy(est.alpha) if est.alpha is not None else None,
490        xi=to_numpy(est.xi) if est.xi is not None else None,
491        factors=to_numpy(est.factors) if est.factors is not None else None,
492        loadings=to_numpy(est.loadings) if est.loadings is not None else None,
493        Y_ct=to_numpy(Y_ct),
494        eff=to_numpy(eff),
495        residuals=to_numpy(est.residuals),
496        sigma2=est.sigma2,
497        IC=est.IC,
498        PC=est.PC,
499        niter=est.niter,
500        converged=est.converged,
501        cv_result=cv_result,
502        panel=panel,
503        seed=seed,
504    )
505
506    # Inference
507    if se:
508        result.inference = _run_inference(
509            result, panel, Y_mat, X_mat, W_mat, beta0, Y0,
510            method=method, r_cv=r_cv, lambda_cv=lambda_cv,
511            force_int=force_int, tol=tol, max_iter=max_iter,
512            vartype=vartype, nboots=nboots, alpha=alpha,
513            n_jobs=n_jobs, seed=seed, normalize=normalize,
514            norm_factor=norm_factor,
515        )
516
517    return result

Estimate counterfactual treatment effects for panel data.

This is the main Python entry point for the counterfactual estimator workflow. Where the paper and the historical R package differ, pyfector defaults to the paper's statistical definition and exposes R-package-style behavior through explicit options.

Missing outcome policy

pyfector distinguishes raw missing outcomes from counterfactual missingness caused by treatment. Observed untreated cells (D == 0 and non-missing Y) fit the response surface. Observed treated cells (D == 1 and non-missing Y) contribute to ATT as Y - Y_ct. If a treated outcome is missing in the input data, the model can still produce a counterfactual Y_ct for that cell, but the cell is not counted in att_avg or att_on because the treated potential outcome was not observed.

By default, min_T0 is enforced only for treated and reversal units. Sparse controls are retained if they have at least one observed outcome, because they may still inform the low-rank response surface. Set min_T0_strict=True to require controls to satisfy min_T0 too, matching the more conservative R fect sparse-panel behavior.

Parameters

data : polars.DataFrame, pandas.DataFrame Long-format panel data. Y, D : str Column names for outcome and binary treatment indicator. index : (str, str) Column names for (unit_id, time_period). X : list of str, optional Time-varying covariates. W : str, optional Observation weight column. group : str, optional Reserved for grouped estimation. Currently raises NotImplementedError when supplied. method : {"fe", "ife", "mc", "cfe", "both"} Estimation method. force : {"none", "unit", "time", "two-way"} Fixed effects specification. r : int or (int, int) Number of factors. If tuple, CV selects from range. lam : float, optional Nuclear norm penalty for MC. If None with CV=True, auto-selected. nlambda : int Number of automatically generated lambda candidates for MC CV. CV : bool If True, cross-validate over r for IFE when r is a tuple, or over lam for MC when lam is None. k : int Number of CV folds. cv_prop : float Fraction of eligible observed control cells masked per CV fold. cv_nobs : int Number of consecutive within-unit observations to mask as a block. cv_treat : bool If True, restrict CV masks to pre-treatment cells of ever-treated units. If False, use all observed control cells. cv_donut : int Exclude this many periods around treatment onset from CV evaluation. criterion : {"mspe", "gmspe", "mad"} Cross-validation loss. cv_rule : {"min", "onepct"} CV selection rule. "min" chooses the strict minimum-score candidate and is the paper-faithful default. "onepct" chooses the simplest candidate within 1% of the best score (lower r for IFE, higher lam for MC). se : bool Compute standard errors via bootstrap/jackknife. vartype : {"bootstrap", "jackknife"} Inference method when se=True. nboots : int Number of bootstrap replications. Ignored for jackknife. alpha : float Significance level for confidence intervals and tests. tol : float EM convergence tolerance for final point estimation. max_iter : int Maximum EM iterations. min_T0 : int Minimum untreated/pre-treatment observed periods. By default this is enforced only for treated and treatment-reversal units. min_T0_strict : bool If True, enforce min_T0 on all units, including controls. This matches R fect's conservative handling of sparse control rows. max_missing : float Maximum missing-outcome fraction per unit, in [0, 1]. Units with no observed outcomes are always dropped, regardless of this threshold, because they provide neither fitting information nor observed treated effects. normalize : bool If True, estimate on an outcome standardized by its observed standard deviation, then transform effects back to the original scale. Z, Q : list of str, optional Reserved CFE interaction arguments. Currently raise NotImplementedError when supplied. device : {"cpu", "gpu"} Compute device. n_jobs : int, optional Parallel workers for CV and bootstrap. -1 or None uses all available CPUs. seed : int, optional Random seed for full reproducibility.

@dataclass
class FectResult:
 68@dataclass
 69class FectResult:
 70    """Container for all fect estimation results."""
 71    # Method info
 72    method: str
 73    r_cv: int | None = None
 74    lambda_cv: float | None = None
 75
 76    # Point estimates
 77    att_avg: float = 0.0
 78    att_avg_unit: float = 0.0
 79
 80    # Dynamic effects
 81    att_on: np.ndarray | None = None
 82    time_on: np.ndarray | None = None
 83    count_on: np.ndarray | None = None
 84
 85    # Exit effects (treatment reversal)
 86    att_off: np.ndarray | None = None
 87    time_off: np.ndarray | None = None
 88
 89    # Coefficients
 90    beta: np.ndarray | None = None
 91    covariate_names: list[str] = field(default_factory=list)
 92
 93    # Fixed effects
 94    mu: float = 0.0
 95    alpha: np.ndarray | None = None   # unit FE
 96    xi: np.ndarray | None = None      # time FE
 97    factors: np.ndarray | None = None
 98    loadings: np.ndarray | None = None
 99
100    # Counterfactual and effects matrices
101    Y_ct: np.ndarray | None = None    # T×N counterfactual
102    eff: np.ndarray | None = None     # T×N treatment effects
103    residuals: np.ndarray | None = None
104
105    # Model fit
106    sigma2: float = 0.0
107    IC: float = 0.0
108    PC: float = 0.0
109    rmse: float = 0.0
110    niter: int = 0
111    converged: bool = False
112
113    # Inference
114    inference: InferenceResult | None = None
115
116    # CV
117    cv_result: CVResult | None = None
118
119    # Panel metadata
120    panel: PanelData | None = None
121
122    # Reproducibility
123    seed: int | None = None
124
125    def summary(self) -> str:
126        """Print summary table of results."""
127        lines = []
128        lines.append(f"pyfector estimation results")
129        lines.append(f"{'='*60}")
130        lines.append(f"Method: {self.method}")
131        if self.r_cv is not None:
132            lines.append(f"Number of factors (CV): {self.r_cv}")
133        if self.lambda_cv is not None:
134            lines.append(f"Lambda (CV): {self.lambda_cv:.6f}")
135        lines.append(f"Converged: {self.converged} (iter={self.niter})")
136        lines.append(f"Sigma^2: {self.sigma2:.6f}")
137        lines.append(f"")
138        lines.append(f"ATT (average): {self.att_avg:.6f}")
139        if self.inference is not None:
140            inf = self.inference
141            lines.append(f"  SE:     {inf.att_avg_se:.6f}")
142            lines.append(f"  CI:     [{inf.att_avg_ci[0]:.6f}, {inf.att_avg_ci[1]:.6f}]")
143            lines.append(f"  p-val:  {inf.att_avg_pval:.4f}")
144
145        if self.beta is not None and len(self.beta) > 0:
146            lines.append(f"")
147            lines.append(f"Coefficients:")
148            for i, name in enumerate(self.covariate_names):
149                lines.append(f"  {name}: {self.beta[i]:.6f}")
150
151        if self.att_on is not None and self.time_on is not None:
152            lines.append(f"")
153            lines.append(f"Dynamic effects (ATT by relative time):")
154            lines.append(f"  {'Time':>6s}  {'ATT':>10s}  {'Count':>6s}", )
155            for i, t in enumerate(self.time_on):
156                count = self.count_on[i] if self.count_on is not None else ""
157                att = self.att_on[i]
158                if self.inference is not None:
159                    se = self.inference.att_on_se[i]
160                    lines.append(f"  {t:>6.0f}  {att:>10.4f}  ({se:.4f})  {count}")
161                else:
162                    lines.append(f"  {t:>6.0f}  {att:>10.4f}  {count}")
163
164        lines.append(f"{'='*60}")
165        if self.panel is not None:
166            lines.append(f"N={self.panel.N}, T={self.panel.T}")
167        if self.seed is not None:
168            lines.append(f"Seed: {self.seed}")
169        return "\n".join(lines)
170
171    def __repr__(self):
172        return self.summary()
173
174    def plot(self, kind="gap", **kwargs):
175        """Plot results. Shortcut for ``pyfector.plot(self, kind, ...)``."""
176        from .plotting import plot as _plot
177        return _plot(self, kind=kind, **kwargs)
178
179    def diagnose(self, **kwargs):
180        """Run diagnostic tests. Shortcut for ``pyfector.run_diagnostics(self, ...)``."""
181        from .diagnostics import run_diagnostics
182        return run_diagnostics(self, **kwargs)

Container for all fect estimation results.

FectResult( method: str, r_cv: int | None = None, lambda_cv: float | None = None, att_avg: float = 0.0, att_avg_unit: float = 0.0, att_on: numpy.ndarray | None = None, time_on: numpy.ndarray | None = None, count_on: numpy.ndarray | None = None, att_off: numpy.ndarray | None = None, time_off: numpy.ndarray | None = None, beta: numpy.ndarray | None = None, covariate_names: list[str] = <factory>, mu: float = 0.0, alpha: numpy.ndarray | None = None, xi: numpy.ndarray | None = None, factors: numpy.ndarray | None = None, loadings: numpy.ndarray | None = None, Y_ct: numpy.ndarray | None = None, eff: numpy.ndarray | None = None, residuals: numpy.ndarray | None = None, sigma2: float = 0.0, IC: float = 0.0, PC: float = 0.0, rmse: float = 0.0, niter: int = 0, converged: bool = False, inference: pyfector.inference.InferenceResult | None = None, cv_result: pyfector.cv.CVResult | None = None, panel: pyfector.panel.PanelData | None = None, seed: int | None = None)
method: str
r_cv: int | None = None
lambda_cv: float | None = None
att_avg: float = 0.0
att_avg_unit: float = 0.0
att_on: numpy.ndarray | None = None
time_on: numpy.ndarray | None = None
count_on: numpy.ndarray | None = None
att_off: numpy.ndarray | None = None
time_off: numpy.ndarray | None = None
beta: numpy.ndarray | None = None
covariate_names: list[str]
mu: float = 0.0
alpha: numpy.ndarray | None = None
xi: numpy.ndarray | None = None
factors: numpy.ndarray | None = None
loadings: numpy.ndarray | None = None
Y_ct: numpy.ndarray | None = None
eff: numpy.ndarray | None = None
residuals: numpy.ndarray | None = None
sigma2: float = 0.0
IC: float = 0.0
PC: float = 0.0
rmse: float = 0.0
niter: int = 0
converged: bool = False
inference: pyfector.inference.InferenceResult | None = None
cv_result: pyfector.cv.CVResult | None = None
panel: pyfector.panel.PanelData | None = None
seed: int | None = None
def summary(self) -> str:
125    def summary(self) -> str:
126        """Print summary table of results."""
127        lines = []
128        lines.append(f"pyfector estimation results")
129        lines.append(f"{'='*60}")
130        lines.append(f"Method: {self.method}")
131        if self.r_cv is not None:
132            lines.append(f"Number of factors (CV): {self.r_cv}")
133        if self.lambda_cv is not None:
134            lines.append(f"Lambda (CV): {self.lambda_cv:.6f}")
135        lines.append(f"Converged: {self.converged} (iter={self.niter})")
136        lines.append(f"Sigma^2: {self.sigma2:.6f}")
137        lines.append(f"")
138        lines.append(f"ATT (average): {self.att_avg:.6f}")
139        if self.inference is not None:
140            inf = self.inference
141            lines.append(f"  SE:     {inf.att_avg_se:.6f}")
142            lines.append(f"  CI:     [{inf.att_avg_ci[0]:.6f}, {inf.att_avg_ci[1]:.6f}]")
143            lines.append(f"  p-val:  {inf.att_avg_pval:.4f}")
144
145        if self.beta is not None and len(self.beta) > 0:
146            lines.append(f"")
147            lines.append(f"Coefficients:")
148            for i, name in enumerate(self.covariate_names):
149                lines.append(f"  {name}: {self.beta[i]:.6f}")
150
151        if self.att_on is not None and self.time_on is not None:
152            lines.append(f"")
153            lines.append(f"Dynamic effects (ATT by relative time):")
154            lines.append(f"  {'Time':>6s}  {'ATT':>10s}  {'Count':>6s}", )
155            for i, t in enumerate(self.time_on):
156                count = self.count_on[i] if self.count_on is not None else ""
157                att = self.att_on[i]
158                if self.inference is not None:
159                    se = self.inference.att_on_se[i]
160                    lines.append(f"  {t:>6.0f}  {att:>10.4f}  ({se:.4f})  {count}")
161                else:
162                    lines.append(f"  {t:>6.0f}  {att:>10.4f}  {count}")
163
164        lines.append(f"{'='*60}")
165        if self.panel is not None:
166            lines.append(f"N={self.panel.N}, T={self.panel.T}")
167        if self.seed is not None:
168            lines.append(f"Seed: {self.seed}")
169        return "\n".join(lines)

Print summary table of results.

def plot(self, kind='gap', **kwargs):
174    def plot(self, kind="gap", **kwargs):
175        """Plot results. Shortcut for ``pyfector.plot(self, kind, ...)``."""
176        from .plotting import plot as _plot
177        return _plot(self, kind=kind, **kwargs)

Plot results. Shortcut for pyfector.plot(self, kind, ...).

def diagnose(self, **kwargs):
179    def diagnose(self, **kwargs):
180        """Run diagnostic tests. Shortcut for ``pyfector.run_diagnostics(self, ...)``."""
181        from .diagnostics import run_diagnostics
182        return run_diagnostics(self, **kwargs)

Run diagnostic tests. Shortcut for pyfector.run_diagnostics(self, ...).

def set_device(device: Literal['cpu', 'gpu']) -> None:
35def set_device(device: Literal["cpu", "gpu"]) -> None:
36    """Set the compute device globally."""
37    global _DEVICE
38    if device == "gpu" and not _check_cupy():
39        raise ImportError(
40            "CuPy is required for GPU support. Install it with: "
41            "pip install cupy-cuda12x  (adjust for your CUDA version)"
42        )
43    _DEVICE = device

Set the compute device globally.

def get_device() -> Literal['cpu', 'gpu']:
46def get_device() -> Literal["cpu", "gpu"]:
47    """Return the current device."""
48    return _DEVICE

Return the current device.

def run_diagnostics( result, f_threshold: float = 0.5, tost_threshold: float = 0.36, placebo_period: tuple[int, int] | None = None, carryover_period: tuple[int, int] | None = None, loo: bool = False) -> DiagnosticResult:
 92def run_diagnostics(
 93    result,
 94    f_threshold: float = 0.5,
 95    tost_threshold: float = 0.36,
 96    placebo_period: tuple[int, int] | None = None,
 97    carryover_period: tuple[int, int] | None = None,
 98    loo: bool = False,
 99) -> DiagnosticResult:
100    """Run diagnostic tests on a FectResult.
101
102    Parameters
103    ----------
104    result : FectResult
105        Must have inference results (``se=True``).
106    f_threshold : float
107        Non-centrality parameter for equivalence F-test.
108    tost_threshold : float
109        Equivalence bound for TOST.
110    placebo_period : (start, end), optional
111        Relative time window for placebo test.
112    """
113    from scipy import stats
114
115    diag = DiagnosticResult()
116
117    if result.inference is None:
118        return diag
119
120    inf = result.inference
121    time_on = result.time_on
122    att_on = result.att_on
123
124    if time_on is None or att_on is None:
125        return diag
126
127    # Pre-treatment periods
128    pre_mask = time_on < 0
129    if not np.any(pre_mask):
130        return diag
131
132    pre_idx = np.where(pre_mask)[0]
133    k = len(pre_idx)
134    att_pre = att_on[pre_idx]
135
136    # F-test from bootstrap covariance
137    if inf.att_on_boot is not None and inf.att_on_boot.shape[1] > k:
138        boot_pre = inf.att_on_boot[pre_idx, :]  # (k, nboots)
139        S = np.cov(boot_pre)  # (k, k)
140        if k == 1:
141            S = S.reshape(1, 1)
142
143        n_boot = boot_pre.shape[1]
144        try:
145            cond = np.linalg.cond(S)
146            if cond > 1e12:
147                S_inv = np.linalg.pinv(S)
148            else:
149                S_inv = np.linalg.inv(S)
150            F_stat = float(att_pre @ S_inv @ att_pre)
151            scale = (n_boot - k) / ((n_boot - 1) * k)
152            F_stat_scaled = F_stat * scale
153
154            if np.isfinite(F_stat_scaled) and F_stat_scaled >= 0:
155                diag.f_stat = F_stat_scaled
156                diag.f_df1 = k
157                diag.f_df2 = n_boot - k
158                diag.f_pval = float(1 - stats.f.cdf(F_stat_scaled, k, n_boot - k))
159
160                # Equivalence F-test (non-central F)
161                ncp = n_boot * f_threshold
162                diag.equiv_f_pval = float(stats.ncf.cdf(F_stat_scaled, k, n_boot - k, ncp))
163                diag.equiv_threshold = f_threshold
164        except np.linalg.LinAlgError:
165            pass
166
167    # TOST
168    if inf.att_on_se is not None:
169        se_pre = inf.att_on_se[pre_idx]
170        n_boot = inf.att_on_boot.shape[1] if inf.att_on_boot is not None else 200
171        df = n_boot - 1
172
173        tost_pvals = np.full(k, np.nan)
174        for i in range(k):
175            if se_pre[i] > 0:
176                t_upper = (att_pre[i] - tost_threshold) / se_pre[i]
177                t_lower = (att_pre[i] + tost_threshold) / se_pre[i]
178                p_upper = float(stats.t.cdf(t_upper, df))
179                p_lower = float(1 - stats.t.cdf(t_lower, df))
180                tost_pvals[i] = max(p_upper, p_lower)
181
182        diag.tost_pvals = tost_pvals
183        diag.tost_periods = time_on[pre_idx]
184        diag.tost_threshold = tost_threshold
185
186    # Placebo test
187    if placebo_period is not None:
188        p_start, p_end = placebo_period
189        placebo_mask = (time_on >= p_start) & (time_on <= p_end) & pre_mask
190        if np.any(placebo_mask):
191            p_idx = np.where(placebo_mask)[0]
192            diag.placebo_att = float(np.mean(att_on[p_idx]))
193            if inf.att_on_boot is not None:
194                boot_placebo = np.mean(inf.att_on_boot[p_idx, :], axis=0)
195                diag.placebo_pval = min(
196                    2 * np.mean(boot_placebo >= 0),
197                    2 * np.mean(boot_placebo <= 0),
198                    1.0,
199                )
200
201    # Carryover test (for treatment reversals)
202    if carryover_period is not None and hasattr(result, "att_off") and result.att_off is not None:
203        c_start, c_end = carryover_period
204        time_off = getattr(result, "time_off", None)
205        if time_off is not None:
206            off_mask = (time_off >= c_start) & (time_off <= c_end)
207            if np.any(off_mask):
208                diag.carryover_att = float(np.mean(result.att_off[off_mask]))
209
210    # Leave-one-period-out test
211    if loo and att_on is not None and time_on is not None:
212        post_mask = time_on >= 0
213        post_idx = np.where(post_mask)[0]
214        if len(post_idx) > 1:
215            full_att = result.att_avg
216            loo_atts = []
217            loo_periods = []
218            for drop_i in post_idx:
219                remaining = np.delete(post_idx, np.where(post_idx == drop_i))
220                if len(remaining) > 0:
221                    loo_att = float(np.mean(att_on[remaining]))
222                    loo_atts.append(loo_att)
223                    loo_periods.append(time_on[drop_i])
224            diag.loo_atts = np.array(loo_atts)
225            diag.loo_periods = np.array(loo_periods)
226            diag.loo_max_change = float(np.max(np.abs(diag.loo_atts - full_att)))
227
228    return diag

Run diagnostic tests on a FectResult.

Parameters

result : FectResult Must have inference results (se=True). f_threshold : float Non-centrality parameter for equivalence F-test. tost_threshold : float Equivalence bound for TOST. placebo_period : (start, end), optional Relative time window for placebo test.

@dataclass
class DiagnosticResult:
35@dataclass
36class DiagnosticResult:
37    """Container for diagnostic test results."""
38    # F-test for pre-trends
39    f_stat: float | None = None
40    f_pval: float | None = None
41    f_df1: int | None = None
42    f_df2: int | None = None
43
44    # Equivalence F-test
45    equiv_f_pval: float | None = None
46    equiv_threshold: float | None = None
47
48    # TOST per pre-period
49    tost_pvals: np.ndarray | None = None
50    tost_periods: np.ndarray | None = None
51    tost_threshold: float | None = None
52
53    # Placebo
54    placebo_att: float | None = None
55    placebo_pval: float | None = None
56
57    # Carryover
58    carryover_att: float | None = None
59    carryover_pval: float | None = None
60
61    # Leave-one-out
62    loo_atts: np.ndarray | None = None      # ATT when each pre-period is dropped
63    loo_periods: np.ndarray | None = None
64    loo_max_change: float | None = None     # max |ATT_full - ATT_loo|
65
66    def summary(self) -> str:
67        lines = ["Diagnostic Tests", "=" * 50]
68        if self.f_stat is not None:
69            lines.append(f"Pre-trend F-test:")
70            lines.append(f"  F({self.f_df1},{self.f_df2}) = {self.f_stat:.4f}, p = {self.f_pval:.4f}")
71        if self.equiv_f_pval is not None:
72            lines.append(f"Equivalence F-test:")
73            lines.append(f"  p = {self.equiv_f_pval:.4f} (threshold={self.equiv_threshold})")
74        if self.tost_pvals is not None:
75            lines.append("TOST per pre-period:")
76            for i, t in enumerate(self.tost_periods):
77                p = self.tost_pvals[i]
78                status = "PASS" if p < 0.05 else "fail"
79                lines.append(f"  t={t:+.0f}: p={p:.4f} [{status}]")
80        if self.placebo_pval is not None:
81            lines.append(f"Placebo test:")
82            lines.append(f"  ATT={self.placebo_att:.4f}, p={self.placebo_pval:.4f}")
83        if self.carryover_pval is not None:
84            lines.append(f"Carryover test:")
85            lines.append(f"  ATT={self.carryover_att:.4f}, p={self.carryover_pval:.4f}")
86        if self.loo_max_change is not None:
87            lines.append(f"Leave-one-out:")
88            lines.append(f"  Max ATT change = {self.loo_max_change:.6f}")
89        return "\n".join(lines)

Container for diagnostic test results.

DiagnosticResult( f_stat: float | None = None, f_pval: float | None = None, f_df1: int | None = None, f_df2: int | None = None, equiv_f_pval: float | None = None, equiv_threshold: float | None = None, tost_pvals: numpy.ndarray | None = None, tost_periods: numpy.ndarray | None = None, tost_threshold: float | None = None, placebo_att: float | None = None, placebo_pval: float | None = None, carryover_att: float | None = None, carryover_pval: float | None = None, loo_atts: numpy.ndarray | None = None, loo_periods: numpy.ndarray | None = None, loo_max_change: float | None = None)
f_stat: float | None = None
f_pval: float | None = None
f_df1: int | None = None
f_df2: int | None = None
equiv_f_pval: float | None = None
equiv_threshold: float | None = None
tost_pvals: numpy.ndarray | None = None
tost_periods: numpy.ndarray | None = None
tost_threshold: float | None = None
placebo_att: float | None = None
placebo_pval: float | None = None
carryover_att: float | None = None
carryover_pval: float | None = None
loo_atts: numpy.ndarray | None = None
loo_periods: numpy.ndarray | None = None
loo_max_change: float | None = None
def summary(self) -> str:
66    def summary(self) -> str:
67        lines = ["Diagnostic Tests", "=" * 50]
68        if self.f_stat is not None:
69            lines.append(f"Pre-trend F-test:")
70            lines.append(f"  F({self.f_df1},{self.f_df2}) = {self.f_stat:.4f}, p = {self.f_pval:.4f}")
71        if self.equiv_f_pval is not None:
72            lines.append(f"Equivalence F-test:")
73            lines.append(f"  p = {self.equiv_f_pval:.4f} (threshold={self.equiv_threshold})")
74        if self.tost_pvals is not None:
75            lines.append("TOST per pre-period:")
76            for i, t in enumerate(self.tost_periods):
77                p = self.tost_pvals[i]
78                status = "PASS" if p < 0.05 else "fail"
79                lines.append(f"  t={t:+.0f}: p={p:.4f} [{status}]")
80        if self.placebo_pval is not None:
81            lines.append(f"Placebo test:")
82            lines.append(f"  ATT={self.placebo_att:.4f}, p={self.placebo_pval:.4f}")
83        if self.carryover_pval is not None:
84            lines.append(f"Carryover test:")
85            lines.append(f"  ATT={self.carryover_att:.4f}, p={self.carryover_pval:.4f}")
86        if self.loo_max_change is not None:
87            lines.append(f"Leave-one-out:")
88            lines.append(f"  Max ATT change = {self.loo_max_change:.6f}")
89        return "\n".join(lines)
def plot( result, kind: Literal['gap', 'status', 'factors', 'counterfactual', 'equiv', 'calendar'] = 'gap', units: list | None = None, show_ci: bool = True, title: str | None = None, figsize: tuple[float, float] = (10, 6), ax=None, **kwargs):
20def plot(
21    result,
22    kind: Literal["gap", "status", "factors", "counterfactual", "equiv", "calendar"] = "gap",
23    units: list | None = None,
24    show_ci: bool = True,
25    title: str | None = None,
26    figsize: tuple[float, float] = (10, 6),
27    ax=None,
28    **kwargs,
29):
30    """Plot fect results.
31
32    Parameters
33    ----------
34    result : FectResult
35        Output from ``pyfector.fect()``.
36    kind : str
37        Plot type.
38    units : list, optional
39        Unit IDs for counterfactual plot.
40    show_ci : bool
41        Show confidence intervals (requires ``se=True`` in estimation).
42    """
43    import matplotlib.pyplot as plt
44
45    if ax is None:
46        fig, ax = plt.subplots(figsize=figsize)
47    else:
48        fig = ax.get_figure()
49
50    if kind == "gap":
51        _plot_gap(result, ax, show_ci, **kwargs)
52    elif kind == "status":
53        _plot_status(result, ax, **kwargs)
54    elif kind == "factors":
55        _plot_factors(result, ax, **kwargs)
56    elif kind == "counterfactual":
57        _plot_counterfactual(result, ax, units, **kwargs)
58    elif kind == "equiv":
59        _plot_equiv(result, ax, **kwargs)
60    elif kind == "calendar":
61        _plot_calendar(result, ax, show_ci, **kwargs)
62    else:
63        raise ValueError(f"Unknown plot kind: {kind}")
64
65    if title:
66        ax.set_title(title)
67
68    fig.tight_layout()
69    return fig

Plot fect results.

Parameters

result : FectResult Output from pyfector.fect. kind : str Plot type. units : list, optional Unit IDs for counterfactual plot. show_ci : bool Show confidence intervals (requires se=True in estimation).