pyfector.fect

Public entry point for pyfector.

This module exposes fect(), the single function most users call, and the FectResult dataclass that packages its output.

fect() orchestrates the full pipeline:

  1. pyfector.panel.prepare_panel() converts the input DataFrame to dense (T, N) matrices with treatment timing and unit classification.
  2. pyfector.panel.initial_fit() computes the starting fit Y0 and initial covariate coefficients on the control subsample.
  3. If the user passes a range for r (IFE) or leaves lam=None (MC), pyfector.cv chooses the hyperparameter by cross-validation.
  4. pyfector.estimators iterates the EM loop for the chosen method (fe, ife, mc, or cfe).
  5. Treatment effects are computed from eff = Y - Y_ct; the overall ATT and dynamic ATT by relative event time are derived in _compute_effects() via numpy.bincount().
  6. When se=True the requested inference routine (bootstrap or jackknife) from pyfector.inference is called with a closure that re-runs the EM loop on resampled units.

Example

::

import pyfector

result = pyfector.fect(
    data=df,
    Y="outcome", D="treat",
    index=("unit", "year"),
    X=["gdp", "pop"],
    method="ife",
    r=(0, 5),            # CV over 0..5 factors
    se=True,
    nboots=200,
    device="cpu",         # or "gpu"
    n_jobs=4,
    seed=42,
)
result.summary()
result.plot(kind="gap")
  1"""
  2Public entry point for pyfector.
  3
  4This module exposes :func:`fect`, the single function most users call,
  5and the :class:`FectResult` dataclass that packages its output.
  6
  7:func:`fect` orchestrates the full pipeline:
  8
  91. :func:`pyfector.panel.prepare_panel` converts the input DataFrame to
 10   dense ``(T, N)`` matrices with treatment timing and unit
 11   classification.
 122. :func:`pyfector.panel.initial_fit` computes the starting fit ``Y0``
 13   and initial covariate coefficients on the control subsample.
 143. If the user passes a range for ``r`` (IFE) or leaves ``lam=None``
 15   (MC), :mod:`pyfector.cv` chooses the hyperparameter by
 16   cross-validation.
 174. :mod:`pyfector.estimators` iterates the EM loop for the chosen
 18   method (``fe``, ``ife``, ``mc``, or ``cfe``).
 195. Treatment effects are computed from ``eff = Y - Y_ct``; the overall
 20   ATT and dynamic ATT by relative event time are derived in
 21   :func:`_compute_effects` via :func:`numpy.bincount`.
 226. When ``se=True`` the requested inference routine
 23   (``bootstrap`` or ``jackknife``) from :mod:`pyfector.inference` is
 24   called with a closure that re-runs the EM loop on resampled units.
 25
 26Example
 27-------
 28::
 29
 30    import pyfector
 31
 32    result = pyfector.fect(
 33        data=df,
 34        Y="outcome", D="treat",
 35        index=("unit", "year"),
 36        X=["gdp", "pop"],
 37        method="ife",
 38        r=(0, 5),            # CV over 0..5 factors
 39        se=True,
 40        nboots=200,
 41        device="cpu",         # or "gpu"
 42        n_jobs=4,
 43        seed=42,
 44    )
 45    result.summary()
 46    result.plot(kind="gap")
 47"""
 48
 49from __future__ import annotations
 50
 51from dataclasses import dataclass, field
 52from typing import Literal
 53import os
 54
 55import numpy as np
 56
 57from .backend import set_device, get_backend, to_numpy, to_device, make_rng
 58from .panel import PanelData, prepare_panel, initial_fit
 59from .estimators import estimate_ife, estimate_mc, estimate_cfe, EstimationResult
 60from .cv import cv_ife, cv_mc, CVResult
 61from .inference import bootstrap, jackknife, InferenceResult
 62
 63
 64@dataclass
 65class FectResult:
 66    """Container for all fect estimation results."""
 67    # Method info
 68    method: str
 69    r_cv: int | None = None
 70    lambda_cv: float | None = None
 71
 72    # Point estimates
 73    att_avg: float = 0.0
 74    att_avg_unit: float = 0.0
 75
 76    # Dynamic effects
 77    att_on: np.ndarray | None = None
 78    time_on: np.ndarray | None = None
 79    count_on: np.ndarray | None = None
 80
 81    # Exit effects (treatment reversal)
 82    att_off: np.ndarray | None = None
 83    time_off: np.ndarray | None = None
 84
 85    # Coefficients
 86    beta: np.ndarray | None = None
 87    covariate_names: list[str] = field(default_factory=list)
 88
 89    # Fixed effects
 90    mu: float = 0.0
 91    alpha: np.ndarray | None = None   # unit FE
 92    xi: np.ndarray | None = None      # time FE
 93    factors: np.ndarray | None = None
 94    loadings: np.ndarray | None = None
 95
 96    # Counterfactual and effects matrices
 97    Y_ct: np.ndarray | None = None    # T×N counterfactual
 98    eff: np.ndarray | None = None     # T×N treatment effects
 99    residuals: np.ndarray | None = None
100
101    # Model fit
102    sigma2: float = 0.0
103    IC: float = 0.0
104    PC: float = 0.0
105    rmse: float = 0.0
106    niter: int = 0
107    converged: bool = False
108
109    # Inference
110    inference: InferenceResult | None = None
111
112    # CV
113    cv_result: CVResult | None = None
114
115    # Panel metadata
116    panel: PanelData | None = None
117
118    # Reproducibility
119    seed: int | None = None
120
121    def summary(self) -> str:
122        """Print summary table of results."""
123        lines = []
124        lines.append(f"pyfector estimation results")
125        lines.append(f"{'='*60}")
126        lines.append(f"Method: {self.method}")
127        if self.r_cv is not None:
128            lines.append(f"Number of factors (CV): {self.r_cv}")
129        if self.lambda_cv is not None:
130            lines.append(f"Lambda (CV): {self.lambda_cv:.6f}")
131        lines.append(f"Converged: {self.converged} (iter={self.niter})")
132        lines.append(f"Sigma^2: {self.sigma2:.6f}")
133        lines.append(f"")
134        lines.append(f"ATT (average): {self.att_avg:.6f}")
135        if self.inference is not None:
136            inf = self.inference
137            lines.append(f"  SE:     {inf.att_avg_se:.6f}")
138            lines.append(f"  CI:     [{inf.att_avg_ci[0]:.6f}, {inf.att_avg_ci[1]:.6f}]")
139            lines.append(f"  p-val:  {inf.att_avg_pval:.4f}")
140
141        if self.beta is not None and len(self.beta) > 0:
142            lines.append(f"")
143            lines.append(f"Coefficients:")
144            for i, name in enumerate(self.covariate_names):
145                lines.append(f"  {name}: {self.beta[i]:.6f}")
146
147        if self.att_on is not None and self.time_on is not None:
148            lines.append(f"")
149            lines.append(f"Dynamic effects (ATT by relative time):")
150            lines.append(f"  {'Time':>6s}  {'ATT':>10s}  {'Count':>6s}", )
151            for i, t in enumerate(self.time_on):
152                count = self.count_on[i] if self.count_on is not None else ""
153                att = self.att_on[i]
154                if self.inference is not None:
155                    se = self.inference.att_on_se[i]
156                    lines.append(f"  {t:>6.0f}  {att:>10.4f}  ({se:.4f})  {count}")
157                else:
158                    lines.append(f"  {t:>6.0f}  {att:>10.4f}  {count}")
159
160        lines.append(f"{'='*60}")
161        if self.panel is not None:
162            lines.append(f"N={self.panel.N}, T={self.panel.T}")
163        if self.seed is not None:
164            lines.append(f"Seed: {self.seed}")
165        return "\n".join(lines)
166
167    def __repr__(self):
168        return self.summary()
169
170    def plot(self, kind="gap", **kwargs):
171        """Plot results. Shortcut for ``pyfector.plot(self, kind, ...)``."""
172        from .plotting import plot as _plot
173        return _plot(self, kind=kind, **kwargs)
174
175    def diagnose(self, **kwargs):
176        """Run diagnostic tests. Shortcut for ``pyfector.run_diagnostics(self, ...)``."""
177        from .diagnostics import run_diagnostics
178        return run_diagnostics(self, **kwargs)
179
180
181def fect(
182    data,
183    Y: str,
184    D: str,
185    index: tuple[str, str],
186    X: list[str] | None = None,
187    W: str | None = None,
188    group: str | None = None,
189    method: Literal["fe", "ife", "mc", "cfe", "both"] = "ife",
190    force: Literal["none", "unit", "time", "two-way"] = "two-way",
191    r: int | tuple[int, int] = 0,
192    lam: float | None = None,
193    nlambda: int = 10,
194    CV: bool = True,
195    k: int = 10,
196    cv_prop: float = 0.1,
197    cv_nobs: int = 3,
198    cv_treat: bool = True,
199    cv_donut: int = 0,
200    criterion: str = "mspe",
201    se: bool = False,
202    vartype: Literal["bootstrap", "jackknife"] = "bootstrap",
203    nboots: int = 200,
204    alpha: float = 0.05,
205    tol: float = 1e-7,
206    max_iter: int = 5000,
207    min_T0: int = 1,
208    max_missing: float = 1.0,
209    normalize: bool = False,
210    # CFE-specific
211    Z: list[str] | None = None,
212    Q: list[str] | None = None,
213    # Performance
214    device: Literal["cpu", "gpu"] = "cpu",
215    n_jobs: int | None = -1,
216    seed: int | None = None,
217) -> FectResult:
218    """Estimate counterfactual treatment effects for panel data.
219
220    This is the main entry point — equivalent to R fect's ``fect()`` function.
221
222    Parameters
223    ----------
224    data : polars.DataFrame, pandas.DataFrame
225        Long-format panel data.
226    Y, D : str
227        Column names for outcome and binary treatment indicator.
228    index : (str, str)
229        Column names for (unit_id, time_period).
230    X : list of str, optional
231        Time-varying covariates.
232    method : {"fe", "ife", "mc", "cfe", "both"}
233        Estimation method.
234    force : {"none", "unit", "time", "two-way"}
235        Fixed effects specification.
236    r : int or (int, int)
237        Number of factors.  If tuple, CV selects from range.
238    lam : float, optional
239        Nuclear norm penalty for MC.  If None with CV=True, auto-selected.
240    se : bool
241        Compute standard errors via bootstrap/jackknife.
242    device : {"cpu", "gpu"}
243        Compute device.
244    n_jobs : int, optional
245        Parallel workers for CV and bootstrap. ``-1`` or ``None`` uses
246        all available CPUs.
247    seed : int, optional
248        Random seed for full reproducibility.
249    """
250    # Set device
251    set_device(device)
252    xp = get_backend()
253    n_jobs = _resolve_n_jobs(n_jobs)
254
255    if group is not None:
256        raise NotImplementedError("The `group` argument is not implemented yet.")
257    if Z is not None or Q is not None:
258        raise NotImplementedError("The `Z` and `Q` CFE interaction arguments are not implemented yet.")
259
260    # Map force string to int
261    force_map = {"none": 0, "unit": 1, "time": 2, "two-way": 3}
262    force_int = force_map[force]
263
264    # Prepare panel data
265    panel = prepare_panel(
266        data, Y=Y, D=D, index=index, X=X, W=W,
267        group=group, min_T0=min_T0, max_missing=max_missing,
268    )
269
270    # Move to device
271    Y_mat = to_device(panel.Y)
272    D_mat = to_device(panel.D)
273    I_mat = to_device(panel.I)
274    II_mat = to_device(panel.II)
275    X_mat = to_device(panel.X) if panel.X is not None else None
276    W_mat = to_device(panel.W) if panel.W is not None else None
277
278    # Normalize
279    norm_factor = 1.0
280    if normalize:
281        sd_y = float(xp.std(Y_mat[I_mat > 0]))
282        if sd_y > 0:
283            Y_mat = Y_mat / sd_y
284            norm_factor = sd_y
285
286    # Initial fit
287    Y0, beta0 = initial_fit(Y_mat, X_mat, II_mat, force_int)
288
289    # Determine r and lambda
290    r_cv = None
291    lambda_cv = None
292    cv_result = None
293
294    if method == "ife":
295        if isinstance(r, tuple) and CV:
296            cv_result = cv_ife(
297                Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0,
298                force=force_int, r_range=r, k=k, cv_prop=cv_prop,
299                cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut,
300                criterion=criterion, tol=tol, max_iter=max_iter,
301                n_jobs=n_jobs, seed=seed,
302            )
303            r_cv = cv_result.best_r
304        else:
305            r_cv = r if isinstance(r, int) else r[0]
306
307    elif method == "mc":
308        if lam is None and CV:
309            cv_result = cv_mc(
310                Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0,
311                force=force_int, nlambda=nlambda, k=k, cv_prop=cv_prop,
312                cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut,
313                criterion=criterion, tol=tol, max_iter=max_iter,
314                n_jobs=n_jobs, seed=seed,
315            )
316            lambda_cv = cv_result.best_lambda
317        else:
318            lambda_cv = lam if lam is not None else 0.0
319
320    elif method == "fe":
321        r_cv = 0
322
323    elif method == "cfe":
324        r_cv = r if isinstance(r, int) else r[0]
325
326    # Point estimation
327    if method in ("fe", "ife"):
328        est = estimate_ife(
329            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
330            r=r_cv, force=force_int, tol=tol, max_iter=max_iter,
331        )
332    elif method == "mc":
333        est = estimate_mc(
334            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
335            lam=lambda_cv, force=force_int, tol=tol, max_iter=max_iter,
336        )
337    elif method == "cfe":
338        est = estimate_cfe(
339            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
340            r=r_cv, force=force_int, tol=tol, max_iter=max_iter,
341        )
342    elif method == "both":
343        # Run both IFE and MC, return IFE results with MC comparison
344        if isinstance(r, tuple) and CV:
345            cv_result = cv_ife(
346                Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0,
347                force=force_int, r_range=r, k=k, cv_prop=cv_prop,
348                cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut,
349                criterion=criterion, tol=tol, max_iter=max_iter,
350                n_jobs=n_jobs, seed=seed,
351            )
352            r_cv = cv_result.best_r
353        else:
354            r_cv = r if isinstance(r, int) else r[0]
355        est = estimate_ife(
356            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
357            r=r_cv, force=force_int, tol=tol, max_iter=max_iter,
358        )
359    else:
360        raise ValueError(f"Unknown method: {method}")
361
362    # Compute effects
363    eff = Y_mat - est.fit
364    Y_ct = est.fit
365
366    # Denormalize
367    if normalize and norm_factor != 1.0:
368        eff = eff * norm_factor
369        Y_ct = Y_ct * norm_factor
370        Y_mat = Y_mat * norm_factor
371        if est.beta is not None:
372            est = est._replace(beta=est.beta * norm_factor)
373
374    # ATT computation
375    T_on = to_device(panel.T_on)
376    att_avg, att_on, time_on, count_on, att_avg_unit = _compute_effects(
377        to_numpy(eff), to_numpy(D_mat), to_numpy(panel.T_on), to_numpy(I_mat),
378    )
379
380    # Build result
381    result = FectResult(
382        method=method,
383        r_cv=r_cv,
384        lambda_cv=lambda_cv,
385        att_avg=att_avg,
386        att_avg_unit=att_avg_unit,
387        att_on=att_on,
388        time_on=time_on,
389        count_on=count_on,
390        beta=to_numpy(est.beta) if est.beta is not None else None,
391        covariate_names=panel.covariate_names,
392        mu=est.mu,
393        alpha=to_numpy(est.alpha) if est.alpha is not None else None,
394        xi=to_numpy(est.xi) if est.xi is not None else None,
395        factors=to_numpy(est.factors) if est.factors is not None else None,
396        loadings=to_numpy(est.loadings) if est.loadings is not None else None,
397        Y_ct=to_numpy(Y_ct),
398        eff=to_numpy(eff),
399        residuals=to_numpy(est.residuals),
400        sigma2=est.sigma2,
401        IC=est.IC,
402        PC=est.PC,
403        niter=est.niter,
404        converged=est.converged,
405        cv_result=cv_result,
406        panel=panel,
407        seed=seed,
408    )
409
410    # Inference
411    if se:
412        result.inference = _run_inference(
413            result, panel, Y_mat, X_mat, W_mat, beta0, Y0,
414            method=method, r_cv=r_cv, lambda_cv=lambda_cv,
415            force_int=force_int, tol=tol, max_iter=max_iter,
416            vartype=vartype, nboots=nboots, alpha=alpha,
417            n_jobs=n_jobs, seed=seed, normalize=normalize,
418            norm_factor=norm_factor,
419        )
420
421    return result
422
423
424def _compute_effects(eff, D, T_on, I):
425    """Compute overall ATT, per-unit ATT, and dynamic ATT by event time.
426
427    Dynamic ATT grouping is done with :func:`numpy.bincount` so the cost
428    is O(n_observed_ever_treated_cells) regardless of the number of
429    distinct relative-time values.  Per-unit ATT uses a column-wise
430    masked mean via ``np.add.reduce`` rather than a Python loop.
431    """
432    treated = (D > 0) & (I > 0)
433    n_treated = int(np.sum(treated))
434
435    att_avg = float(np.sum(eff[treated]) / max(n_treated, 1))
436
437    # Per-unit ATT (post-treatment only) — sum then divide by count,
438    # keeping only units that have at least one treated observation.
439    col_counts = treated.sum(axis=0)                                 # (N,)
440    col_sums = (eff * treated).sum(axis=0)                           # (N,)
441    has_any = col_counts > 0
442    if np.any(has_any):
443        unit_atts = col_sums[has_any] / col_counts[has_any]
444        att_avg_unit = float(unit_atts.mean())
445    else:
446        att_avg_unit = 0.0
447
448    # Dynamic ATT by relative event time.  Include pre-treatment periods
449    # for ever-treated units (counterfactual gaps before onset).
450    ever_treated = np.any(D > 0, axis=0)                             # (N,)
451    all_periods = (I > 0) & ever_treated[np.newaxis, :]              # (T, N)
452
453    T_on_flat = T_on[all_periods]
454    eff_flat = eff[all_periods]
455    valid = ~np.isnan(T_on_flat)
456    T_on_flat = T_on_flat[valid].astype(np.int64)
457    eff_flat = eff_flat[valid]
458
459    if T_on_flat.size == 0:
460        return att_avg, np.array([]), np.array([]), np.array([], dtype=np.int64), att_avg_unit
461
462    # Bincount over the shifted integer indices.
463    offset = int(T_on_flat.min())
464    idx = T_on_flat - offset
465    minlength = int(T_on_flat.max() - offset + 1)
466    sums = np.bincount(idx, weights=eff_flat, minlength=minlength)
467    counts = np.bincount(idx, minlength=minlength)
468    keep = counts > 0
469    time_on = (offset + np.arange(minlength))[keep].astype(np.float64)
470    att_on = (sums[keep] / counts[keep]).astype(np.float64)
471    count_on = counts[keep].astype(np.int64)
472
473    return att_avg, att_on, time_on, count_on, att_avg_unit
474
475
476def _run_inference(
477    result, panel, Y_mat, X_mat, W_mat, beta0, Y0,
478    method, r_cv, lambda_cv, force_int, tol, max_iter,
479    vartype, nboots, alpha, n_jobs, seed, normalize, norm_factor,
480):
481    """Run bootstrap or jackknife inference.
482
483    Bootstrap replicates use a relaxed convergence tolerance
484    (``max(tol, 1e-3)``) because bootstrap SEs converge to 3–4
485    significant digits regardless of inner precision.  The full-sample
486    fit is used as the warm-start initialiser for each replicate,
487    which cuts EM iterations by ~30-60 %.
488    """
489    xp = get_backend()
490
491    # Relaxed tolerance: bootstrap SEs don't benefit from tight inner tol.
492    boot_tol = max(tol, 1e-3)
493
494    # Pre-move panel data to device once (avoids CPU→GPU copy per bootstrap rep)
495    II_dev = to_device(panel.II)
496    D_dev = to_device(panel.D)
497    I_dev = to_device(panel.I)
498    T_on_np = panel.T_on  # keep on CPU for ATT computation
499
500    # Warm-start: use full-sample fitted values as initial imputation
501    # for each replicate instead of recomputing initial_fit from scratch.
502    Y0_full = to_device(result.Y_ct)
503
504    def _estimate_fn(unit_idx):
505        """Re-estimate on a subset of units."""
506        Y_sub = Y_mat[:, unit_idx]
507        II_sub = II_dev[:, unit_idx]
508        D_sub = D_dev[:, unit_idx]
509        I_sub = I_dev[:, unit_idx]
510        X_sub = X_mat[:, unit_idx, :] if X_mat is not None else None
511        W_sub = W_mat[:, unit_idx] if W_mat is not None else None
512        T_on_sub = T_on_np[:, unit_idx]
513        beta0_sub = beta0
514
515        # Warm-start from full-sample fit (columns for resampled units).
516        # This is much closer to the replicate's solution than a cold
517        # initial_fit, cutting EM iterations significantly.
518        Y0_sub = Y0_full[:, unit_idx].copy()
519
520        if method in ("fe", "ife"):
521            est = estimate_ife(
522                Y_sub, Y0_sub, X_sub, II_sub, W_sub, beta0_sub,
523                r=r_cv, force=force_int, tol=boot_tol, max_iter=max_iter,
524            )
525        elif method == "mc":
526            est = estimate_mc(
527                Y_sub, Y0_sub, X_sub, II_sub, W_sub, beta0_sub,
528                lam=lambda_cv, force=force_int, tol=boot_tol, max_iter=max_iter,
529            )
530        else:
531            est = estimate_ife(
532                Y_sub, Y0_sub, X_sub, II_sub, W_sub, beta0_sub,
533                r=r_cv or 0, force=force_int, tol=boot_tol, max_iter=max_iter,
534            )
535
536        eff = to_numpy(Y_sub - est.fit)
537        if normalize and norm_factor != 1.0:
538            eff = eff * norm_factor
539
540        return eff, to_numpy(D_sub), T_on_sub, to_numpy(I_sub)
541
542    if vartype == "bootstrap":
543        return bootstrap(
544            _estimate_fn, to_numpy(Y_mat), to_numpy(panel.D),
545            to_numpy(panel.I), panel.T_on, panel.unit_type,
546            nboots=nboots, alpha=alpha, n_jobs=n_jobs, seed=seed,
547        )
548    else:
549        return jackknife(
550            _estimate_fn, to_numpy(Y_mat), to_numpy(panel.D),
551            to_numpy(panel.I), panel.T_on, panel.unit_type,
552            alpha=alpha, n_jobs=n_jobs,
553        )
554
555
556def _resolve_n_jobs(n_jobs: int | None) -> int:
557    """Normalize public n_jobs values before passing them to joblib."""
558    if n_jobs is None or n_jobs == -1:
559        return os.cpu_count() or 1
560    if n_jobs == 0 or n_jobs < -1:
561        raise ValueError("n_jobs must be a positive integer, -1, or None")
562    return int(n_jobs)
@dataclass
class FectResult:
 65@dataclass
 66class FectResult:
 67    """Container for all fect estimation results."""
 68    # Method info
 69    method: str
 70    r_cv: int | None = None
 71    lambda_cv: float | None = None
 72
 73    # Point estimates
 74    att_avg: float = 0.0
 75    att_avg_unit: float = 0.0
 76
 77    # Dynamic effects
 78    att_on: np.ndarray | None = None
 79    time_on: np.ndarray | None = None
 80    count_on: np.ndarray | None = None
 81
 82    # Exit effects (treatment reversal)
 83    att_off: np.ndarray | None = None
 84    time_off: np.ndarray | None = None
 85
 86    # Coefficients
 87    beta: np.ndarray | None = None
 88    covariate_names: list[str] = field(default_factory=list)
 89
 90    # Fixed effects
 91    mu: float = 0.0
 92    alpha: np.ndarray | None = None   # unit FE
 93    xi: np.ndarray | None = None      # time FE
 94    factors: np.ndarray | None = None
 95    loadings: np.ndarray | None = None
 96
 97    # Counterfactual and effects matrices
 98    Y_ct: np.ndarray | None = None    # T×N counterfactual
 99    eff: np.ndarray | None = None     # T×N treatment effects
100    residuals: np.ndarray | None = None
101
102    # Model fit
103    sigma2: float = 0.0
104    IC: float = 0.0
105    PC: float = 0.0
106    rmse: float = 0.0
107    niter: int = 0
108    converged: bool = False
109
110    # Inference
111    inference: InferenceResult | None = None
112
113    # CV
114    cv_result: CVResult | None = None
115
116    # Panel metadata
117    panel: PanelData | None = None
118
119    # Reproducibility
120    seed: int | None = None
121
122    def summary(self) -> str:
123        """Print summary table of results."""
124        lines = []
125        lines.append(f"pyfector estimation results")
126        lines.append(f"{'='*60}")
127        lines.append(f"Method: {self.method}")
128        if self.r_cv is not None:
129            lines.append(f"Number of factors (CV): {self.r_cv}")
130        if self.lambda_cv is not None:
131            lines.append(f"Lambda (CV): {self.lambda_cv:.6f}")
132        lines.append(f"Converged: {self.converged} (iter={self.niter})")
133        lines.append(f"Sigma^2: {self.sigma2:.6f}")
134        lines.append(f"")
135        lines.append(f"ATT (average): {self.att_avg:.6f}")
136        if self.inference is not None:
137            inf = self.inference
138            lines.append(f"  SE:     {inf.att_avg_se:.6f}")
139            lines.append(f"  CI:     [{inf.att_avg_ci[0]:.6f}, {inf.att_avg_ci[1]:.6f}]")
140            lines.append(f"  p-val:  {inf.att_avg_pval:.4f}")
141
142        if self.beta is not None and len(self.beta) > 0:
143            lines.append(f"")
144            lines.append(f"Coefficients:")
145            for i, name in enumerate(self.covariate_names):
146                lines.append(f"  {name}: {self.beta[i]:.6f}")
147
148        if self.att_on is not None and self.time_on is not None:
149            lines.append(f"")
150            lines.append(f"Dynamic effects (ATT by relative time):")
151            lines.append(f"  {'Time':>6s}  {'ATT':>10s}  {'Count':>6s}", )
152            for i, t in enumerate(self.time_on):
153                count = self.count_on[i] if self.count_on is not None else ""
154                att = self.att_on[i]
155                if self.inference is not None:
156                    se = self.inference.att_on_se[i]
157                    lines.append(f"  {t:>6.0f}  {att:>10.4f}  ({se:.4f})  {count}")
158                else:
159                    lines.append(f"  {t:>6.0f}  {att:>10.4f}  {count}")
160
161        lines.append(f"{'='*60}")
162        if self.panel is not None:
163            lines.append(f"N={self.panel.N}, T={self.panel.T}")
164        if self.seed is not None:
165            lines.append(f"Seed: {self.seed}")
166        return "\n".join(lines)
167
168    def __repr__(self):
169        return self.summary()
170
171    def plot(self, kind="gap", **kwargs):
172        """Plot results. Shortcut for ``pyfector.plot(self, kind, ...)``."""
173        from .plotting import plot as _plot
174        return _plot(self, kind=kind, **kwargs)
175
176    def diagnose(self, **kwargs):
177        """Run diagnostic tests. Shortcut for ``pyfector.run_diagnostics(self, ...)``."""
178        from .diagnostics import run_diagnostics
179        return run_diagnostics(self, **kwargs)

Container for all fect estimation results.

FectResult( method: str, r_cv: int | None = None, lambda_cv: float | None = None, att_avg: float = 0.0, att_avg_unit: float = 0.0, att_on: numpy.ndarray | None = None, time_on: numpy.ndarray | None = None, count_on: numpy.ndarray | None = None, att_off: numpy.ndarray | None = None, time_off: numpy.ndarray | None = None, beta: numpy.ndarray | None = None, covariate_names: list[str] = <factory>, mu: float = 0.0, alpha: numpy.ndarray | None = None, xi: numpy.ndarray | None = None, factors: numpy.ndarray | None = None, loadings: numpy.ndarray | None = None, Y_ct: numpy.ndarray | None = None, eff: numpy.ndarray | None = None, residuals: numpy.ndarray | None = None, sigma2: float = 0.0, IC: float = 0.0, PC: float = 0.0, rmse: float = 0.0, niter: int = 0, converged: bool = False, inference: pyfector.inference.InferenceResult | None = None, cv_result: pyfector.cv.CVResult | None = None, panel: pyfector.panel.PanelData | None = None, seed: int | None = None)
method: str
r_cv: int | None = None
lambda_cv: float | None = None
att_avg: float = 0.0
att_avg_unit: float = 0.0
att_on: numpy.ndarray | None = None
time_on: numpy.ndarray | None = None
count_on: numpy.ndarray | None = None
att_off: numpy.ndarray | None = None
time_off: numpy.ndarray | None = None
beta: numpy.ndarray | None = None
covariate_names: list[str]
mu: float = 0.0
alpha: numpy.ndarray | None = None
xi: numpy.ndarray | None = None
factors: numpy.ndarray | None = None
loadings: numpy.ndarray | None = None
Y_ct: numpy.ndarray | None = None
eff: numpy.ndarray | None = None
residuals: numpy.ndarray | None = None
sigma2: float = 0.0
IC: float = 0.0
PC: float = 0.0
rmse: float = 0.0
niter: int = 0
converged: bool = False
inference: pyfector.inference.InferenceResult | None = None
cv_result: pyfector.cv.CVResult | None = None
panel: pyfector.panel.PanelData | None = None
seed: int | None = None
def summary(self) -> str:
122    def summary(self) -> str:
123        """Print summary table of results."""
124        lines = []
125        lines.append(f"pyfector estimation results")
126        lines.append(f"{'='*60}")
127        lines.append(f"Method: {self.method}")
128        if self.r_cv is not None:
129            lines.append(f"Number of factors (CV): {self.r_cv}")
130        if self.lambda_cv is not None:
131            lines.append(f"Lambda (CV): {self.lambda_cv:.6f}")
132        lines.append(f"Converged: {self.converged} (iter={self.niter})")
133        lines.append(f"Sigma^2: {self.sigma2:.6f}")
134        lines.append(f"")
135        lines.append(f"ATT (average): {self.att_avg:.6f}")
136        if self.inference is not None:
137            inf = self.inference
138            lines.append(f"  SE:     {inf.att_avg_se:.6f}")
139            lines.append(f"  CI:     [{inf.att_avg_ci[0]:.6f}, {inf.att_avg_ci[1]:.6f}]")
140            lines.append(f"  p-val:  {inf.att_avg_pval:.4f}")
141
142        if self.beta is not None and len(self.beta) > 0:
143            lines.append(f"")
144            lines.append(f"Coefficients:")
145            for i, name in enumerate(self.covariate_names):
146                lines.append(f"  {name}: {self.beta[i]:.6f}")
147
148        if self.att_on is not None and self.time_on is not None:
149            lines.append(f"")
150            lines.append(f"Dynamic effects (ATT by relative time):")
151            lines.append(f"  {'Time':>6s}  {'ATT':>10s}  {'Count':>6s}", )
152            for i, t in enumerate(self.time_on):
153                count = self.count_on[i] if self.count_on is not None else ""
154                att = self.att_on[i]
155                if self.inference is not None:
156                    se = self.inference.att_on_se[i]
157                    lines.append(f"  {t:>6.0f}  {att:>10.4f}  ({se:.4f})  {count}")
158                else:
159                    lines.append(f"  {t:>6.0f}  {att:>10.4f}  {count}")
160
161        lines.append(f"{'='*60}")
162        if self.panel is not None:
163            lines.append(f"N={self.panel.N}, T={self.panel.T}")
164        if self.seed is not None:
165            lines.append(f"Seed: {self.seed}")
166        return "\n".join(lines)

Print summary table of results.

def plot(self, kind='gap', **kwargs):
171    def plot(self, kind="gap", **kwargs):
172        """Plot results. Shortcut for ``pyfector.plot(self, kind, ...)``."""
173        from .plotting import plot as _plot
174        return _plot(self, kind=kind, **kwargs)

Plot results. Shortcut for pyfector.plot(self, kind, ...).

def diagnose(self, **kwargs):
176    def diagnose(self, **kwargs):
177        """Run diagnostic tests. Shortcut for ``pyfector.run_diagnostics(self, ...)``."""
178        from .diagnostics import run_diagnostics
179        return run_diagnostics(self, **kwargs)

Run diagnostic tests. Shortcut for pyfector.run_diagnostics(self, ...).

def fect( data, Y: str, D: str, index: tuple[str, str], X: list[str] | None = None, W: str | None = None, group: str | None = None, method: Literal['fe', 'ife', 'mc', 'cfe', 'both'] = 'ife', force: Literal['none', 'unit', 'time', 'two-way'] = 'two-way', r: int | tuple[int, int] = 0, lam: float | None = None, nlambda: int = 10, CV: bool = True, k: int = 10, cv_prop: float = 0.1, cv_nobs: int = 3, cv_treat: bool = True, cv_donut: int = 0, criterion: str = 'mspe', se: bool = False, vartype: Literal['bootstrap', 'jackknife'] = 'bootstrap', nboots: int = 200, alpha: float = 0.05, tol: float = 1e-07, max_iter: int = 5000, min_T0: int = 1, max_missing: float = 1.0, normalize: bool = False, Z: list[str] | None = None, Q: list[str] | None = None, device: Literal['cpu', 'gpu'] = 'cpu', n_jobs: int | None = -1, seed: int | None = None) -> FectResult:
182def fect(
183    data,
184    Y: str,
185    D: str,
186    index: tuple[str, str],
187    X: list[str] | None = None,
188    W: str | None = None,
189    group: str | None = None,
190    method: Literal["fe", "ife", "mc", "cfe", "both"] = "ife",
191    force: Literal["none", "unit", "time", "two-way"] = "two-way",
192    r: int | tuple[int, int] = 0,
193    lam: float | None = None,
194    nlambda: int = 10,
195    CV: bool = True,
196    k: int = 10,
197    cv_prop: float = 0.1,
198    cv_nobs: int = 3,
199    cv_treat: bool = True,
200    cv_donut: int = 0,
201    criterion: str = "mspe",
202    se: bool = False,
203    vartype: Literal["bootstrap", "jackknife"] = "bootstrap",
204    nboots: int = 200,
205    alpha: float = 0.05,
206    tol: float = 1e-7,
207    max_iter: int = 5000,
208    min_T0: int = 1,
209    max_missing: float = 1.0,
210    normalize: bool = False,
211    # CFE-specific
212    Z: list[str] | None = None,
213    Q: list[str] | None = None,
214    # Performance
215    device: Literal["cpu", "gpu"] = "cpu",
216    n_jobs: int | None = -1,
217    seed: int | None = None,
218) -> FectResult:
219    """Estimate counterfactual treatment effects for panel data.
220
221    This is the main entry point — equivalent to R fect's ``fect()`` function.
222
223    Parameters
224    ----------
225    data : polars.DataFrame, pandas.DataFrame
226        Long-format panel data.
227    Y, D : str
228        Column names for outcome and binary treatment indicator.
229    index : (str, str)
230        Column names for (unit_id, time_period).
231    X : list of str, optional
232        Time-varying covariates.
233    method : {"fe", "ife", "mc", "cfe", "both"}
234        Estimation method.
235    force : {"none", "unit", "time", "two-way"}
236        Fixed effects specification.
237    r : int or (int, int)
238        Number of factors.  If tuple, CV selects from range.
239    lam : float, optional
240        Nuclear norm penalty for MC.  If None with CV=True, auto-selected.
241    se : bool
242        Compute standard errors via bootstrap/jackknife.
243    device : {"cpu", "gpu"}
244        Compute device.
245    n_jobs : int, optional
246        Parallel workers for CV and bootstrap. ``-1`` or ``None`` uses
247        all available CPUs.
248    seed : int, optional
249        Random seed for full reproducibility.
250    """
251    # Set device
252    set_device(device)
253    xp = get_backend()
254    n_jobs = _resolve_n_jobs(n_jobs)
255
256    if group is not None:
257        raise NotImplementedError("The `group` argument is not implemented yet.")
258    if Z is not None or Q is not None:
259        raise NotImplementedError("The `Z` and `Q` CFE interaction arguments are not implemented yet.")
260
261    # Map force string to int
262    force_map = {"none": 0, "unit": 1, "time": 2, "two-way": 3}
263    force_int = force_map[force]
264
265    # Prepare panel data
266    panel = prepare_panel(
267        data, Y=Y, D=D, index=index, X=X, W=W,
268        group=group, min_T0=min_T0, max_missing=max_missing,
269    )
270
271    # Move to device
272    Y_mat = to_device(panel.Y)
273    D_mat = to_device(panel.D)
274    I_mat = to_device(panel.I)
275    II_mat = to_device(panel.II)
276    X_mat = to_device(panel.X) if panel.X is not None else None
277    W_mat = to_device(panel.W) if panel.W is not None else None
278
279    # Normalize
280    norm_factor = 1.0
281    if normalize:
282        sd_y = float(xp.std(Y_mat[I_mat > 0]))
283        if sd_y > 0:
284            Y_mat = Y_mat / sd_y
285            norm_factor = sd_y
286
287    # Initial fit
288    Y0, beta0 = initial_fit(Y_mat, X_mat, II_mat, force_int)
289
290    # Determine r and lambda
291    r_cv = None
292    lambda_cv = None
293    cv_result = None
294
295    if method == "ife":
296        if isinstance(r, tuple) and CV:
297            cv_result = cv_ife(
298                Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0,
299                force=force_int, r_range=r, k=k, cv_prop=cv_prop,
300                cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut,
301                criterion=criterion, tol=tol, max_iter=max_iter,
302                n_jobs=n_jobs, seed=seed,
303            )
304            r_cv = cv_result.best_r
305        else:
306            r_cv = r if isinstance(r, int) else r[0]
307
308    elif method == "mc":
309        if lam is None and CV:
310            cv_result = cv_mc(
311                Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0,
312                force=force_int, nlambda=nlambda, k=k, cv_prop=cv_prop,
313                cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut,
314                criterion=criterion, tol=tol, max_iter=max_iter,
315                n_jobs=n_jobs, seed=seed,
316            )
317            lambda_cv = cv_result.best_lambda
318        else:
319            lambda_cv = lam if lam is not None else 0.0
320
321    elif method == "fe":
322        r_cv = 0
323
324    elif method == "cfe":
325        r_cv = r if isinstance(r, int) else r[0]
326
327    # Point estimation
328    if method in ("fe", "ife"):
329        est = estimate_ife(
330            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
331            r=r_cv, force=force_int, tol=tol, max_iter=max_iter,
332        )
333    elif method == "mc":
334        est = estimate_mc(
335            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
336            lam=lambda_cv, force=force_int, tol=tol, max_iter=max_iter,
337        )
338    elif method == "cfe":
339        est = estimate_cfe(
340            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
341            r=r_cv, force=force_int, tol=tol, max_iter=max_iter,
342        )
343    elif method == "both":
344        # Run both IFE and MC, return IFE results with MC comparison
345        if isinstance(r, tuple) and CV:
346            cv_result = cv_ife(
347                Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0,
348                force=force_int, r_range=r, k=k, cv_prop=cv_prop,
349                cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut,
350                criterion=criterion, tol=tol, max_iter=max_iter,
351                n_jobs=n_jobs, seed=seed,
352            )
353            r_cv = cv_result.best_r
354        else:
355            r_cv = r if isinstance(r, int) else r[0]
356        est = estimate_ife(
357            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
358            r=r_cv, force=force_int, tol=tol, max_iter=max_iter,
359        )
360    else:
361        raise ValueError(f"Unknown method: {method}")
362
363    # Compute effects
364    eff = Y_mat - est.fit
365    Y_ct = est.fit
366
367    # Denormalize
368    if normalize and norm_factor != 1.0:
369        eff = eff * norm_factor
370        Y_ct = Y_ct * norm_factor
371        Y_mat = Y_mat * norm_factor
372        if est.beta is not None:
373            est = est._replace(beta=est.beta * norm_factor)
374
375    # ATT computation
376    T_on = to_device(panel.T_on)
377    att_avg, att_on, time_on, count_on, att_avg_unit = _compute_effects(
378        to_numpy(eff), to_numpy(D_mat), to_numpy(panel.T_on), to_numpy(I_mat),
379    )
380
381    # Build result
382    result = FectResult(
383        method=method,
384        r_cv=r_cv,
385        lambda_cv=lambda_cv,
386        att_avg=att_avg,
387        att_avg_unit=att_avg_unit,
388        att_on=att_on,
389        time_on=time_on,
390        count_on=count_on,
391        beta=to_numpy(est.beta) if est.beta is not None else None,
392        covariate_names=panel.covariate_names,
393        mu=est.mu,
394        alpha=to_numpy(est.alpha) if est.alpha is not None else None,
395        xi=to_numpy(est.xi) if est.xi is not None else None,
396        factors=to_numpy(est.factors) if est.factors is not None else None,
397        loadings=to_numpy(est.loadings) if est.loadings is not None else None,
398        Y_ct=to_numpy(Y_ct),
399        eff=to_numpy(eff),
400        residuals=to_numpy(est.residuals),
401        sigma2=est.sigma2,
402        IC=est.IC,
403        PC=est.PC,
404        niter=est.niter,
405        converged=est.converged,
406        cv_result=cv_result,
407        panel=panel,
408        seed=seed,
409    )
410
411    # Inference
412    if se:
413        result.inference = _run_inference(
414            result, panel, Y_mat, X_mat, W_mat, beta0, Y0,
415            method=method, r_cv=r_cv, lambda_cv=lambda_cv,
416            force_int=force_int, tol=tol, max_iter=max_iter,
417            vartype=vartype, nboots=nboots, alpha=alpha,
418            n_jobs=n_jobs, seed=seed, normalize=normalize,
419            norm_factor=norm_factor,
420        )
421
422    return result

Estimate counterfactual treatment effects for panel data.

This is the main entry point — equivalent to R fect's fect() function.

Parameters

data : polars.DataFrame, pandas.DataFrame Long-format panel data. Y, D : str Column names for outcome and binary treatment indicator. index : (str, str) Column names for (unit_id, time_period). X : list of str, optional Time-varying covariates. method : {"fe", "ife", "mc", "cfe", "both"} Estimation method. force : {"none", "unit", "time", "two-way"} Fixed effects specification. r : int or (int, int) Number of factors. If tuple, CV selects from range. lam : float, optional Nuclear norm penalty for MC. If None with CV=True, auto-selected. se : bool Compute standard errors via bootstrap/jackknife. device : {"cpu", "gpu"} Compute device. n_jobs : int, optional Parallel workers for CV and bootstrap. -1 or None uses all available CPUs. seed : int, optional Random seed for full reproducibility.