pyfector

pyfector — Fast counterfactual estimators for panel data in Python.

A high-performance Python reimplementation of the R fect package, featuring randomized SVD, GPU acceleration (CuPy), parallel computing (joblib), Polars data ingestion, and seeded reproducibility.

Usage::

import pyfector

result = pyfector.fect(
    data=df,
    Y="outcome", D="treat",
    index=("unit", "year"),
    method="ife",
    r=(0, 5),
    se=True,
    seed=42,
)
result.summary()
result.plot()
 1"""
 2pyfector — Fast counterfactual estimators for panel data in Python.
 3
 4A high-performance Python reimplementation of the R ``fect`` package,
 5featuring randomized SVD, GPU acceleration (CuPy), parallel computing
 6(joblib), Polars data ingestion, and seeded reproducibility.
 7
 8Usage::
 9
10    import pyfector
11
12    result = pyfector.fect(
13        data=df,
14        Y="outcome", D="treat",
15        index=("unit", "year"),
16        method="ife",
17        r=(0, 5),
18        se=True,
19        seed=42,
20    )
21    result.summary()
22    result.plot()
23
24"""
25
26__version__ = "0.1.4"
27
28from .fect import fect, FectResult
29from .backend import set_device, get_device
30from .diagnostics import run_diagnostics, DiagnosticResult
31from .plotting import plot
32
33__all__ = [
34    "fect",
35    "FectResult",
36    "set_device",
37    "get_device",
38    "run_diagnostics",
39    "DiagnosticResult",
40    "plot",
41]
def fect( data, Y: str, D: str, index: tuple[str, str], X: list[str] | None = None, W: str | None = None, group: str | None = None, method: Literal['fe', 'ife', 'mc', 'cfe', 'both'] = 'ife', force: Literal['none', 'unit', 'time', 'two-way'] = 'two-way', r: int | tuple[int, int] = 0, lam: float | None = None, nlambda: int = 10, CV: bool = True, k: int = 10, cv_prop: float = 0.1, cv_nobs: int = 3, cv_treat: bool = True, cv_donut: int = 0, criterion: str = 'mspe', se: bool = False, vartype: Literal['bootstrap', 'jackknife'] = 'bootstrap', nboots: int = 200, alpha: float = 0.05, tol: float = 1e-07, max_iter: int = 5000, min_T0: int = 1, max_missing: float = 1.0, normalize: bool = False, Z: list[str] | None = None, Q: list[str] | None = None, device: Literal['cpu', 'gpu'] = 'cpu', n_jobs: int | None = -1, seed: int | None = None) -> FectResult:
182def fect(
183    data,
184    Y: str,
185    D: str,
186    index: tuple[str, str],
187    X: list[str] | None = None,
188    W: str | None = None,
189    group: str | None = None,
190    method: Literal["fe", "ife", "mc", "cfe", "both"] = "ife",
191    force: Literal["none", "unit", "time", "two-way"] = "two-way",
192    r: int | tuple[int, int] = 0,
193    lam: float | None = None,
194    nlambda: int = 10,
195    CV: bool = True,
196    k: int = 10,
197    cv_prop: float = 0.1,
198    cv_nobs: int = 3,
199    cv_treat: bool = True,
200    cv_donut: int = 0,
201    criterion: str = "mspe",
202    se: bool = False,
203    vartype: Literal["bootstrap", "jackknife"] = "bootstrap",
204    nboots: int = 200,
205    alpha: float = 0.05,
206    tol: float = 1e-7,
207    max_iter: int = 5000,
208    min_T0: int = 1,
209    max_missing: float = 1.0,
210    normalize: bool = False,
211    # CFE-specific
212    Z: list[str] | None = None,
213    Q: list[str] | None = None,
214    # Performance
215    device: Literal["cpu", "gpu"] = "cpu",
216    n_jobs: int | None = -1,
217    seed: int | None = None,
218) -> FectResult:
219    """Estimate counterfactual treatment effects for panel data.
220
221    This is the main entry point — equivalent to R fect's ``fect()`` function.
222
223    Parameters
224    ----------
225    data : polars.DataFrame, pandas.DataFrame
226        Long-format panel data.
227    Y, D : str
228        Column names for outcome and binary treatment indicator.
229    index : (str, str)
230        Column names for (unit_id, time_period).
231    X : list of str, optional
232        Time-varying covariates.
233    method : {"fe", "ife", "mc", "cfe", "both"}
234        Estimation method.
235    force : {"none", "unit", "time", "two-way"}
236        Fixed effects specification.
237    r : int or (int, int)
238        Number of factors.  If tuple, CV selects from range.
239    lam : float, optional
240        Nuclear norm penalty for MC.  If None with CV=True, auto-selected.
241    se : bool
242        Compute standard errors via bootstrap/jackknife.
243    device : {"cpu", "gpu"}
244        Compute device.
245    n_jobs : int, optional
246        Parallel workers for CV and bootstrap. ``-1`` or ``None`` uses
247        all available CPUs.
248    seed : int, optional
249        Random seed for full reproducibility.
250    """
251    # Set device
252    set_device(device)
253    xp = get_backend()
254    n_jobs = _resolve_n_jobs(n_jobs)
255
256    if group is not None:
257        raise NotImplementedError("The `group` argument is not implemented yet.")
258    if Z is not None or Q is not None:
259        raise NotImplementedError("The `Z` and `Q` CFE interaction arguments are not implemented yet.")
260
261    # Map force string to int
262    force_map = {"none": 0, "unit": 1, "time": 2, "two-way": 3}
263    force_int = force_map[force]
264
265    # Prepare panel data
266    panel = prepare_panel(
267        data, Y=Y, D=D, index=index, X=X, W=W,
268        group=group, min_T0=min_T0, max_missing=max_missing,
269    )
270
271    # Move to device
272    Y_mat = to_device(panel.Y)
273    D_mat = to_device(panel.D)
274    I_mat = to_device(panel.I)
275    II_mat = to_device(panel.II)
276    X_mat = to_device(panel.X) if panel.X is not None else None
277    W_mat = to_device(panel.W) if panel.W is not None else None
278
279    # Normalize
280    norm_factor = 1.0
281    if normalize:
282        sd_y = float(xp.std(Y_mat[I_mat > 0]))
283        if sd_y > 0:
284            Y_mat = Y_mat / sd_y
285            norm_factor = sd_y
286
287    # Initial fit
288    Y0, beta0 = initial_fit(Y_mat, X_mat, II_mat, force_int)
289
290    # Determine r and lambda
291    r_cv = None
292    lambda_cv = None
293    cv_result = None
294
295    if method == "ife":
296        if isinstance(r, tuple) and CV:
297            cv_result = cv_ife(
298                Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0,
299                force=force_int, r_range=r, k=k, cv_prop=cv_prop,
300                cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut,
301                criterion=criterion, tol=tol, max_iter=max_iter,
302                n_jobs=n_jobs, seed=seed,
303            )
304            r_cv = cv_result.best_r
305        else:
306            r_cv = r if isinstance(r, int) else r[0]
307
308    elif method == "mc":
309        if lam is None and CV:
310            cv_result = cv_mc(
311                Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0,
312                force=force_int, nlambda=nlambda, k=k, cv_prop=cv_prop,
313                cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut,
314                criterion=criterion, tol=tol, max_iter=max_iter,
315                n_jobs=n_jobs, seed=seed,
316            )
317            lambda_cv = cv_result.best_lambda
318        else:
319            lambda_cv = lam if lam is not None else 0.0
320
321    elif method == "fe":
322        r_cv = 0
323
324    elif method == "cfe":
325        r_cv = r if isinstance(r, int) else r[0]
326
327    # Point estimation
328    if method in ("fe", "ife"):
329        est = estimate_ife(
330            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
331            r=r_cv, force=force_int, tol=tol, max_iter=max_iter,
332        )
333    elif method == "mc":
334        est = estimate_mc(
335            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
336            lam=lambda_cv, force=force_int, tol=tol, max_iter=max_iter,
337        )
338    elif method == "cfe":
339        est = estimate_cfe(
340            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
341            r=r_cv, force=force_int, tol=tol, max_iter=max_iter,
342        )
343    elif method == "both":
344        # Run both IFE and MC, return IFE results with MC comparison
345        if isinstance(r, tuple) and CV:
346            cv_result = cv_ife(
347                Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0,
348                force=force_int, r_range=r, k=k, cv_prop=cv_prop,
349                cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut,
350                criterion=criterion, tol=tol, max_iter=max_iter,
351                n_jobs=n_jobs, seed=seed,
352            )
353            r_cv = cv_result.best_r
354        else:
355            r_cv = r if isinstance(r, int) else r[0]
356        est = estimate_ife(
357            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
358            r=r_cv, force=force_int, tol=tol, max_iter=max_iter,
359        )
360    else:
361        raise ValueError(f"Unknown method: {method}")
362
363    # Compute effects
364    eff = Y_mat - est.fit
365    Y_ct = est.fit
366
367    # Denormalize
368    if normalize and norm_factor != 1.0:
369        eff = eff * norm_factor
370        Y_ct = Y_ct * norm_factor
371        Y_mat = Y_mat * norm_factor
372        if est.beta is not None:
373            est = est._replace(beta=est.beta * norm_factor)
374
375    # ATT computation
376    T_on = to_device(panel.T_on)
377    att_avg, att_on, time_on, count_on, att_avg_unit = _compute_effects(
378        to_numpy(eff), to_numpy(D_mat), to_numpy(panel.T_on), to_numpy(I_mat),
379    )
380
381    # Build result
382    result = FectResult(
383        method=method,
384        r_cv=r_cv,
385        lambda_cv=lambda_cv,
386        att_avg=att_avg,
387        att_avg_unit=att_avg_unit,
388        att_on=att_on,
389        time_on=time_on,
390        count_on=count_on,
391        beta=to_numpy(est.beta) if est.beta is not None else None,
392        covariate_names=panel.covariate_names,
393        mu=est.mu,
394        alpha=to_numpy(est.alpha) if est.alpha is not None else None,
395        xi=to_numpy(est.xi) if est.xi is not None else None,
396        factors=to_numpy(est.factors) if est.factors is not None else None,
397        loadings=to_numpy(est.loadings) if est.loadings is not None else None,
398        Y_ct=to_numpy(Y_ct),
399        eff=to_numpy(eff),
400        residuals=to_numpy(est.residuals),
401        sigma2=est.sigma2,
402        IC=est.IC,
403        PC=est.PC,
404        niter=est.niter,
405        converged=est.converged,
406        cv_result=cv_result,
407        panel=panel,
408        seed=seed,
409    )
410
411    # Inference
412    if se:
413        result.inference = _run_inference(
414            result, panel, Y_mat, X_mat, W_mat, beta0, Y0,
415            method=method, r_cv=r_cv, lambda_cv=lambda_cv,
416            force_int=force_int, tol=tol, max_iter=max_iter,
417            vartype=vartype, nboots=nboots, alpha=alpha,
418            n_jobs=n_jobs, seed=seed, normalize=normalize,
419            norm_factor=norm_factor,
420        )
421
422    return result

Estimate counterfactual treatment effects for panel data.

This is the main entry point — equivalent to R fect's fect() function.

Parameters

data : polars.DataFrame, pandas.DataFrame Long-format panel data. Y, D : str Column names for outcome and binary treatment indicator. index : (str, str) Column names for (unit_id, time_period). X : list of str, optional Time-varying covariates. method : {"fe", "ife", "mc", "cfe", "both"} Estimation method. force : {"none", "unit", "time", "two-way"} Fixed effects specification. r : int or (int, int) Number of factors. If tuple, CV selects from range. lam : float, optional Nuclear norm penalty for MC. If None with CV=True, auto-selected. se : bool Compute standard errors via bootstrap/jackknife. device : {"cpu", "gpu"} Compute device. n_jobs : int, optional Parallel workers for CV and bootstrap. -1 or None uses all available CPUs. seed : int, optional Random seed for full reproducibility.

@dataclass
class FectResult:
 65@dataclass
 66class FectResult:
 67    """Container for all fect estimation results."""
 68    # Method info
 69    method: str
 70    r_cv: int | None = None
 71    lambda_cv: float | None = None
 72
 73    # Point estimates
 74    att_avg: float = 0.0
 75    att_avg_unit: float = 0.0
 76
 77    # Dynamic effects
 78    att_on: np.ndarray | None = None
 79    time_on: np.ndarray | None = None
 80    count_on: np.ndarray | None = None
 81
 82    # Exit effects (treatment reversal)
 83    att_off: np.ndarray | None = None
 84    time_off: np.ndarray | None = None
 85
 86    # Coefficients
 87    beta: np.ndarray | None = None
 88    covariate_names: list[str] = field(default_factory=list)
 89
 90    # Fixed effects
 91    mu: float = 0.0
 92    alpha: np.ndarray | None = None   # unit FE
 93    xi: np.ndarray | None = None      # time FE
 94    factors: np.ndarray | None = None
 95    loadings: np.ndarray | None = None
 96
 97    # Counterfactual and effects matrices
 98    Y_ct: np.ndarray | None = None    # T×N counterfactual
 99    eff: np.ndarray | None = None     # T×N treatment effects
100    residuals: np.ndarray | None = None
101
102    # Model fit
103    sigma2: float = 0.0
104    IC: float = 0.0
105    PC: float = 0.0
106    rmse: float = 0.0
107    niter: int = 0
108    converged: bool = False
109
110    # Inference
111    inference: InferenceResult | None = None
112
113    # CV
114    cv_result: CVResult | None = None
115
116    # Panel metadata
117    panel: PanelData | None = None
118
119    # Reproducibility
120    seed: int | None = None
121
122    def summary(self) -> str:
123        """Print summary table of results."""
124        lines = []
125        lines.append(f"pyfector estimation results")
126        lines.append(f"{'='*60}")
127        lines.append(f"Method: {self.method}")
128        if self.r_cv is not None:
129            lines.append(f"Number of factors (CV): {self.r_cv}")
130        if self.lambda_cv is not None:
131            lines.append(f"Lambda (CV): {self.lambda_cv:.6f}")
132        lines.append(f"Converged: {self.converged} (iter={self.niter})")
133        lines.append(f"Sigma^2: {self.sigma2:.6f}")
134        lines.append(f"")
135        lines.append(f"ATT (average): {self.att_avg:.6f}")
136        if self.inference is not None:
137            inf = self.inference
138            lines.append(f"  SE:     {inf.att_avg_se:.6f}")
139            lines.append(f"  CI:     [{inf.att_avg_ci[0]:.6f}, {inf.att_avg_ci[1]:.6f}]")
140            lines.append(f"  p-val:  {inf.att_avg_pval:.4f}")
141
142        if self.beta is not None and len(self.beta) > 0:
143            lines.append(f"")
144            lines.append(f"Coefficients:")
145            for i, name in enumerate(self.covariate_names):
146                lines.append(f"  {name}: {self.beta[i]:.6f}")
147
148        if self.att_on is not None and self.time_on is not None:
149            lines.append(f"")
150            lines.append(f"Dynamic effects (ATT by relative time):")
151            lines.append(f"  {'Time':>6s}  {'ATT':>10s}  {'Count':>6s}", )
152            for i, t in enumerate(self.time_on):
153                count = self.count_on[i] if self.count_on is not None else ""
154                att = self.att_on[i]
155                if self.inference is not None:
156                    se = self.inference.att_on_se[i]
157                    lines.append(f"  {t:>6.0f}  {att:>10.4f}  ({se:.4f})  {count}")
158                else:
159                    lines.append(f"  {t:>6.0f}  {att:>10.4f}  {count}")
160
161        lines.append(f"{'='*60}")
162        if self.panel is not None:
163            lines.append(f"N={self.panel.N}, T={self.panel.T}")
164        if self.seed is not None:
165            lines.append(f"Seed: {self.seed}")
166        return "\n".join(lines)
167
168    def __repr__(self):
169        return self.summary()
170
171    def plot(self, kind="gap", **kwargs):
172        """Plot results. Shortcut for ``pyfector.plot(self, kind, ...)``."""
173        from .plotting import plot as _plot
174        return _plot(self, kind=kind, **kwargs)
175
176    def diagnose(self, **kwargs):
177        """Run diagnostic tests. Shortcut for ``pyfector.run_diagnostics(self, ...)``."""
178        from .diagnostics import run_diagnostics
179        return run_diagnostics(self, **kwargs)

Container for all fect estimation results.

FectResult( method: str, r_cv: int | None = None, lambda_cv: float | None = None, att_avg: float = 0.0, att_avg_unit: float = 0.0, att_on: numpy.ndarray | None = None, time_on: numpy.ndarray | None = None, count_on: numpy.ndarray | None = None, att_off: numpy.ndarray | None = None, time_off: numpy.ndarray | None = None, beta: numpy.ndarray | None = None, covariate_names: list[str] = <factory>, mu: float = 0.0, alpha: numpy.ndarray | None = None, xi: numpy.ndarray | None = None, factors: numpy.ndarray | None = None, loadings: numpy.ndarray | None = None, Y_ct: numpy.ndarray | None = None, eff: numpy.ndarray | None = None, residuals: numpy.ndarray | None = None, sigma2: float = 0.0, IC: float = 0.0, PC: float = 0.0, rmse: float = 0.0, niter: int = 0, converged: bool = False, inference: pyfector.inference.InferenceResult | None = None, cv_result: pyfector.cv.CVResult | None = None, panel: pyfector.panel.PanelData | None = None, seed: int | None = None)
method: str
r_cv: int | None = None
lambda_cv: float | None = None
att_avg: float = 0.0
att_avg_unit: float = 0.0
att_on: numpy.ndarray | None = None
time_on: numpy.ndarray | None = None
count_on: numpy.ndarray | None = None
att_off: numpy.ndarray | None = None
time_off: numpy.ndarray | None = None
beta: numpy.ndarray | None = None
covariate_names: list[str]
mu: float = 0.0
alpha: numpy.ndarray | None = None
xi: numpy.ndarray | None = None
factors: numpy.ndarray | None = None
loadings: numpy.ndarray | None = None
Y_ct: numpy.ndarray | None = None
eff: numpy.ndarray | None = None
residuals: numpy.ndarray | None = None
sigma2: float = 0.0
IC: float = 0.0
PC: float = 0.0
rmse: float = 0.0
niter: int = 0
converged: bool = False
inference: pyfector.inference.InferenceResult | None = None
cv_result: pyfector.cv.CVResult | None = None
panel: pyfector.panel.PanelData | None = None
seed: int | None = None
def summary(self) -> str:
122    def summary(self) -> str:
123        """Print summary table of results."""
124        lines = []
125        lines.append(f"pyfector estimation results")
126        lines.append(f"{'='*60}")
127        lines.append(f"Method: {self.method}")
128        if self.r_cv is not None:
129            lines.append(f"Number of factors (CV): {self.r_cv}")
130        if self.lambda_cv is not None:
131            lines.append(f"Lambda (CV): {self.lambda_cv:.6f}")
132        lines.append(f"Converged: {self.converged} (iter={self.niter})")
133        lines.append(f"Sigma^2: {self.sigma2:.6f}")
134        lines.append(f"")
135        lines.append(f"ATT (average): {self.att_avg:.6f}")
136        if self.inference is not None:
137            inf = self.inference
138            lines.append(f"  SE:     {inf.att_avg_se:.6f}")
139            lines.append(f"  CI:     [{inf.att_avg_ci[0]:.6f}, {inf.att_avg_ci[1]:.6f}]")
140            lines.append(f"  p-val:  {inf.att_avg_pval:.4f}")
141
142        if self.beta is not None and len(self.beta) > 0:
143            lines.append(f"")
144            lines.append(f"Coefficients:")
145            for i, name in enumerate(self.covariate_names):
146                lines.append(f"  {name}: {self.beta[i]:.6f}")
147
148        if self.att_on is not None and self.time_on is not None:
149            lines.append(f"")
150            lines.append(f"Dynamic effects (ATT by relative time):")
151            lines.append(f"  {'Time':>6s}  {'ATT':>10s}  {'Count':>6s}", )
152            for i, t in enumerate(self.time_on):
153                count = self.count_on[i] if self.count_on is not None else ""
154                att = self.att_on[i]
155                if self.inference is not None:
156                    se = self.inference.att_on_se[i]
157                    lines.append(f"  {t:>6.0f}  {att:>10.4f}  ({se:.4f})  {count}")
158                else:
159                    lines.append(f"  {t:>6.0f}  {att:>10.4f}  {count}")
160
161        lines.append(f"{'='*60}")
162        if self.panel is not None:
163            lines.append(f"N={self.panel.N}, T={self.panel.T}")
164        if self.seed is not None:
165            lines.append(f"Seed: {self.seed}")
166        return "\n".join(lines)

Print summary table of results.

def plot(self, kind='gap', **kwargs):
171    def plot(self, kind="gap", **kwargs):
172        """Plot results. Shortcut for ``pyfector.plot(self, kind, ...)``."""
173        from .plotting import plot as _plot
174        return _plot(self, kind=kind, **kwargs)

Plot results. Shortcut for pyfector.plot(self, kind, ...).

def diagnose(self, **kwargs):
176    def diagnose(self, **kwargs):
177        """Run diagnostic tests. Shortcut for ``pyfector.run_diagnostics(self, ...)``."""
178        from .diagnostics import run_diagnostics
179        return run_diagnostics(self, **kwargs)

Run diagnostic tests. Shortcut for pyfector.run_diagnostics(self, ...).

def set_device(device: Literal['cpu', 'gpu']) -> None:
35def set_device(device: Literal["cpu", "gpu"]) -> None:
36    """Set the compute device globally."""
37    global _DEVICE
38    if device == "gpu" and not _check_cupy():
39        raise ImportError(
40            "CuPy is required for GPU support. Install it with: "
41            "pip install cupy-cuda12x  (adjust for your CUDA version)"
42        )
43    _DEVICE = device

Set the compute device globally.

def get_device() -> Literal['cpu', 'gpu']:
46def get_device() -> Literal["cpu", "gpu"]:
47    """Return the current device."""
48    return _DEVICE

Return the current device.

def run_diagnostics( result, f_threshold: float = 0.5, tost_threshold: float = 0.36, placebo_period: tuple[int, int] | None = None, carryover_period: tuple[int, int] | None = None, loo: bool = False) -> DiagnosticResult:
 92def run_diagnostics(
 93    result,
 94    f_threshold: float = 0.5,
 95    tost_threshold: float = 0.36,
 96    placebo_period: tuple[int, int] | None = None,
 97    carryover_period: tuple[int, int] | None = None,
 98    loo: bool = False,
 99) -> DiagnosticResult:
100    """Run diagnostic tests on a FectResult.
101
102    Parameters
103    ----------
104    result : FectResult
105        Must have inference results (``se=True``).
106    f_threshold : float
107        Non-centrality parameter for equivalence F-test.
108    tost_threshold : float
109        Equivalence bound for TOST.
110    placebo_period : (start, end), optional
111        Relative time window for placebo test.
112    """
113    from scipy import stats
114
115    diag = DiagnosticResult()
116
117    if result.inference is None:
118        return diag
119
120    inf = result.inference
121    time_on = result.time_on
122    att_on = result.att_on
123
124    if time_on is None or att_on is None:
125        return diag
126
127    # Pre-treatment periods
128    pre_mask = time_on < 0
129    if not np.any(pre_mask):
130        return diag
131
132    pre_idx = np.where(pre_mask)[0]
133    k = len(pre_idx)
134    att_pre = att_on[pre_idx]
135
136    # F-test from bootstrap covariance
137    if inf.att_on_boot is not None and inf.att_on_boot.shape[1] > k:
138        boot_pre = inf.att_on_boot[pre_idx, :]  # (k, nboots)
139        S = np.cov(boot_pre)  # (k, k)
140        if k == 1:
141            S = S.reshape(1, 1)
142
143        n_boot = boot_pre.shape[1]
144        try:
145            cond = np.linalg.cond(S)
146            if cond > 1e12:
147                S_inv = np.linalg.pinv(S)
148            else:
149                S_inv = np.linalg.inv(S)
150            F_stat = float(att_pre @ S_inv @ att_pre)
151            scale = (n_boot - k) / ((n_boot - 1) * k)
152            F_stat_scaled = F_stat * scale
153
154            if np.isfinite(F_stat_scaled) and F_stat_scaled >= 0:
155                diag.f_stat = F_stat_scaled
156                diag.f_df1 = k
157                diag.f_df2 = n_boot - k
158                diag.f_pval = float(1 - stats.f.cdf(F_stat_scaled, k, n_boot - k))
159
160                # Equivalence F-test (non-central F)
161                ncp = n_boot * f_threshold
162                diag.equiv_f_pval = float(stats.ncf.cdf(F_stat_scaled, k, n_boot - k, ncp))
163                diag.equiv_threshold = f_threshold
164        except np.linalg.LinAlgError:
165            pass
166
167    # TOST
168    if inf.att_on_se is not None:
169        se_pre = inf.att_on_se[pre_idx]
170        n_boot = inf.att_on_boot.shape[1] if inf.att_on_boot is not None else 200
171        df = n_boot - 1
172
173        tost_pvals = np.full(k, np.nan)
174        for i in range(k):
175            if se_pre[i] > 0:
176                t_upper = (att_pre[i] - tost_threshold) / se_pre[i]
177                t_lower = (att_pre[i] + tost_threshold) / se_pre[i]
178                p_upper = float(stats.t.cdf(t_upper, df))
179                p_lower = float(1 - stats.t.cdf(t_lower, df))
180                tost_pvals[i] = max(p_upper, p_lower)
181
182        diag.tost_pvals = tost_pvals
183        diag.tost_periods = time_on[pre_idx]
184        diag.tost_threshold = tost_threshold
185
186    # Placebo test
187    if placebo_period is not None:
188        p_start, p_end = placebo_period
189        placebo_mask = (time_on >= p_start) & (time_on <= p_end) & pre_mask
190        if np.any(placebo_mask):
191            p_idx = np.where(placebo_mask)[0]
192            diag.placebo_att = float(np.mean(att_on[p_idx]))
193            if inf.att_on_boot is not None:
194                boot_placebo = np.mean(inf.att_on_boot[p_idx, :], axis=0)
195                diag.placebo_pval = min(
196                    2 * np.mean(boot_placebo >= 0),
197                    2 * np.mean(boot_placebo <= 0),
198                    1.0,
199                )
200
201    # Carryover test (for treatment reversals)
202    if carryover_period is not None and hasattr(result, "att_off") and result.att_off is not None:
203        c_start, c_end = carryover_period
204        time_off = getattr(result, "time_off", None)
205        if time_off is not None:
206            off_mask = (time_off >= c_start) & (time_off <= c_end)
207            if np.any(off_mask):
208                diag.carryover_att = float(np.mean(result.att_off[off_mask]))
209
210    # Leave-one-period-out test
211    if loo and att_on is not None and time_on is not None:
212        post_mask = time_on >= 0
213        post_idx = np.where(post_mask)[0]
214        if len(post_idx) > 1:
215            full_att = result.att_avg
216            loo_atts = []
217            loo_periods = []
218            for drop_i in post_idx:
219                remaining = np.delete(post_idx, np.where(post_idx == drop_i))
220                if len(remaining) > 0:
221                    loo_att = float(np.mean(att_on[remaining]))
222                    loo_atts.append(loo_att)
223                    loo_periods.append(time_on[drop_i])
224            diag.loo_atts = np.array(loo_atts)
225            diag.loo_periods = np.array(loo_periods)
226            diag.loo_max_change = float(np.max(np.abs(diag.loo_atts - full_att)))
227
228    return diag

Run diagnostic tests on a FectResult.

Parameters

result : FectResult Must have inference results (se=True). f_threshold : float Non-centrality parameter for equivalence F-test. tost_threshold : float Equivalence bound for TOST. placebo_period : (start, end), optional Relative time window for placebo test.

@dataclass
class DiagnosticResult:
35@dataclass
36class DiagnosticResult:
37    """Container for diagnostic test results."""
38    # F-test for pre-trends
39    f_stat: float | None = None
40    f_pval: float | None = None
41    f_df1: int | None = None
42    f_df2: int | None = None
43
44    # Equivalence F-test
45    equiv_f_pval: float | None = None
46    equiv_threshold: float | None = None
47
48    # TOST per pre-period
49    tost_pvals: np.ndarray | None = None
50    tost_periods: np.ndarray | None = None
51    tost_threshold: float | None = None
52
53    # Placebo
54    placebo_att: float | None = None
55    placebo_pval: float | None = None
56
57    # Carryover
58    carryover_att: float | None = None
59    carryover_pval: float | None = None
60
61    # Leave-one-out
62    loo_atts: np.ndarray | None = None      # ATT when each pre-period is dropped
63    loo_periods: np.ndarray | None = None
64    loo_max_change: float | None = None     # max |ATT_full - ATT_loo|
65
66    def summary(self) -> str:
67        lines = ["Diagnostic Tests", "=" * 50]
68        if self.f_stat is not None:
69            lines.append(f"Pre-trend F-test:")
70            lines.append(f"  F({self.f_df1},{self.f_df2}) = {self.f_stat:.4f}, p = {self.f_pval:.4f}")
71        if self.equiv_f_pval is not None:
72            lines.append(f"Equivalence F-test:")
73            lines.append(f"  p = {self.equiv_f_pval:.4f} (threshold={self.equiv_threshold})")
74        if self.tost_pvals is not None:
75            lines.append("TOST per pre-period:")
76            for i, t in enumerate(self.tost_periods):
77                p = self.tost_pvals[i]
78                status = "PASS" if p < 0.05 else "fail"
79                lines.append(f"  t={t:+.0f}: p={p:.4f} [{status}]")
80        if self.placebo_pval is not None:
81            lines.append(f"Placebo test:")
82            lines.append(f"  ATT={self.placebo_att:.4f}, p={self.placebo_pval:.4f}")
83        if self.carryover_pval is not None:
84            lines.append(f"Carryover test:")
85            lines.append(f"  ATT={self.carryover_att:.4f}, p={self.carryover_pval:.4f}")
86        if self.loo_max_change is not None:
87            lines.append(f"Leave-one-out:")
88            lines.append(f"  Max ATT change = {self.loo_max_change:.6f}")
89        return "\n".join(lines)

Container for diagnostic test results.

DiagnosticResult( f_stat: float | None = None, f_pval: float | None = None, f_df1: int | None = None, f_df2: int | None = None, equiv_f_pval: float | None = None, equiv_threshold: float | None = None, tost_pvals: numpy.ndarray | None = None, tost_periods: numpy.ndarray | None = None, tost_threshold: float | None = None, placebo_att: float | None = None, placebo_pval: float | None = None, carryover_att: float | None = None, carryover_pval: float | None = None, loo_atts: numpy.ndarray | None = None, loo_periods: numpy.ndarray | None = None, loo_max_change: float | None = None)
f_stat: float | None = None
f_pval: float | None = None
f_df1: int | None = None
f_df2: int | None = None
equiv_f_pval: float | None = None
equiv_threshold: float | None = None
tost_pvals: numpy.ndarray | None = None
tost_periods: numpy.ndarray | None = None
tost_threshold: float | None = None
placebo_att: float | None = None
placebo_pval: float | None = None
carryover_att: float | None = None
carryover_pval: float | None = None
loo_atts: numpy.ndarray | None = None
loo_periods: numpy.ndarray | None = None
loo_max_change: float | None = None
def summary(self) -> str:
66    def summary(self) -> str:
67        lines = ["Diagnostic Tests", "=" * 50]
68        if self.f_stat is not None:
69            lines.append(f"Pre-trend F-test:")
70            lines.append(f"  F({self.f_df1},{self.f_df2}) = {self.f_stat:.4f}, p = {self.f_pval:.4f}")
71        if self.equiv_f_pval is not None:
72            lines.append(f"Equivalence F-test:")
73            lines.append(f"  p = {self.equiv_f_pval:.4f} (threshold={self.equiv_threshold})")
74        if self.tost_pvals is not None:
75            lines.append("TOST per pre-period:")
76            for i, t in enumerate(self.tost_periods):
77                p = self.tost_pvals[i]
78                status = "PASS" if p < 0.05 else "fail"
79                lines.append(f"  t={t:+.0f}: p={p:.4f} [{status}]")
80        if self.placebo_pval is not None:
81            lines.append(f"Placebo test:")
82            lines.append(f"  ATT={self.placebo_att:.4f}, p={self.placebo_pval:.4f}")
83        if self.carryover_pval is not None:
84            lines.append(f"Carryover test:")
85            lines.append(f"  ATT={self.carryover_att:.4f}, p={self.carryover_pval:.4f}")
86        if self.loo_max_change is not None:
87            lines.append(f"Leave-one-out:")
88            lines.append(f"  Max ATT change = {self.loo_max_change:.6f}")
89        return "\n".join(lines)
def plot( result, kind: Literal['gap', 'status', 'factors', 'counterfactual', 'equiv', 'calendar'] = 'gap', units: list | None = None, show_ci: bool = True, title: str | None = None, figsize: tuple[float, float] = (10, 6), ax=None, **kwargs):
20def plot(
21    result,
22    kind: Literal["gap", "status", "factors", "counterfactual", "equiv", "calendar"] = "gap",
23    units: list | None = None,
24    show_ci: bool = True,
25    title: str | None = None,
26    figsize: tuple[float, float] = (10, 6),
27    ax=None,
28    **kwargs,
29):
30    """Plot fect results.
31
32    Parameters
33    ----------
34    result : FectResult
35        Output from ``pyfector.fect()``.
36    kind : str
37        Plot type.
38    units : list, optional
39        Unit IDs for counterfactual plot.
40    show_ci : bool
41        Show confidence intervals (requires ``se=True`` in estimation).
42    """
43    import matplotlib.pyplot as plt
44
45    if ax is None:
46        fig, ax = plt.subplots(figsize=figsize)
47    else:
48        fig = ax.get_figure()
49
50    if kind == "gap":
51        _plot_gap(result, ax, show_ci, **kwargs)
52    elif kind == "status":
53        _plot_status(result, ax, **kwargs)
54    elif kind == "factors":
55        _plot_factors(result, ax, **kwargs)
56    elif kind == "counterfactual":
57        _plot_counterfactual(result, ax, units, **kwargs)
58    elif kind == "equiv":
59        _plot_equiv(result, ax, **kwargs)
60    elif kind == "calendar":
61        _plot_calendar(result, ax, show_ci, **kwargs)
62    else:
63        raise ValueError(f"Unknown plot kind: {kind}")
64
65    if title:
66        ax.set_title(title)
67
68    fig.tight_layout()
69    return fig

Plot fect results.

Parameters

result : FectResult Output from pyfector.fect. kind : str Plot type. units : list, optional Unit IDs for counterfactual plot. show_ci : bool Show confidence intervals (requires se=True in estimation).