pyfector

pyfector — Fast counterfactual estimators for panel data in Python.

A high-performance Python reimplementation of the R fect package, featuring randomized SVD, GPU acceleration (CuPy), parallel computing (joblib), Polars data ingestion, and seeded reproducibility.

Usage::

import pyfector

result = pyfector.fect(
    data=df,
    Y="outcome", D="treat",
    index=("unit", "year"),
    method="ife",
    r=(0, 5),
    se=True,
    seed=42,
)
result.summary()
result.plot()
 1"""
 2pyfector — Fast counterfactual estimators for panel data in Python.
 3
 4A high-performance Python reimplementation of the R ``fect`` package,
 5featuring randomized SVD, GPU acceleration (CuPy), parallel computing
 6(joblib), Polars data ingestion, and seeded reproducibility.
 7
 8Usage::
 9
10    import pyfector
11
12    result = pyfector.fect(
13        data=df,
14        Y="outcome", D="treat",
15        index=("unit", "year"),
16        method="ife",
17        r=(0, 5),
18        se=True,
19        seed=42,
20    )
21    result.summary()
22    result.plot()
23
24"""
25
26__version__ = "0.1.0"
27
28from .fect import fect, FectResult
29from .backend import set_device, get_device
30from .diagnostics import run_diagnostics, DiagnosticResult
31from .plotting import plot
32
33__all__ = [
34    "fect",
35    "FectResult",
36    "set_device",
37    "get_device",
38    "run_diagnostics",
39    "DiagnosticResult",
40    "plot",
41]
def fect( data, Y: str, D: str, index: tuple[str, str], X: list[str] | None = None, W: str | None = None, group: str | None = None, method: Literal['fe', 'ife', 'mc', 'cfe', 'both'] = 'ife', force: Literal['none', 'unit', 'time', 'two-way'] = 'two-way', r: int | tuple[int, int] = 0, lam: float | None = None, nlambda: int = 10, CV: bool = True, k: int = 10, cv_prop: float = 0.1, cv_nobs: int = 3, cv_treat: bool = True, cv_donut: int = 0, criterion: str = 'mspe', se: bool = False, vartype: Literal['bootstrap', 'jackknife'] = 'bootstrap', nboots: int = 200, alpha: float = 0.05, tol: float = 1e-07, max_iter: int = 5000, min_T0: int = 1, max_missing: float = 1.0, normalize: bool = False, Z: list[str] | None = None, Q: list[str] | None = None, device: Literal['cpu', 'gpu'] = 'cpu', n_jobs: int = 1, seed: int | None = None) -> FectResult:
181def fect(
182    data,
183    Y: str,
184    D: str,
185    index: tuple[str, str],
186    X: list[str] | None = None,
187    W: str | None = None,
188    group: str | None = None,
189    method: Literal["fe", "ife", "mc", "cfe", "both"] = "ife",
190    force: Literal["none", "unit", "time", "two-way"] = "two-way",
191    r: int | tuple[int, int] = 0,
192    lam: float | None = None,
193    nlambda: int = 10,
194    CV: bool = True,
195    k: int = 10,
196    cv_prop: float = 0.1,
197    cv_nobs: int = 3,
198    cv_treat: bool = True,
199    cv_donut: int = 0,
200    criterion: str = "mspe",
201    se: bool = False,
202    vartype: Literal["bootstrap", "jackknife"] = "bootstrap",
203    nboots: int = 200,
204    alpha: float = 0.05,
205    tol: float = 1e-7,
206    max_iter: int = 5000,
207    min_T0: int = 1,
208    max_missing: float = 1.0,
209    normalize: bool = False,
210    # CFE-specific
211    Z: list[str] | None = None,
212    Q: list[str] | None = None,
213    # Performance
214    device: Literal["cpu", "gpu"] = "cpu",
215    n_jobs: int = 1,
216    seed: int | None = None,
217) -> FectResult:
218    """Estimate counterfactual treatment effects for panel data.
219
220    This is the main entry point — equivalent to R fect's ``fect()`` function.
221
222    Parameters
223    ----------
224    data : polars.DataFrame, pandas.DataFrame
225        Long-format panel data.
226    Y, D : str
227        Column names for outcome and binary treatment indicator.
228    index : (str, str)
229        Column names for (unit_id, time_period).
230    X : list of str, optional
231        Time-varying covariates.
232    method : {"fe", "ife", "mc", "cfe", "both"}
233        Estimation method.
234    force : {"none", "unit", "time", "two-way"}
235        Fixed effects specification.
236    r : int or (int, int)
237        Number of factors.  If tuple, CV selects from range.
238    lam : float, optional
239        Nuclear norm penalty for MC.  If None with CV=True, auto-selected.
240    se : bool
241        Compute standard errors via bootstrap/jackknife.
242    device : {"cpu", "gpu"}
243        Compute device.
244    n_jobs : int
245        Parallel workers for CV and bootstrap.
246    seed : int, optional
247        Random seed for full reproducibility.
248    """
249    # Set device
250    set_device(device)
251    xp = get_backend()
252
253    # Map force string to int
254    force_map = {"none": 0, "unit": 1, "time": 2, "two-way": 3}
255    force_int = force_map[force]
256
257    # Prepare panel data
258    panel = prepare_panel(
259        data, Y=Y, D=D, index=index, X=X, W=W,
260        group=group, min_T0=min_T0, max_missing=max_missing,
261    )
262
263    # Move to device
264    Y_mat = to_device(panel.Y)
265    D_mat = to_device(panel.D)
266    I_mat = to_device(panel.I)
267    II_mat = to_device(panel.II)
268    X_mat = to_device(panel.X) if panel.X is not None else None
269    W_mat = to_device(panel.W) if panel.W is not None else None
270
271    # Normalize
272    norm_factor = 1.0
273    if normalize:
274        sd_y = float(xp.std(Y_mat[I_mat > 0]))
275        if sd_y > 0:
276            Y_mat = Y_mat / sd_y
277            norm_factor = sd_y
278
279    # Initial fit
280    Y0, beta0 = initial_fit(Y_mat, X_mat, II_mat, force_int)
281
282    # Determine r and lambda
283    r_cv = None
284    lambda_cv = None
285    cv_result = None
286
287    if method == "ife":
288        if isinstance(r, tuple) and CV:
289            cv_result = cv_ife(
290                Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0,
291                force=force_int, r_range=r, k=k, cv_prop=cv_prop,
292                cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut,
293                criterion=criterion, tol=tol, max_iter=max_iter,
294                n_jobs=n_jobs, seed=seed,
295            )
296            r_cv = cv_result.best_r
297        else:
298            r_cv = r if isinstance(r, int) else r[0]
299
300    elif method == "mc":
301        if lam is None and CV:
302            cv_result = cv_mc(
303                Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0,
304                force=force_int, nlambda=nlambda, k=k, cv_prop=cv_prop,
305                cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut,
306                criterion=criterion, tol=tol, max_iter=max_iter,
307                n_jobs=n_jobs, seed=seed,
308            )
309            lambda_cv = cv_result.best_lambda
310        else:
311            lambda_cv = lam if lam is not None else 0.0
312
313    elif method == "fe":
314        r_cv = 0
315
316    elif method == "cfe":
317        r_cv = r if isinstance(r, int) else r[0]
318
319    # Point estimation
320    if method in ("fe", "ife"):
321        est = estimate_ife(
322            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
323            r=r_cv, force=force_int, tol=tol, max_iter=max_iter,
324        )
325    elif method == "mc":
326        est = estimate_mc(
327            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
328            lam=lambda_cv, force=force_int, tol=tol, max_iter=max_iter,
329        )
330    elif method == "cfe":
331        est = estimate_cfe(
332            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
333            r=r_cv, force=force_int, tol=tol, max_iter=max_iter,
334        )
335    elif method == "both":
336        # Run both IFE and MC, return IFE results with MC comparison
337        if isinstance(r, tuple) and CV:
338            cv_result = cv_ife(
339                Y_mat, Y0, X_mat, I_mat, II_mat, D_mat, W_mat, beta0,
340                force=force_int, r_range=r, k=k, cv_prop=cv_prop,
341                cv_nobs=cv_nobs, cv_treat=cv_treat, cv_donut=cv_donut,
342                criterion=criterion, tol=tol, max_iter=max_iter,
343                n_jobs=n_jobs, seed=seed,
344            )
345            r_cv = cv_result.best_r
346        else:
347            r_cv = r if isinstance(r, int) else r[0]
348        est = estimate_ife(
349            Y_mat, Y0, X_mat, II_mat, W_mat, beta0,
350            r=r_cv, force=force_int, tol=tol, max_iter=max_iter,
351        )
352    else:
353        raise ValueError(f"Unknown method: {method}")
354
355    # Compute effects
356    eff = Y_mat - est.fit
357    Y_ct = est.fit
358
359    # Denormalize
360    if normalize and norm_factor != 1.0:
361        eff = eff * norm_factor
362        Y_ct = Y_ct * norm_factor
363        Y_mat = Y_mat * norm_factor
364        if est.beta is not None:
365            est = est._replace(beta=est.beta * norm_factor)
366
367    # ATT computation
368    T_on = to_device(panel.T_on)
369    att_avg, att_on, time_on, count_on, att_avg_unit = _compute_effects(
370        to_numpy(eff), to_numpy(D_mat), to_numpy(panel.T_on), to_numpy(I_mat),
371    )
372
373    # Build result
374    result = FectResult(
375        method=method,
376        r_cv=r_cv,
377        lambda_cv=lambda_cv,
378        att_avg=att_avg,
379        att_avg_unit=att_avg_unit,
380        att_on=att_on,
381        time_on=time_on,
382        count_on=count_on,
383        beta=to_numpy(est.beta) if est.beta is not None else None,
384        covariate_names=panel.covariate_names,
385        mu=est.mu,
386        alpha=to_numpy(est.alpha) if est.alpha is not None else None,
387        xi=to_numpy(est.xi) if est.xi is not None else None,
388        factors=to_numpy(est.factors) if est.factors is not None else None,
389        loadings=to_numpy(est.loadings) if est.loadings is not None else None,
390        Y_ct=to_numpy(Y_ct),
391        eff=to_numpy(eff),
392        residuals=to_numpy(est.residuals),
393        sigma2=est.sigma2,
394        IC=est.IC,
395        PC=est.PC,
396        niter=est.niter,
397        converged=est.converged,
398        cv_result=cv_result,
399        panel=panel,
400        seed=seed,
401    )
402
403    # Inference
404    if se:
405        result.inference = _run_inference(
406            result, panel, Y_mat, X_mat, W_mat, beta0, Y0,
407            method=method, r_cv=r_cv, lambda_cv=lambda_cv,
408            force_int=force_int, tol=tol, max_iter=max_iter,
409            vartype=vartype, nboots=nboots, alpha=alpha,
410            n_jobs=n_jobs, seed=seed, normalize=normalize,
411            norm_factor=norm_factor,
412        )
413
414    return result

Estimate counterfactual treatment effects for panel data.

This is the main entry point — equivalent to R fect's fect() function.

Parameters

data : polars.DataFrame, pandas.DataFrame Long-format panel data. Y, D : str Column names for outcome and binary treatment indicator. index : (str, str) Column names for (unit_id, time_period). X : list of str, optional Time-varying covariates. method : {"fe", "ife", "mc", "cfe", "both"} Estimation method. force : {"none", "unit", "time", "two-way"} Fixed effects specification. r : int or (int, int) Number of factors. If tuple, CV selects from range. lam : float, optional Nuclear norm penalty for MC. If None with CV=True, auto-selected. se : bool Compute standard errors via bootstrap/jackknife. device : {"cpu", "gpu"} Compute device. n_jobs : int Parallel workers for CV and bootstrap. seed : int, optional Random seed for full reproducibility.

@dataclass
class FectResult:
 64@dataclass
 65class FectResult:
 66    """Container for all fect estimation results."""
 67    # Method info
 68    method: str
 69    r_cv: int | None = None
 70    lambda_cv: float | None = None
 71
 72    # Point estimates
 73    att_avg: float = 0.0
 74    att_avg_unit: float = 0.0
 75
 76    # Dynamic effects
 77    att_on: np.ndarray | None = None
 78    time_on: np.ndarray | None = None
 79    count_on: np.ndarray | None = None
 80
 81    # Exit effects (treatment reversal)
 82    att_off: np.ndarray | None = None
 83    time_off: np.ndarray | None = None
 84
 85    # Coefficients
 86    beta: np.ndarray | None = None
 87    covariate_names: list[str] = field(default_factory=list)
 88
 89    # Fixed effects
 90    mu: float = 0.0
 91    alpha: np.ndarray | None = None   # unit FE
 92    xi: np.ndarray | None = None      # time FE
 93    factors: np.ndarray | None = None
 94    loadings: np.ndarray | None = None
 95
 96    # Counterfactual and effects matrices
 97    Y_ct: np.ndarray | None = None    # T×N counterfactual
 98    eff: np.ndarray | None = None     # T×N treatment effects
 99    residuals: np.ndarray | None = None
100
101    # Model fit
102    sigma2: float = 0.0
103    IC: float = 0.0
104    PC: float = 0.0
105    rmse: float = 0.0
106    niter: int = 0
107    converged: bool = False
108
109    # Inference
110    inference: InferenceResult | None = None
111
112    # CV
113    cv_result: CVResult | None = None
114
115    # Panel metadata
116    panel: PanelData | None = None
117
118    # Reproducibility
119    seed: int | None = None
120
121    def summary(self) -> str:
122        """Print summary table of results."""
123        lines = []
124        lines.append(f"pyfector estimation results")
125        lines.append(f"{'='*60}")
126        lines.append(f"Method: {self.method}")
127        if self.r_cv is not None:
128            lines.append(f"Number of factors (CV): {self.r_cv}")
129        if self.lambda_cv is not None:
130            lines.append(f"Lambda (CV): {self.lambda_cv:.6f}")
131        lines.append(f"Converged: {self.converged} (iter={self.niter})")
132        lines.append(f"Sigma^2: {self.sigma2:.6f}")
133        lines.append(f"")
134        lines.append(f"ATT (average): {self.att_avg:.6f}")
135        if self.inference is not None:
136            inf = self.inference
137            lines.append(f"  SE:     {inf.att_avg_se:.6f}")
138            lines.append(f"  CI:     [{inf.att_avg_ci[0]:.6f}, {inf.att_avg_ci[1]:.6f}]")
139            lines.append(f"  p-val:  {inf.att_avg_pval:.4f}")
140
141        if self.beta is not None and len(self.beta) > 0:
142            lines.append(f"")
143            lines.append(f"Coefficients:")
144            for i, name in enumerate(self.covariate_names):
145                lines.append(f"  {name}: {self.beta[i]:.6f}")
146
147        if self.att_on is not None and self.time_on is not None:
148            lines.append(f"")
149            lines.append(f"Dynamic effects (ATT by relative time):")
150            lines.append(f"  {'Time':>6s}  {'ATT':>10s}  {'Count':>6s}", )
151            for i, t in enumerate(self.time_on):
152                count = self.count_on[i] if self.count_on is not None else ""
153                att = self.att_on[i]
154                if self.inference is not None:
155                    se = self.inference.att_on_se[i]
156                    lines.append(f"  {t:>6.0f}  {att:>10.4f}  ({se:.4f})  {count}")
157                else:
158                    lines.append(f"  {t:>6.0f}  {att:>10.4f}  {count}")
159
160        lines.append(f"{'='*60}")
161        if self.panel is not None:
162            lines.append(f"N={self.panel.N}, T={self.panel.T}")
163        if self.seed is not None:
164            lines.append(f"Seed: {self.seed}")
165        return "\n".join(lines)
166
167    def __repr__(self):
168        return self.summary()
169
170    def plot(self, kind="gap", **kwargs):
171        """Plot results. Shortcut for ``pyfector.plot(self, kind, ...)``."""
172        from .plotting import plot as _plot
173        return _plot(self, kind=kind, **kwargs)
174
175    def diagnose(self, **kwargs):
176        """Run diagnostic tests. Shortcut for ``pyfector.run_diagnostics(self, ...)``."""
177        from .diagnostics import run_diagnostics
178        return run_diagnostics(self, **kwargs)

Container for all fect estimation results.

FectResult( method: str, r_cv: int | None = None, lambda_cv: float | None = None, att_avg: float = 0.0, att_avg_unit: float = 0.0, att_on: numpy.ndarray | None = None, time_on: numpy.ndarray | None = None, count_on: numpy.ndarray | None = None, att_off: numpy.ndarray | None = None, time_off: numpy.ndarray | None = None, beta: numpy.ndarray | None = None, covariate_names: list[str] = <factory>, mu: float = 0.0, alpha: numpy.ndarray | None = None, xi: numpy.ndarray | None = None, factors: numpy.ndarray | None = None, loadings: numpy.ndarray | None = None, Y_ct: numpy.ndarray | None = None, eff: numpy.ndarray | None = None, residuals: numpy.ndarray | None = None, sigma2: float = 0.0, IC: float = 0.0, PC: float = 0.0, rmse: float = 0.0, niter: int = 0, converged: bool = False, inference: pyfector.inference.InferenceResult | None = None, cv_result: pyfector.cv.CVResult | None = None, panel: pyfector.panel.PanelData | None = None, seed: int | None = None)
method: str
r_cv: int | None = None
lambda_cv: float | None = None
att_avg: float = 0.0
att_avg_unit: float = 0.0
att_on: numpy.ndarray | None = None
time_on: numpy.ndarray | None = None
count_on: numpy.ndarray | None = None
att_off: numpy.ndarray | None = None
time_off: numpy.ndarray | None = None
beta: numpy.ndarray | None = None
covariate_names: list[str]
mu: float = 0.0
alpha: numpy.ndarray | None = None
xi: numpy.ndarray | None = None
factors: numpy.ndarray | None = None
loadings: numpy.ndarray | None = None
Y_ct: numpy.ndarray | None = None
eff: numpy.ndarray | None = None
residuals: numpy.ndarray | None = None
sigma2: float = 0.0
IC: float = 0.0
PC: float = 0.0
rmse: float = 0.0
niter: int = 0
converged: bool = False
inference: pyfector.inference.InferenceResult | None = None
cv_result: pyfector.cv.CVResult | None = None
panel: pyfector.panel.PanelData | None = None
seed: int | None = None
def summary(self) -> str:
121    def summary(self) -> str:
122        """Print summary table of results."""
123        lines = []
124        lines.append(f"pyfector estimation results")
125        lines.append(f"{'='*60}")
126        lines.append(f"Method: {self.method}")
127        if self.r_cv is not None:
128            lines.append(f"Number of factors (CV): {self.r_cv}")
129        if self.lambda_cv is not None:
130            lines.append(f"Lambda (CV): {self.lambda_cv:.6f}")
131        lines.append(f"Converged: {self.converged} (iter={self.niter})")
132        lines.append(f"Sigma^2: {self.sigma2:.6f}")
133        lines.append(f"")
134        lines.append(f"ATT (average): {self.att_avg:.6f}")
135        if self.inference is not None:
136            inf = self.inference
137            lines.append(f"  SE:     {inf.att_avg_se:.6f}")
138            lines.append(f"  CI:     [{inf.att_avg_ci[0]:.6f}, {inf.att_avg_ci[1]:.6f}]")
139            lines.append(f"  p-val:  {inf.att_avg_pval:.4f}")
140
141        if self.beta is not None and len(self.beta) > 0:
142            lines.append(f"")
143            lines.append(f"Coefficients:")
144            for i, name in enumerate(self.covariate_names):
145                lines.append(f"  {name}: {self.beta[i]:.6f}")
146
147        if self.att_on is not None and self.time_on is not None:
148            lines.append(f"")
149            lines.append(f"Dynamic effects (ATT by relative time):")
150            lines.append(f"  {'Time':>6s}  {'ATT':>10s}  {'Count':>6s}", )
151            for i, t in enumerate(self.time_on):
152                count = self.count_on[i] if self.count_on is not None else ""
153                att = self.att_on[i]
154                if self.inference is not None:
155                    se = self.inference.att_on_se[i]
156                    lines.append(f"  {t:>6.0f}  {att:>10.4f}  ({se:.4f})  {count}")
157                else:
158                    lines.append(f"  {t:>6.0f}  {att:>10.4f}  {count}")
159
160        lines.append(f"{'='*60}")
161        if self.panel is not None:
162            lines.append(f"N={self.panel.N}, T={self.panel.T}")
163        if self.seed is not None:
164            lines.append(f"Seed: {self.seed}")
165        return "\n".join(lines)

Print summary table of results.

def plot(self, kind='gap', **kwargs):
170    def plot(self, kind="gap", **kwargs):
171        """Plot results. Shortcut for ``pyfector.plot(self, kind, ...)``."""
172        from .plotting import plot as _plot
173        return _plot(self, kind=kind, **kwargs)

Plot results. Shortcut for pyfector.plot(self, kind, ...).

def diagnose(self, **kwargs):
175    def diagnose(self, **kwargs):
176        """Run diagnostic tests. Shortcut for ``pyfector.run_diagnostics(self, ...)``."""
177        from .diagnostics import run_diagnostics
178        return run_diagnostics(self, **kwargs)

Run diagnostic tests. Shortcut for pyfector.run_diagnostics(self, ...).

def set_device(device: Literal['cpu', 'gpu']) -> None:
35def set_device(device: Literal["cpu", "gpu"]) -> None:
36    """Set the compute device globally."""
37    global _DEVICE
38    if device == "gpu" and not _check_cupy():
39        raise ImportError(
40            "CuPy is required for GPU support. Install it with: "
41            "pip install cupy-cuda12x  (adjust for your CUDA version)"
42        )
43    _DEVICE = device

Set the compute device globally.

def get_device() -> Literal['cpu', 'gpu']:
46def get_device() -> Literal["cpu", "gpu"]:
47    """Return the current device."""
48    return _DEVICE

Return the current device.

def run_diagnostics( result, f_threshold: float = 0.5, tost_threshold: float = 0.36, placebo_period: tuple[int, int] | None = None, carryover_period: tuple[int, int] | None = None, loo: bool = False) -> DiagnosticResult:
 92def run_diagnostics(
 93    result,
 94    f_threshold: float = 0.5,
 95    tost_threshold: float = 0.36,
 96    placebo_period: tuple[int, int] | None = None,
 97    carryover_period: tuple[int, int] | None = None,
 98    loo: bool = False,
 99) -> DiagnosticResult:
100    """Run diagnostic tests on a FectResult.
101
102    Parameters
103    ----------
104    result : FectResult
105        Must have inference results (``se=True``).
106    f_threshold : float
107        Non-centrality parameter for equivalence F-test.
108    tost_threshold : float
109        Equivalence bound for TOST.
110    placebo_period : (start, end), optional
111        Relative time window for placebo test.
112    """
113    from scipy import stats
114
115    diag = DiagnosticResult()
116
117    if result.inference is None:
118        return diag
119
120    inf = result.inference
121    time_on = result.time_on
122    att_on = result.att_on
123
124    if time_on is None or att_on is None:
125        return diag
126
127    # Pre-treatment periods
128    pre_mask = time_on < 0
129    if not np.any(pre_mask):
130        return diag
131
132    pre_idx = np.where(pre_mask)[0]
133    k = len(pre_idx)
134    att_pre = att_on[pre_idx]
135
136    # F-test from bootstrap covariance
137    if inf.att_on_boot is not None and inf.att_on_boot.shape[1] > k:
138        boot_pre = inf.att_on_boot[pre_idx, :]  # (k, nboots)
139        S = np.cov(boot_pre)  # (k, k)
140        if k == 1:
141            S = S.reshape(1, 1)
142
143        n_boot = boot_pre.shape[1]
144        try:
145            cond = np.linalg.cond(S)
146            if cond > 1e12:
147                S_inv = np.linalg.pinv(S)
148            else:
149                S_inv = np.linalg.inv(S)
150            F_stat = float(att_pre @ S_inv @ att_pre)
151            scale = (n_boot - k) / ((n_boot - 1) * k)
152            F_stat_scaled = F_stat * scale
153
154            if np.isfinite(F_stat_scaled) and F_stat_scaled >= 0:
155                diag.f_stat = F_stat_scaled
156                diag.f_df1 = k
157                diag.f_df2 = n_boot - k
158                diag.f_pval = float(1 - stats.f.cdf(F_stat_scaled, k, n_boot - k))
159
160                # Equivalence F-test (non-central F)
161                ncp = n_boot * f_threshold
162                diag.equiv_f_pval = float(stats.ncf.cdf(F_stat_scaled, k, n_boot - k, ncp))
163                diag.equiv_threshold = f_threshold
164        except np.linalg.LinAlgError:
165            pass
166
167    # TOST
168    if inf.att_on_se is not None:
169        se_pre = inf.att_on_se[pre_idx]
170        n_boot = inf.att_on_boot.shape[1] if inf.att_on_boot is not None else 200
171        df = n_boot - 1
172
173        tost_pvals = np.full(k, np.nan)
174        for i in range(k):
175            if se_pre[i] > 0:
176                t_upper = (att_pre[i] - tost_threshold) / se_pre[i]
177                t_lower = (att_pre[i] + tost_threshold) / se_pre[i]
178                p_upper = float(stats.t.cdf(t_upper, df))
179                p_lower = float(1 - stats.t.cdf(t_lower, df))
180                tost_pvals[i] = max(p_upper, p_lower)
181
182        diag.tost_pvals = tost_pvals
183        diag.tost_periods = time_on[pre_idx]
184        diag.tost_threshold = tost_threshold
185
186    # Placebo test
187    if placebo_period is not None:
188        p_start, p_end = placebo_period
189        placebo_mask = (time_on >= p_start) & (time_on <= p_end) & pre_mask
190        if np.any(placebo_mask):
191            p_idx = np.where(placebo_mask)[0]
192            diag.placebo_att = float(np.mean(att_on[p_idx]))
193            if inf.att_on_boot is not None:
194                boot_placebo = np.mean(inf.att_on_boot[p_idx, :], axis=0)
195                diag.placebo_pval = min(
196                    2 * np.mean(boot_placebo >= 0),
197                    2 * np.mean(boot_placebo <= 0),
198                    1.0,
199                )
200
201    # Carryover test (for treatment reversals)
202    if carryover_period is not None and hasattr(result, "att_off") and result.att_off is not None:
203        c_start, c_end = carryover_period
204        time_off = getattr(result, "time_off", None)
205        if time_off is not None:
206            off_mask = (time_off >= c_start) & (time_off <= c_end)
207            if np.any(off_mask):
208                diag.carryover_att = float(np.mean(result.att_off[off_mask]))
209
210    # Leave-one-period-out test
211    if loo and att_on is not None and time_on is not None:
212        post_mask = time_on >= 0
213        post_idx = np.where(post_mask)[0]
214        if len(post_idx) > 1:
215            full_att = result.att_avg
216            loo_atts = []
217            loo_periods = []
218            for drop_i in post_idx:
219                remaining = np.delete(post_idx, np.where(post_idx == drop_i))
220                if len(remaining) > 0:
221                    loo_att = float(np.mean(att_on[remaining]))
222                    loo_atts.append(loo_att)
223                    loo_periods.append(time_on[drop_i])
224            diag.loo_atts = np.array(loo_atts)
225            diag.loo_periods = np.array(loo_periods)
226            diag.loo_max_change = float(np.max(np.abs(diag.loo_atts - full_att)))
227
228    return diag

Run diagnostic tests on a FectResult.

Parameters

result : FectResult Must have inference results (se=True). f_threshold : float Non-centrality parameter for equivalence F-test. tost_threshold : float Equivalence bound for TOST. placebo_period : (start, end), optional Relative time window for placebo test.

@dataclass
class DiagnosticResult:
35@dataclass
36class DiagnosticResult:
37    """Container for diagnostic test results."""
38    # F-test for pre-trends
39    f_stat: float | None = None
40    f_pval: float | None = None
41    f_df1: int | None = None
42    f_df2: int | None = None
43
44    # Equivalence F-test
45    equiv_f_pval: float | None = None
46    equiv_threshold: float | None = None
47
48    # TOST per pre-period
49    tost_pvals: np.ndarray | None = None
50    tost_periods: np.ndarray | None = None
51    tost_threshold: float | None = None
52
53    # Placebo
54    placebo_att: float | None = None
55    placebo_pval: float | None = None
56
57    # Carryover
58    carryover_att: float | None = None
59    carryover_pval: float | None = None
60
61    # Leave-one-out
62    loo_atts: np.ndarray | None = None      # ATT when each pre-period is dropped
63    loo_periods: np.ndarray | None = None
64    loo_max_change: float | None = None     # max |ATT_full - ATT_loo|
65
66    def summary(self) -> str:
67        lines = ["Diagnostic Tests", "=" * 50]
68        if self.f_stat is not None:
69            lines.append(f"Pre-trend F-test:")
70            lines.append(f"  F({self.f_df1},{self.f_df2}) = {self.f_stat:.4f}, p = {self.f_pval:.4f}")
71        if self.equiv_f_pval is not None:
72            lines.append(f"Equivalence F-test:")
73            lines.append(f"  p = {self.equiv_f_pval:.4f} (threshold={self.equiv_threshold})")
74        if self.tost_pvals is not None:
75            lines.append("TOST per pre-period:")
76            for i, t in enumerate(self.tost_periods):
77                p = self.tost_pvals[i]
78                status = "PASS" if p < 0.05 else "fail"
79                lines.append(f"  t={t:+.0f}: p={p:.4f} [{status}]")
80        if self.placebo_pval is not None:
81            lines.append(f"Placebo test:")
82            lines.append(f"  ATT={self.placebo_att:.4f}, p={self.placebo_pval:.4f}")
83        if self.carryover_pval is not None:
84            lines.append(f"Carryover test:")
85            lines.append(f"  ATT={self.carryover_att:.4f}, p={self.carryover_pval:.4f}")
86        if self.loo_max_change is not None:
87            lines.append(f"Leave-one-out:")
88            lines.append(f"  Max ATT change = {self.loo_max_change:.6f}")
89        return "\n".join(lines)

Container for diagnostic test results.

DiagnosticResult( f_stat: float | None = None, f_pval: float | None = None, f_df1: int | None = None, f_df2: int | None = None, equiv_f_pval: float | None = None, equiv_threshold: float | None = None, tost_pvals: numpy.ndarray | None = None, tost_periods: numpy.ndarray | None = None, tost_threshold: float | None = None, placebo_att: float | None = None, placebo_pval: float | None = None, carryover_att: float | None = None, carryover_pval: float | None = None, loo_atts: numpy.ndarray | None = None, loo_periods: numpy.ndarray | None = None, loo_max_change: float | None = None)
f_stat: float | None = None
f_pval: float | None = None
f_df1: int | None = None
f_df2: int | None = None
equiv_f_pval: float | None = None
equiv_threshold: float | None = None
tost_pvals: numpy.ndarray | None = None
tost_periods: numpy.ndarray | None = None
tost_threshold: float | None = None
placebo_att: float | None = None
placebo_pval: float | None = None
carryover_att: float | None = None
carryover_pval: float | None = None
loo_atts: numpy.ndarray | None = None
loo_periods: numpy.ndarray | None = None
loo_max_change: float | None = None
def summary(self) -> str:
66    def summary(self) -> str:
67        lines = ["Diagnostic Tests", "=" * 50]
68        if self.f_stat is not None:
69            lines.append(f"Pre-trend F-test:")
70            lines.append(f"  F({self.f_df1},{self.f_df2}) = {self.f_stat:.4f}, p = {self.f_pval:.4f}")
71        if self.equiv_f_pval is not None:
72            lines.append(f"Equivalence F-test:")
73            lines.append(f"  p = {self.equiv_f_pval:.4f} (threshold={self.equiv_threshold})")
74        if self.tost_pvals is not None:
75            lines.append("TOST per pre-period:")
76            for i, t in enumerate(self.tost_periods):
77                p = self.tost_pvals[i]
78                status = "PASS" if p < 0.05 else "fail"
79                lines.append(f"  t={t:+.0f}: p={p:.4f} [{status}]")
80        if self.placebo_pval is not None:
81            lines.append(f"Placebo test:")
82            lines.append(f"  ATT={self.placebo_att:.4f}, p={self.placebo_pval:.4f}")
83        if self.carryover_pval is not None:
84            lines.append(f"Carryover test:")
85            lines.append(f"  ATT={self.carryover_att:.4f}, p={self.carryover_pval:.4f}")
86        if self.loo_max_change is not None:
87            lines.append(f"Leave-one-out:")
88            lines.append(f"  Max ATT change = {self.loo_max_change:.6f}")
89        return "\n".join(lines)
def plot( result, kind: Literal['gap', 'status', 'factors', 'counterfactual', 'equiv', 'calendar'] = 'gap', units: list | None = None, show_ci: bool = True, title: str | None = None, figsize: tuple[float, float] = (10, 6), ax=None, **kwargs):
20def plot(
21    result,
22    kind: Literal["gap", "status", "factors", "counterfactual", "equiv", "calendar"] = "gap",
23    units: list | None = None,
24    show_ci: bool = True,
25    title: str | None = None,
26    figsize: tuple[float, float] = (10, 6),
27    ax=None,
28    **kwargs,
29):
30    """Plot fect results.
31
32    Parameters
33    ----------
34    result : FectResult
35        Output from ``pyfector.fect()``.
36    kind : str
37        Plot type.
38    units : list, optional
39        Unit IDs for counterfactual plot.
40    show_ci : bool
41        Show confidence intervals (requires ``se=True`` in estimation).
42    """
43    import matplotlib.pyplot as plt
44
45    if ax is None:
46        fig, ax = plt.subplots(figsize=figsize)
47    else:
48        fig = ax.get_figure()
49
50    if kind == "gap":
51        _plot_gap(result, ax, show_ci, **kwargs)
52    elif kind == "status":
53        _plot_status(result, ax, **kwargs)
54    elif kind == "factors":
55        _plot_factors(result, ax, **kwargs)
56    elif kind == "counterfactual":
57        _plot_counterfactual(result, ax, units, **kwargs)
58    elif kind == "equiv":
59        _plot_equiv(result, ax, **kwargs)
60    elif kind == "calendar":
61        _plot_calendar(result, ax, show_ci, **kwargs)
62    else:
63        raise ValueError(f"Unknown plot kind: {kind}")
64
65    if title:
66        ax.set_title(title)
67
68    fig.tight_layout()
69    return fig

Plot fect results.

Parameters

result : FectResult Output from pyfector.fect. kind : str Plot type. units : list, optional Unit IDs for counterfactual plot. show_ci : bool Show confidence intervals (requires se=True in estimation).