Coverage for partipy/weights.py: 91%
23 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-05-09 10:41 +0200
« prev ^ index » next coverage.py v7.8.0, created at 2025-05-09 10:41 +0200
1import numpy as np
4def compute_bisquare_weights(R: np.ndarray, epsilon: float = 0.1) -> np.ndarray:
5 """
6 Compute Tukey's bisquare (biweight) weights for robust estimation based on residual magnitudes.
8 For each data point, the ℓ₁ norm of the residual vector is computed. A robust
9 scale parameter is estimated using the median absolute deviation (MAD), and a
10 cutoff threshold c is defined as c = 6 * median(||r_i||₁) + ε, where ε is a small
11 constant to prevent division by zero. Residual norms are scaled by c, and weights
12 are assigned using Tukey's bisquare function: points with scaled residuals < 1
13 receive smoothly decreasing weights; those ≥ 1 receive weight 0.
15 This redescending M-estimator effectively suppresses the influence of extreme
16 outliers while preserving the influence of inliers.
18 References
19 ----------
20 Eugster, M.J.A., Leisch, F. (2011).
21 Weighted and robust archetypal analysis.
22 Computational Statistics & Data Analysis, 55(3), 1215-1225.
23 https://doi.org/10.1016/j.csda.2010.10.017
25 Fox, J., Weisberg, 2013.
26 Robust Regrssion.
27 http://users.stat.umn.edu/~sandy/courses/8053/handouts/robust.pdf
29 Parameters
30 ----------
31 R : np.ndarray
32 Residual matrix of shape (n_samples, n_features), where each row corresponds
33 to the residual vector of a data point.
35 Returns
36 -------
37 W : np.ndarray
38 Weight vector of shape (n_samples,), with values in [0, 1].
39 Points with residual norms well below the threshold receive weight near 1;
40 points beyond the cutoff are fully downweighted to 0.
41 """
42 l1_norm = np.abs(R).sum(axis=1)
43 selection_vec = l1_norm > epsilon
44 if np.sum(selection_vec) > 0:
45 c = 6 * np.median(l1_norm[selection_vec])
46 else:
47 c = 6 * np.median(l1_norm)
48 l1_norm /= c
49 W = np.zeros_like(l1_norm)
50 W[l1_norm < 1] = np.square(1 - np.square(l1_norm[l1_norm < 1]))
51 return W
54def compute_huber_weights(R: np.ndarray, epsilon: float = 0.1) -> np.ndarray:
55 """
56 Compute Huber weights for robust estimation based on residual magnitudes.
58 For each data point, the Euclidean norm (l2 norm) of the residual vector
59 is computed. A robust scale parameter is estimated using the median absolute
60 deviation (MAD), and the threshold parameter δ is set according to the
61 standard Huber rule: δ = 1.345 * σ̂. Weights are then assigned using the
62 Huber function: points with residual norms below δ receive full weight (1),
63 while those above δ are downweighted proportionally to δ / ||r_i||.
65 This weighting scheme achieves high statistical efficiency under Gaussian noise
66 while maintaining robustness to outliers.
68 Reference
69 ---------
70 Huber, P.J. (1964). Robust Estimation of a Location Parameter.
71 The Annals of Mathematical Statistics, 35(1), 73-101.
72 https://doi.org/10.1214/aoms/1177703732
74 Fox, J., Weisberg, 2013.
75 Robust Regrssion.
76 http://users.stat.umn.edu/~sandy/courses/8053/handouts/robust.pdf
78 Parameters
79 ----------
80 R : np.ndarray
81 Residual matrix of shape (n_samples, n_features), where each row corresponds
82 to the residual vector of a data point.
84 Returns
85 -------
86 W : np.ndarray
87 Weight vector of shape (n_samples,), with values in (0, 1].
88 Weights decrease with increasing residual magnitude beyond the threshold δ.
89 """
90 epsilon = 0.1
92 l2_norm = np.sqrt(np.sum(np.square(R), axis=1))
93 selection_vec = l2_norm > epsilon
94 if np.sum(selection_vec) > 0:
95 sigma_hat = np.median(l2_norm[l2_norm > epsilon]) / 0.6745 # Consistent estimator for Gaussian noise
96 else:
97 sigma_hat = np.median(l2_norm) / 0.6745 # Consistent estimator for Gaussian noise
98 delta = 1.345 * sigma_hat
99 W = np.ones_like(l2_norm)
100 mask = l2_norm > delta
101 W[mask] = delta / l2_norm[mask]
102 return W