Coverage for partipy/weights.py: 91%

23 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-05-09 10:41 +0200

1import numpy as np 

2 

3 

4def compute_bisquare_weights(R: np.ndarray, epsilon: float = 0.1) -> np.ndarray: 

5 """ 

6 Compute Tukey's bisquare (biweight) weights for robust estimation based on residual magnitudes. 

7 

8 For each data point, the ℓ₁ norm of the residual vector is computed. A robust 

9 scale parameter is estimated using the median absolute deviation (MAD), and a 

10 cutoff threshold c is defined as c = 6 * median(||r_i||₁) + ε, where ε is a small 

11 constant to prevent division by zero. Residual norms are scaled by c, and weights 

12 are assigned using Tukey's bisquare function: points with scaled residuals < 1 

13 receive smoothly decreasing weights; those ≥ 1 receive weight 0. 

14 

15 This redescending M-estimator effectively suppresses the influence of extreme 

16 outliers while preserving the influence of inliers. 

17 

18 References 

19 ---------- 

20 Eugster, M.J.A., Leisch, F. (2011). 

21 Weighted and robust archetypal analysis. 

22 Computational Statistics & Data Analysis, 55(3), 1215-1225. 

23 https://doi.org/10.1016/j.csda.2010.10.017 

24 

25 Fox, J., Weisberg, 2013. 

26 Robust Regrssion. 

27 http://users.stat.umn.edu/~sandy/courses/8053/handouts/robust.pdf 

28 

29 Parameters 

30 ---------- 

31 R : np.ndarray 

32 Residual matrix of shape (n_samples, n_features), where each row corresponds 

33 to the residual vector of a data point. 

34 

35 Returns 

36 ------- 

37 W : np.ndarray 

38 Weight vector of shape (n_samples,), with values in [0, 1]. 

39 Points with residual norms well below the threshold receive weight near 1; 

40 points beyond the cutoff are fully downweighted to 0. 

41 """ 

42 l1_norm = np.abs(R).sum(axis=1) 

43 selection_vec = l1_norm > epsilon 

44 if np.sum(selection_vec) > 0: 

45 c = 6 * np.median(l1_norm[selection_vec]) 

46 else: 

47 c = 6 * np.median(l1_norm) 

48 l1_norm /= c 

49 W = np.zeros_like(l1_norm) 

50 W[l1_norm < 1] = np.square(1 - np.square(l1_norm[l1_norm < 1])) 

51 return W 

52 

53 

54def compute_huber_weights(R: np.ndarray, epsilon: float = 0.1) -> np.ndarray: 

55 """ 

56 Compute Huber weights for robust estimation based on residual magnitudes. 

57 

58 For each data point, the Euclidean norm (l2 norm) of the residual vector 

59 is computed. A robust scale parameter is estimated using the median absolute 

60 deviation (MAD), and the threshold parameter δ is set according to the 

61 standard Huber rule: δ = 1.345 * σ̂. Weights are then assigned using the 

62 Huber function: points with residual norms below δ receive full weight (1), 

63 while those above δ are downweighted proportionally to δ / ||r_i||. 

64 

65 This weighting scheme achieves high statistical efficiency under Gaussian noise 

66 while maintaining robustness to outliers. 

67 

68 Reference 

69 --------- 

70 Huber, P.J. (1964). Robust Estimation of a Location Parameter. 

71 The Annals of Mathematical Statistics, 35(1), 73-101. 

72 https://doi.org/10.1214/aoms/1177703732 

73 

74 Fox, J., Weisberg, 2013. 

75 Robust Regrssion. 

76 http://users.stat.umn.edu/~sandy/courses/8053/handouts/robust.pdf 

77 

78 Parameters 

79 ---------- 

80 R : np.ndarray 

81 Residual matrix of shape (n_samples, n_features), where each row corresponds 

82 to the residual vector of a data point. 

83 

84 Returns 

85 ------- 

86 W : np.ndarray 

87 Weight vector of shape (n_samples,), with values in (0, 1]. 

88 Weights decrease with increasing residual magnitude beyond the threshold δ. 

89 """ 

90 epsilon = 0.1 

91 

92 l2_norm = np.sqrt(np.sum(np.square(R), axis=1)) 

93 selection_vec = l2_norm > epsilon 

94 if np.sum(selection_vec) > 0: 

95 sigma_hat = np.median(l2_norm[l2_norm > epsilon]) / 0.6745 # Consistent estimator for Gaussian noise 

96 else: 

97 sigma_hat = np.median(l2_norm) / 0.6745 # Consistent estimator for Gaussian noise 

98 delta = 1.345 * sigma_hat 

99 W = np.ones_like(l2_norm) 

100 mask = l2_norm > delta 

101 W[mask] = delta / l2_norm[mask] 

102 return W