Coverage for /Users/Newville/Codes/xraylarch/larch/math/deglitch.py: 12%

66 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-09 10:08 -0600

1#!/usr/bin/env python 

2# -*- coding: utf-8 -*- 

3 

4"""Deglitch utilities 

5===================== 

6 

7""" 

8import logging 

9import numpy as np 

10 

11_logger = logging.getLogger(__name__) 

12 

13 

def remove_spikes_medfilt1d(y_spiky, backend="silx", kernel_size=3, threshold=0.1):
    """Remove spikes in a 1D array with a median filter from a chosen backend

    Parameters
    ----------
    y_spiky : array
        spiky data

    backend : str, optional
        library to use as backend ['silx']
            - 'silx' -> dispatches to `remove_spikes_silx`
            - 'pymca' -> dispatches to `remove_spikes_pymca`
            - 'pandas' -> dispatches to `remove_spikes_pandas`

    kernel_size : int, optional
        kernel size where to calculate median, must be odd [3];
        an even value is incremented by one and a warning is logged

    threshold : float, optional
        difference between filtered and spiky data above which a point is
        replaced by its filtered value [0.1]; it is forwarded unchanged to
        the backend function, which defines its units (relative difference
        for 'silx', sigma units for 'pymca' and 'pandas')

    Returns
    -------
    array
        filtered array; an array of zeros shaped like `y_spiky` when
        `backend` is not one of the supported names
    """
    if kernel_size % 2 == 0:
        kernel_size += 1
        _logger.warning("'kernel_size' must be odd -> adjusted to %d", kernel_size)
    if backend == "silx":
        return remove_spikes_silx(y_spiky, kernel_size=kernel_size, threshold=threshold)
    if backend == "pymca":
        # This branch used to call the silx implementation, which made the
        # PyMca backend impossible to select.
        return remove_spikes_pymca(
            y_spiky, kernel_size=kernel_size, threshold=threshold
        )
    if backend == "pandas":
        return remove_spikes_pandas(y_spiky, window=kernel_size, threshold=threshold)
    _logger.warning("backend for medfilt1d not found! -> returning zeros")
    return np.zeros_like(y_spiky)

52 

53 

def remove_spikes_silx(y_spiky, kernel_size=3, threshold=0.1):
    """Remove spikes in a 1D array using medfilt1d from silx.math.medianfilter

    A point is replaced by its median-filtered value when the relative
    difference ``(filtered - spiky) / filtered`` exceeds `threshold` in
    absolute value; all other points are returned untouched.

    Parameters
    ----------
    y_spiky : array
        spiky data

    kernel_size : int, optional
        kernel size where to calculate median, must be odd [3]

    threshold : float, optional
        relative difference between filtered and spiky data [0.1]

    Returns
    -------
    array
        filtered array; an array of zeros shaped like `y_spiky` when silx
        cannot be imported (a warning is logged in that case)
    """
    try:
        from silx.math.medianfilter import medfilt1d
    except ImportError:
        _logger.warning("medfilt1d (from SILX) not found! -> returning zeros")
        return np.zeros_like(y_spiky)

    y_filtered = medfilt1d(
        y_spiky, kernel_size=kernel_size, conditional=True, mode="nearest", cval=0
    )
    diff = y_filtered - y_spiky
    # Where the filtered signal is exactly zero the ratio is inf (or nan for
    # 0/0). The comparison below already handles both cases: inf selects the
    # filtered value and nan keeps the original one. Only NumPy's
    # divide-by-zero / invalid-value RuntimeWarnings are silenced here.
    with np.errstate(divide="ignore", invalid="ignore"):
        rel_diff = diff / y_filtered
        is_spike = np.abs(rel_diff) > threshold
    return np.where(is_spike, y_filtered, y_spiky)

86 

87 

def remove_spikes_pymca(y_spiky, kernel_size=9, threshold=0.66):
    """Remove spikes in a 1D array using medfilt1d from PyMca5

    The median filter is imported from
    ``PyMca5.PyMcaMath.PyMcaSciPy.signal``. A point is replaced by its
    filtered value when ``abs(filtered - spiky)`` is larger than
    ``threshold * sigma``.

    Parameters
    ----------
    y_spiky : array
        spiky data

    kernel_size : int, optional
        kernel size where to calculate median, should be odd [9]

    threshold : float, optional
        difference between filtered and spiky data in sigma units [0.66]

    Returns
    -------
    array
        filtered array; an array of zeros shaped like `y_spiky` when PyMca5
        cannot be imported (a warning is logged in that case)
    """
    try:
        from PyMca5.PyMcaMath.PyMcaSciPy.signal import medfilt1d
    except ImportError:
        _logger.warning("medfilt1d (from PyMca5) not found! -> returning zeros")
        return np.zeros_like(y_spiky)

    smoothed = medfilt1d(y_spiky, kernel_size)
    delta = smoothed - y_spiky
    offset = delta.mean()
    # NOTE(review): sigma is the root-mean-square of (y_spiky - mean(delta)),
    # not the standard deviation of delta itself -- confirm this is intended.
    squared = (y_spiky - offset) ** 2
    sigma = np.sqrt(squared.sum() / float(len(squared)))
    is_spike = abs(delta) > threshold * sigma
    return np.where(is_spike, smoothed, y_spiky)

120 

121 

def remove_spikes_pandas(y, window=3, threshold=3):
    """Remove spikes using a centered rolling median from pandas

    Taken from `https://ocefpaf.github.io/python4oceanographers/blog/2015/03/16/outlier_detection/`_

    A point is replaced by the rolling median when its absolute difference
    from that median is larger than ``threshold * sigma``. The undefined
    medians at both edges of the rolling window are filled from the nearest
    valid value (backward fill, then forward fill).

    Parameters
    ----------
    y : array 1D
    window : int (optional)
        window in rolling median [3]
    threshold : int (optional)
        number of sigma difference with original data

    Return
    ------
    ynew : array like x/y
        same shape as `y`; an array of zeros shaped like `y` when pandas
        cannot be imported (an error is logged in that case)
    """
    try:
        import pandas as pd
    except ImportError:
        _logger.error("pandas not found! -> returning zeros")
        return np.zeros_like(y)

    y = np.asarray(y)
    if y.size == 0:
        # nothing to filter; also avoids a 0/0 when computing sigma
        return y.copy()

    # DataFrame turns a 1D input into a single column of shape (n, 1).
    # Reshape the result back to y.shape so the subtraction below stays
    # element-wise instead of broadcasting (n, 1) against (n,) into (n, n).
    yf = (
        pd.DataFrame(y)
        .rolling(window, center=True)
        .median()
        .bfill()
        .ffill()
        .values.reshape(y.shape)
    )

    diff = yf - y
    mean = diff.mean()
    # NOTE(review): sigma is the root-mean-square of (y - mean(diff)), not
    # the standard deviation of diff itself -- confirm this is intended.
    sigma = np.sqrt(((y - mean) ** 2).sum() / float(len(y)))
    return np.where(np.abs(diff) > threshold * sigma, yf, y)