Coverage for /Users/Newville/Codes/xraylarch/larch/math/deglitch.py: 12%
66 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-09 10:08 -0600
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-09 10:08 -0600
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
4"""Deglitch utilities
5=====================
7"""
8import logging
9import numpy as np
11_logger = logging.getLogger(__name__)
def remove_spikes_medfilt1d(y_spiky, backend="silx", kernel_size=3, threshold=0.1):
    """Remove spikes in a 1D array with a median filter from a chosen backend

    Thin dispatcher: normalizes `kernel_size` to an odd value and forwards
    the call to the backend-specific function.

    Parameters
    ----------
    y_spiky : array
        spiky data

    backend : str, optional
        library to use as backend ['silx']
        - 'silx' -> `remove_spikes_silx`
          (from silx.math.medianfilter import medfilt1d)
        - 'pymca' -> `remove_spikes_pymca`
          (from PyMca5.PyMcaMath.PyMcaSciPy.signal import medfilt1d)
        - 'pandas' -> `remove_spikes_pandas` (rolling median)

    kernel_size : int, optional
        kernel size where to calculate median, must be odd [3]
        (an even value is incremented by one and a warning is logged)

    threshold : float, optional
        difference between filtered and spiky data above which a point is
        replaced by its filtered value [0.1]. It is passed unchanged to the
        backend, whose unit differs: relative difference for 'silx',
        sigma units for 'pymca' and 'pandas'.

    Returns
    -------
    array
        filtered array; an array of zeros (same shape as `y_spiky`) when
        `backend` is not recognized
    """
    if not (kernel_size % 2):
        kernel_size += 1
        _logger.warning("'kernel_size' must be odd -> adjusted to %d", kernel_size)
    if backend == "silx":
        return remove_spikes_silx(y_spiky, kernel_size=kernel_size, threshold=threshold)
    elif backend == "pymca":
        # previously dispatched to remove_spikes_silx, which made the PyMca
        # implementation unreachable through this entry point
        return remove_spikes_pymca(y_spiky, kernel_size=kernel_size, threshold=threshold)
    elif backend == "pandas":
        return remove_spikes_pandas(y_spiky, window=kernel_size, threshold=threshold)
    else:
        _logger.warning("backend for medfilt1d not found! -> returning zeros")
        return np.zeros_like(y_spiky)
def remove_spikes_silx(y_spiky, kernel_size=3, threshold=0.1):
    """Remove spikes in a 1D array using medfilt1d from silx.math

    Each point whose relative difference to the median-filtered signal
    exceeds `threshold` is replaced by the filtered value; all other points
    are kept as they are.

    Parameters
    ----------
    y_spiky : array
        spiky data

    kernel_size : int, optional
        kernel size where to calculate median, must be odd [3]

    threshold : float, optional
        relative difference between filtered and spiky data, i.e.
        abs((filtered - spiky) / filtered) [0.1]

    Returns
    -------
    array
        filtered array; an array of zeros (same shape as `y_spiky`) when
        silx cannot be imported
    """
    try:
        from silx.math.medianfilter import medfilt1d
    except ImportError:
        _logger.warning("medfilt1d (from SILX) not found! -> returning zeros")
        return np.zeros_like(y_spiky)
    y_filtered = medfilt1d(
        y_spiky, kernel_size=kernel_size, conditional=True, mode="nearest", cval=0
    )
    diff = y_filtered - y_spiky
    # A zero in the filtered signal yields inf (or nan for 0/0). The outcome
    # is still well defined -- inf counts as a spike, nan never exceeds the
    # threshold -- so only the RuntimeWarning noise is silenced here.
    with np.errstate(divide="ignore", invalid="ignore"):
        rel_diff = diff / y_filtered
        is_spike = abs(rel_diff) > threshold
    return np.where(is_spike, y_filtered, y_spiky)
def remove_spikes_pymca(y_spiky, kernel_size=9, threshold=0.66):
    """Remove spikes in a 1D array using medfilt1d from PyMca5

    The median filter comes from `PyMca5.PyMcaMath.PyMcaSciPy.signal`.
    Points deviating from the filtered signal by more than
    `threshold` * sigma are replaced by the filtered value.

    Parameters
    ----------
    y_spiky : array
        spiky data

    kernel_size : int, optional
        kernel size where to calculate median, should be odd [9]

    threshold : float, optional
        difference between filtered and spiky data in sigma units [0.66]

    Returns
    -------
    array
        filtered array; an array of zeros (same shape as `y_spiky`) when
        PyMca5 cannot be imported
    """
    fallback = np.zeros_like(y_spiky)
    try:
        from PyMca5.PyMcaMath.PyMcaSciPy.signal import medfilt1d
    except ImportError:
        _logger.warning("medfilt1d (from PyMca5) not found! -> returning zeros")
        return fallback
    smoothed = medfilt1d(y_spiky, kernel_size)
    residual = smoothed - y_spiky
    offset = residual.mean()
    # NOTE(review): sigma is the RMS of (y_spiky - mean(residual)), not of
    # the residual itself -- kept as is, confirm this is the intended scale
    squared_dev = (y_spiky - offset) ** 2
    sigma = np.sqrt(squared_dev.sum() / float(len(squared_dev)))
    is_spike = abs(residual) > threshold * sigma
    return np.where(is_spike, smoothed, y_spiky)
def remove_spikes_pandas(y, window=3, threshold=3):
    """Remove spikes in a 1D array using a rolling median from pandas

    Taken from `https://ocefpaf.github.io/python4oceanographers/blog/2015/03/16/outlier_detection/`_

    A centered rolling median of `y` is computed (edges, where the window
    is incomplete, are back/forward filled). Every point that differs from
    the median by more than `threshold` * sigma is replaced by the median
    value; all other points are kept.

    .. note:: `Series.rolling(window, center=True).median()` is used when
        available; the legacy `pandas.rolling_median` function is only a
        fallback for pandas versions without `Series.rolling`.

    Parameters
    ----------
    y : array 1D
        spiky data
    window : int (optional)
        window in rolling median [3]
    threshold : int (optional)
        number of sigma difference with original data [3]

    Return
    ------
    ynew : array 1D, same length as `y`
        filtered array; an array of zeros when pandas cannot be imported
    """
    try:
        import pandas as pd
    except ImportError:
        _logger.error("pandas not found! -> returning zeros")
        return np.zeros_like(y)
    y = np.asarray(y)
    # A Series (not a DataFrame) keeps everything 1D: with an (n, 1) frame,
    # `values - y` broadcasts to (n, n) and the result is no longer a signal.
    series = pd.Series(y)
    if hasattr(series, "rolling"):
        rolled = series.rolling(window, center=True).median()
    else:
        # legacy pandas API
        rolled = pd.rolling_median(series, window=window, center=True)
    # .bfill()/.ffill() instead of fillna(method=...), which is deprecated
    yf = rolled.bfill().ffill().values
    diff = yf - y
    mean = diff.mean()
    # NOTE(review): sigma is the RMS of (y - mean(diff)), as in the original
    # implementation -- confirm this is the intended scale
    sigma = np.sqrt(((y - mean) ** 2).sum() / float(len(y)))
    return np.where(abs(diff) > threshold * sigma, yf, y)