Source code for findpeaks

r"""Functions to find peaks in data above a certain threshold as part of the \
EQcorrscan package written by Calum Chamberlain of Victoria University of \
Wellington in early 2015.

Copyright 2015 Calum Chamberlain

This file is part of EQcorrscan.

    EQcorrscan is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    EQcorrscan is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with EQcorrscan.  If not, see <http://www.gnu.org/licenses/>.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals


def is_prime(number):
    r"""Test whether an integer is prime.

    Uses trial division by the first twelve primes followed by a
    Miller-Rabin strong-probable-prime test against those same twelve fixed
    bases.  Because the bases are fixed (not random) the answer is
    repeatable, and it is exact for every input below 2**64; for larger
    inputs a ``True`` result means "strong probable prime to these bases".

    :type number: int
    :param number: Integer to test for primality

    :returns: bool, True if number is prime, False otherwise (including \
        for all numbers smaller than 2).
    """
    if number < 2:
        return False
    witnesses = (2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37)
    # Cheap trial division: settles every input up to 37 and most composites.
    for prime in witnesses:
        if number == prime:
            return True
        if number % prime == 0:
            return False
    # Write number - 1 as odd_part * 2 ** twos with odd_part odd.
    odd_part = number - 1
    twos = 0
    while odd_part % 2 == 0:
        odd_part //= 2
        twos += 1
    for base in witnesses:
        residue = pow(base, odd_part, number)
        if residue == 1 or residue == number - 1:
            # This base cannot prove compositeness, try the next one.
            continue
        for _ in range(twos - 1):
            residue = pow(residue, 2, number)
            if residue == number - 1:
                break
        else:
            # Never reached -1 by repeated squaring: base is a witness
            # that number is composite.
            return False
    return True
def find_peaks2(arr, thresh, trig_int, debug=0, maxwidth=10,
                starttime=False, samp_rate=1.0):
    r"""Determine peaks in an array of data using scipy find_peaks_cwt.

    Works fast in certain cases, but for match_filter cccsum peak finding,
    find_peaks2_short works better.  Test it out and see which works best
    for your application.

    The absolute value of the data is taken and every sample below
    ``thresh`` is raised to ``thresh`` before peaks are searched for.
    Candidate peaks are then declustered: working from the largest
    amplitude downwards, a candidate is kept only if it is at least
    ``trig_int`` samples away from every peak already kept.

    :type arr: ndarray
    :param arr: 1-D numpy array is required
    :type thresh: float
    :param thresh: The threshold below which will be considered noise and \
        peaks will not be found in.
    :type trig_int: int
    :param trig_int: The minimum difference in samples between triggers, \
        if multiple peaks within this window this code will find the highest.
    :type debug: int
    :param debug: Optional, debug level 0-5
    :type maxwidth: int
    :param maxwidth: Maximum peak width to look for in samples
    :type starttime: obspy.UTCDateTime
    :param starttime: Starttime for plotting, only used if debug > 2.
    :type samp_rate: float
    :param samp_rate: Sampling rate in Hz, only used for plotting if debug > 2.

    :return: peaks: List of (value, sample index) tuples ordered by sample \
        index, where value is taken from the thresholded absolute data. \
        An empty list if nothing exceeds the threshold or no peaks are found.
    """
    from scipy.signal import find_peaks_cwt
    import numpy as np

    # Work on the absolute values and floor everything below the threshold
    # at the threshold itself.
    image = np.abs(np.copy(arr))
    image[image < thresh] = thresh
    # The original author notes that this method is really slow when the
    # number of samples is prime, so pad by one sample in that case.
    if is_prime(len(image)):
        image = np.append(image, 0.0)
        print('Input array has a prime number of samples, appending a zero')
        print(len(image))
    n_above = len(image[image > thresh])
    if n_above == 0:
        print('No values over threshold found')
        return []
    if debug > 0:
        msg = ' '.join(['Found', str(n_above), 'samples above the threshold'])
        print(msg)
    # Find the peaks
    print('Finding peaks')
    peakinds = find_peaks_cwt(image, np.arange(1, maxwidth))
    initial_peaks = [(image[peakind], peakind) for peakind in peakinds]
    if debug >= 4:
        for peak in initial_peaks:
            print(peak)
    if not initial_peaks:
        print('No peaks for you!')
        return []
    # Sort initial peaks according to amplitude, largest first
    print('sorting peaks')
    peaks_sort = sorted(initial_peaks, key=lambda peak: peak[0],
                        reverse=True)
    # Definitely take the biggest peak
    peaks = [peaks_sort[0]]
    if debug > 3:
        msg = ' '.join(['Added the biggest peak of', str(peaks[0][0]),
                        'at sample', str(peaks[0][1])])
        print(msg)
    if len(peaks_sort) > 1:
        if debug > 3:
            msg = ' '.join(['Multiple peaks found, checking them',
                            'now to see if they overlap'])
            print(msg)
        # Skip the first entry: it is already in peaks, and re-testing it
        # would duplicate it whenever trig_int is not positive.
        for next_peak in peaks_sort[1:]:
            if debug > 3:
                print(next_peak)
            clash = None
            for peak in peaks:
                if abs(next_peak[1] - peak[1]) < trig_int:
                    clash = peak
                    break
            if clash is None:
                if debug > 3:
                    msg = ' '.join(['Adding peak of', str(next_peak[0]),
                                    'at sample', str(next_peak[1])])
                    print(msg)
                peaks.append(next_peak)
            elif debug > 3:
                msg = ' '.join(['Difference in time is',
                                str(next_peak[1] - clash[1]),
                                '\n' 'Which is less than', str(trig_int)])
                print(msg)
                msg = ' '.join(['I did not add peak of', str(next_peak[0]),
                                'at sample', str(next_peak[1])])
                print(msg)
    if debug >= 3:
        # Plotting dependencies are only needed (and only imported) here.
        from obspy import UTCDateTime
        from eqcorrscan.utils import EQcorrscan_plotting
        if not starttime:
            starttime = UTCDateTime(0)
        _fname = ''.join(['peaks_',
                          starttime.datetime.strftime('%Y-%m-%d'),
                          '.pdf'])
        print(' '.join(['Saving plot to', _fname]))
        EQcorrscan_plotting.peaks_plot(image, starttime, samp_rate, True,
                                       peaks, _fname)
    # Return in time (sample) order
    return sorted(peaks, key=lambda peak: peak[1], reverse=False)
def find_peaks2_short(arr, thresh, trig_int, debug=0, starttime=False,
                      samp_rate=1.0):
    r"""Determine peaks in an array of data above a certain threshold.

    Uses a mask to remove data below threshold and finds peaks in what is
    left: the absolute value of the data is taken, samples below ``thresh``
    are zeroed, and each contiguous run of non-zero samples contributes one
    candidate peak at the sample of largest absolute value.  Candidates are
    then declustered: working from the largest absolute amplitude
    downwards, a candidate is kept only if it is at least ``trig_int``
    samples away from every peak already kept.

    :type arr: ndarray
    :param arr: 1-D numpy array is required
    :type thresh: float
    :param thresh: The threshold below which will be considered noise and \
        peaks will not be found in.
    :type trig_int: int
    :param trig_int: The minimum difference in samples between triggers, \
        if multiple peaks within this window this code will find the highest.
    :type debug: int
    :param debug: Optional, debug level 0-5
    :type starttime: obspy.UTCDateTime
    :param starttime: Starttime for plotting, only used if debug > 2.
    :type samp_rate: float
    :param samp_rate: Sampling rate in Hz, only used for plotting if debug > 2.

    :return: peaks: List of (value, sample index) tuples ordered by sample \
        index, where value is the (signed) entry of ``arr`` at that sample. \
        An empty list if nothing exceeds the threshold.
    """
    from scipy import ndimage
    import numpy as np

    data = np.copy(arr)
    # Set everything below the threshold to zero
    image = np.abs(data)
    image[image < thresh] = 0
    n_above = len(image[image > thresh])
    if n_above == 0:
        print('No values over threshold found')
        return []
    if debug > 0:
        print(' '.join(['Found', str(n_above),
                        'samples above the threshold']))
    # Find the peaks: one candidate per contiguous non-zero region
    labeled_image, number_of_objects = ndimage.label(image)
    peak_slices = ndimage.find_objects(labeled_image)
    initial_peaks = []
    for peak_slice in peak_slices:
        start, stop = peak_slice[0].start, peak_slice[0].stop
        # Locate the peak on the absolute values so that negative
        # excursions are picked at their extreme, not at their edge.
        loc = start + np.argmax(image[start:stop])
        initial_peaks.append((data[loc], loc))
    # Debugging
    if debug >= 4:
        for peak in initial_peaks:
            print(peak)
    if not initial_peaks:
        print('No peaks for you!')
        return []
    # Sort initial peaks according to absolute amplitude, largest first
    peaks_sort = sorted(initial_peaks, key=lambda peak: abs(peak[0]),
                        reverse=True)
    # Definitely take the biggest peak
    peaks = [peaks_sort[0]]
    if debug > 3:
        print(' '.join(['Added the biggest peak of', str(peaks[0][0]),
                        'at sample', str(peaks[0][1])]))
    if len(peaks_sort) > 1:
        if debug > 3:
            msg = ' '.join(['Multiple peaks found, checking',
                            'them now to see if they overlap'])
            print(msg)
        # Skip the first entry: it is already in peaks, and re-testing it
        # would duplicate it whenever trig_int is not positive.
        for next_peak in peaks_sort[1:]:
            if debug > 3:
                print(next_peak)
            clash = None
            for peak in peaks:
                if abs(next_peak[1] - peak[1]) < trig_int:
                    clash = peak
                    break
            if clash is None:
                if debug > 3:
                    msg = ' '.join(['Adding peak of', str(next_peak[0]),
                                    'at sample', str(next_peak[1])])
                    print(msg)
                peaks.append(next_peak)
            elif debug > 3:
                msg = ' '.join(['Difference in time is',
                                str(next_peak[1] - clash[1]), '\n',
                                'Which is less than', str(trig_int)])
                print(msg)
                msg = ' '.join(['I did not add peak of', str(next_peak[0]),
                                'at sample', str(next_peak[1])])
                print(msg)
    if debug >= 3:
        # Plotting dependencies are only needed (and only imported) here.
        from obspy import UTCDateTime
        from eqcorrscan.utils import EQcorrscan_plotting
        if not starttime:
            starttime = UTCDateTime(0)
        _fname = ''.join(['peaks_',
                          starttime.datetime.strftime('%Y-%m-%d'),
                          '.pdf'])
        EQcorrscan_plotting.peaks_plot(image, starttime, samp_rate, True,
                                       peaks, _fname)
    # Return in time (sample) order
    return sorted(peaks, key=lambda peak: peak[1], reverse=False)
def find_peaks_dep(arr, thresh, trig_int, debug=0, starttime=False,
                   samp_rate=1.0):
    r"""Determine peaks in an array of data above a certain threshold.

    Deprecated peak-finding routine, very slow, but accurate.  If all else
    fails this one should work.

    A window of +/- ``trig_int`` samples is stepped through
    ``abs(arr) - thresh`` in strides of ``trig_int``; within each window the
    largest positive local maximum is recorded.  Neighbouring recorded
    peaks closer than ``trig_int`` samples are then merged towards the
    larger of the pair.

    :type arr: ndarray
    :param arr: 1-D numpy array is required
    :type thresh: float
    :param thresh: The threshold below which will be considered noise and \
        peaks will not be found in.
    :type trig_int: int
    :param trig_int: The minimum difference in samples between triggers, \
        if multiple peaks within this window this code will find the \
        highest.  Must be at least 3.
    :type debug: int
    :param debug: Optional, debug level; a plot is saved if debug > 2.
    :type starttime: obspy.UTCDateTime
    :param starttime: Starttime for plotting, only used if debug > 2.
    :type samp_rate: float
    :param samp_rate: Sampling rate in Hz, only used for plotting if debug > 2.

    :return: peaks: List of (value, sample index) tuples ordered by sample \
        index.  Note that value is ``abs(arr) - thresh`` at the peak, i.e. \
        the amount by which the peak exceeds the threshold.
    :raises IOError: if trig_int is less than 3.
    """
    import numpy as np

    # Perform some checks
    if trig_int < 3:
        msg = 'Trigger interval must be greater than 2 samples to find maxima'
        raise IOError(msg)
    # Integer half-width used for all indexing, so that a float trig_int
    # cannot end up in a slice bound.
    half_width = int(trig_int)
    # Peaks are found in the absolute data relative to the threshold
    sig = np.abs(arr) - thresh
    true_peaks = []
    for i in range(half_width, int(len(sig) - trig_int), half_width):
        # Moving window containing data from +/- the trigger interval
        window = sig[i - half_width: i + half_width]
        peaks = []
        locs = []
        for j in range(1, len(window) - 1):
            # Find all turning points above threshold within the window
            if window[j] > 0.0 and window[j] > window[j + 1] and \
               window[j] > window[j - 1]:
                peaks.append(window[j])
                locs.append(i - half_width + j)
        # Find maximum peak in window
        if peaks:
            best = int(np.argmax(np.array(peaks)))
            true_peaks.append((peaks[best], locs[best]))
    # Get unique values: overlapping windows report the same peak twice
    peaks = sorted(set(true_peaks), key=lambda loc: loc[1])
    # Find highest peak in peaks within trig_int of each other; the loser
    # of each comparison is overwritten by the winner and the duplicates
    # are removed by the set() below.
    for i in range(1, len(peaks) - 1):
        if peaks[i + 1][1] - peaks[i][1] < trig_int:
            if peaks[i][0] < peaks[i + 1][0]:
                peaks[i] = peaks[i + 1]
            else:
                peaks[i + 1] = peaks[i]
        elif peaks[i][1] - peaks[i - 1][1] < trig_int:
            if peaks[i][0] < peaks[i - 1][0]:
                peaks[i] = peaks[i - 1]
            else:
                peaks[i - 1] = peaks[i]
    peaks = sorted(set(peaks), key=lambda loc: loc[1])
    if debug >= 3:
        # Plotting dependencies are only needed (and only imported) here.
        from obspy import UTCDateTime
        from eqcorrscan.utils import EQcorrscan_plotting
        if not starttime:
            starttime = UTCDateTime(0)
        _fname = ''.join(['peaks_',
                          starttime.datetime.strftime('%Y-%m-%d'),
                          '.pdf'])
        EQcorrscan_plotting.peaks_plot(arr, starttime, samp_rate, True,
                                       peaks, _fname)
    return peaks
if __name__ == "__main__":
    # Run any doctests embedded in this module when executed as a script.
    from doctest import testmod
    testmod()