import pandas as pd import numpy as np import os import matplotlib.pyplot as plt import pprint from .load_files import getInputLoadFile, get_user_input, getOutputPath SAMPLE_RATE = 8 def findPeaks(data, offset, start_WT, end_WT, thres=0, sampleRate=SAMPLE_RATE): ''' This function finds the peaks of an EDA signal and returns basic properties. Also, peak_end is assumed to be no later than the start of the next peak. (Is this okay??) ********* INPUTS ********** data: DataFrame with EDA as one of the columns and indexed by a datetimeIndex offset: the number of rising samples and falling samples after a peak needed to be counted as a peak start_WT: maximum number of seconds before the apex of a peak that is the "start" of the peak end_WT: maximum number of seconds after the apex of a peak that is the "rec.t/2" of the peak, 50% of amp thres: the minimum uS change required to register as a peak, defaults as 0 (i.e. all peaks count) sampleRate: number of samples per second, default=8 ********* OUTPUTS ********** peaks: list of binary, 1 if apex of SCR peak_start: list of binary, 1 if start of SCR peak_start_times: list of strings, if this index is the apex of an SCR, it contains datetime of start of peak peak_end: list of binary, 1 if rec.t/2 of SCR peak_end_times: list of strings, if this index is the apex of an SCR, it contains datetime of rec.t/2 amplitude: list of floats, value of EDA at apex - value of EDA at start max_deriv: list of floats, max derivative within 1 second of apex of SCR ''' EDA_deriv = data['filtered_eda'][1:].values - data['filtered_eda'][:-1].values peaks = np.zeros(len(EDA_deriv)) peak_sign = np.sign(EDA_deriv) for i in range(int(offset), int(len(EDA_deriv) - offset)): if peak_sign[i] == 1 and peak_sign[i + 1] < 1: peaks[i] = 1 for j in range(1, int(offset)): if peak_sign[i - j] < 1 or peak_sign[i + j] > -1: #if peak_sign[i-j]==-1 or peak_sign[i+j]==1: peaks[i] = 0 break # Finding start of peaks peak_start = np.zeros(len(EDA_deriv)) peak_start_times = [''] * len(data) max_deriv = np.zeros(len(data)) rise_time = np.zeros(len(data)) for i in range(0, len(peaks)): if peaks[i] == 1: temp_start = max(0, i - sampleRate) max_deriv[i] = max(EDA_deriv[temp_start:i]) start_deriv = .01 * max_deriv[i] found = False find_start = i # has to peak within start_WT seconds while found == False and find_start > (i - start_WT * sampleRate): if EDA_deriv[find_start] < start_deriv: found = True peak_start[find_start] = 1 peak_start_times[i] = data.index[find_start] rise_time[i] = get_seconds_and_microseconds(data.index[i] - pd.to_datetime(peak_start_times[i])) find_start = find_start - 1 # If we didn't find a start if found == False: peak_start[i - start_WT * sampleRate] = 1 peak_start_times[i] = data.index[i - start_WT * sampleRate] rise_time[i] = start_WT # Check if amplitude is too small if thres > 0 and (data['EDA'].iloc[i] - data['EDA'][peak_start_times[i]]) < thres: peaks[i] = 0 peak_start[i] = 0 peak_start_times[i] = '' max_deriv[i] = 0 rise_time[i] = 0 # Finding the end of the peak, amplitude of peak peak_end = np.zeros(len(data)) peak_end_times = [''] * len(data) amplitude = np.zeros(len(data)) decay_time = np.zeros(len(data)) half_rise = [''] * len(data) SCR_width = np.zeros(len(data)) for i in range(0, len(peaks)): if peaks[i] == 1: peak_amp = data['EDA'].iloc[i] start_amp = data['EDA'][peak_start_times[i]] amplitude[i] = peak_amp - start_amp half_amp = amplitude[i] * .5 + start_amp found = False find_end = i # has to decay within end_WT seconds while found == False and find_end < (i + end_WT * sampleRate) and find_end < len(peaks): if data['EDA'].iloc[find_end] < half_amp: found = True peak_end[find_end] = 1 peak_end_times[i] = data.index[find_end] decay_time[i] = get_seconds_and_microseconds(pd.to_datetime(peak_end_times[i]) - data.index[i]) # Find width find_rise = i found_rise = False while found_rise == False: if data['EDA'].iloc[find_rise] < half_amp: found_rise = True half_rise[i] = data.index[find_rise] SCR_width[i] = get_seconds_and_microseconds(pd.to_datetime(peak_end_times[i]) - data.index[find_rise]) find_rise = find_rise - 1 elif peak_start[find_end] == 1: found = True peak_end[find_end] = 1 peak_end_times[i] = data.index[find_end] find_end = find_end + 1 # If we didn't find an end if found == False: min_index = np.argmin(data['EDA'].iloc[i:(i + end_WT * sampleRate)].tolist()) peak_end[i + min_index] = 1 peak_end_times[i] = data.index[i + min_index] peaks = np.concatenate((peaks, np.array([0]))) peak_start = np.concatenate((peak_start, np.array([0]))) max_deriv = max_deriv * sampleRate # now in change in amplitude over change in time form (uS/second) return peaks, peak_start, peak_start_times, peak_end, peak_end_times, amplitude, max_deriv, rise_time, decay_time, SCR_width, half_rise def get_seconds_and_microseconds(pandas_time): return pandas_time.seconds + pandas_time.microseconds * 1e-6 def calcPeakFeatures(data,offset,thresh,start_WT,end_WT, sampleRate): returnedPeakData = findPeaks(data, offset*sampleRate, start_WT, end_WT, thresh, sampleRate) data['peaks'] = returnedPeakData[0] data['peak_start'] = returnedPeakData[1] data['peak_end'] = returnedPeakData[3] data['peak_start_times'] = returnedPeakData[2] data['peak_end_times'] = returnedPeakData[4] data['half_rise'] = returnedPeakData[10] # Note: If an SCR doesn't decrease to 50% of amplitude, then the peak_end = min(the next peak's start, 15 seconds after peak) data['amp'] = returnedPeakData[5] data['max_deriv'] = returnedPeakData[6] data['rise_time'] = returnedPeakData[7] data['decay_time'] = returnedPeakData[8] data['SCR_width'] = returnedPeakData[9] # To keep all filtered data remove this line # featureData = data[data.peaks==1][['EDA','rise_time','max_deriv','amp','decay_time','SCR_width']] # Replace 0s with NaN, this is where the 50% of the peak was not found, too close to the next peak # featureData[['SCR_width','decay_time']]=featureData[['SCR_width','decay_time']].replace(0, np.nan) # featureData['AUC']=featureData['amp']*featureData['SCR_width'] # if outfile is not None: # featureData.to_csv(outfile) return data # draws a graph of the data with the peaks marked on it # assumes that 'data' dataframe already contains the 'peaks' column def plotPeaks(data, x_seconds, sampleRate = SAMPLE_RATE): if x_seconds: time_m = np.arange(0,len(data))/float(sampleRate) else: time_m = np.arange(0,len(data))/(sampleRate*60.) data_min = min(data['EDA']) data_max = max(data['EDA']) #Plot the data with the Peaks marked plt.figure(1,figsize=(20, 5)) peak_height = data_max * 1.15 data['peaks_plot'] = data['peaks'] * peak_height plt.plot(time_m,data['peaks_plot'],'#4DBD33') #plt.plot(time_m,data['EDA']) plt.plot(time_m,data['filtered_eda']) plt.xlim([0,time_m[-1]]) y_min = min(0, data_min) - (data_max - data_min) * 0.1 plt.ylim([min(y_min, data_min),peak_height]) plt.title('EDA with Peaks marked') plt.ylabel('$\mu$S') if x_seconds: plt.xlabel('Time (s)') else: plt.xlabel('Time (min)') plt.show() def chooseValueOrDefault(str_input, default): if str_input == "": return default else: return float(str_input) if __name__ == "__main__": data, filepath_confirm = getInputLoadFile() fullOutputPath = getOutputPath() print("") print("Please choose settings for the peak detection algorithm. For default values press return") thresh_str = get_user_input('\tMinimum peak amplitude (default = .02):') thresh = chooseValueOrDefault(thresh_str,.02) offset_str = get_user_input('\tOffset (default = 1): ') offset = chooseValueOrDefault(offset_str,1) start_WT_str = get_user_input('\tMax rise time (s) (default = 4): ') start_WT = chooseValueOrDefault(start_WT_str,4) end_WT_str = get_user_input('\tMax decay time (s) (default = 4): ') end_WT = chooseValueOrDefault(end_WT_str,4) settings_dict = {'threshold':thresh, 'offset':offset, 'rise time':start_WT, 'decay time':end_WT} print("") print("Okay, finding peaks in file "+ filepath_confirm + " using the following parameters") pprint.pprint(settings_dict) peakData = calcPeakFeatures(data,fullOutputPath,offset,thresh,start_WT,end_WT) print("Features computed and saved to " + fullOutputPath) # Plotting the data print("") plot_ans = get_user_input("Do you want to plot the detected peaks? (y/n): ") if 'y' in plot_ans: secs_ans = get_user_input("Would you like the x-axis to be in seconds or minutes? (sec/min): ") if 'sec' in secs_ans: x_seconds=True else: x_seconds=False plotPeaks(peakData,x_seconds) else: print("\tOkay, script will not produce a plot") print("") print('--------------------------------') print("Please also cite this project:") print("Taylor, S., Jaques, N., Chen, W., Fedor, S., Sano, A., & Picard, R. Automatic identification of artifacts in electrodermal activity data. In Engineering in Medicine and Biology Conference. 2015") print('--------------------------------')