import pandas as pd
import numpy as np
import scipy.signal as _signal

from calculatingfeatures.CalculatingFeatures.helper_functions import checkForFeature, hrvFeatureNames
from calculatingfeatures.CalculatingFeatures.calculate_hrv_peaks import peak_detector


def extractHrvFeatures(_sample, ma=False, detrend=False, m_deternd=False, low_pass=False, winsorize=False,
                       winsorize_value=25, hampel_fiter=True, sampling=64, featureNames=None):
    """
    Extract Martin's HRV features

    Warning: the input sample length has to be at least 256!

    :param _sample: array containing the HRV signal
    :param ma: should a moving average filter be applied prior to the calculation
    :param detrend: should overall detrending be applied prior to the calculation
    :param m_deternd: should moving (windowed) detrending be applied prior to the calculation
    :param low_pass: should a low-pass filter be applied prior to the calculation
    :param winsorize: should the signal be winsorized prior to the calculation
    :param winsorize_value: winsorization percentile
    :param hampel_fiter: should a Hampel filter be applied to the detected R-R intervals
    :param sampling: the sampling frequency of the signal
    :param featureNames: list of feature names to calculate; None calculates all HRV features
    :return: dictionary of HRV features
    """
    hrv_time_features, sample, rr, timings, peak_indx = get_HRV_features(_sample, ma, detrend, m_deternd, low_pass,
                                                                         winsorize, winsorize_value, hampel_fiter,
                                                                         sampling, featureNames=featureNames)
    return hrv_time_features


def extractHrvFeatures2D(signal2D, ma=False, detrend=False, m_deternd=False, low_pass=False, winsorize=False,
                         winsorize_value=25, hampel_fiter=True, sampling=64):
    """
    Extract Martin's HRV features

    Warning: the input 2D array width (column count) has to be at least 200!

    :param signal2D: array containing the HRV signal in 2D (each row is processed separately)
    :param ma: should a moving average filter be applied prior to the calculation
    :param detrend: should overall detrending be applied prior to the calculation
    :param m_deternd: should moving (windowed) detrending be applied prior to the calculation
    :param low_pass: should a low-pass filter be applied prior to the calculation
    :param winsorize: should the signal be winsorized prior to the calculation
    :param winsorize_value: winsorization percentile
    :param hampel_fiter: should a Hampel filter be applied to the detected R-R intervals
    :param sampling: the sampling frequency of the signal
    :return: pandas dataframe of calculated HRV features, each row corresponds to one input row
    """
    # collect one feature dict per input row and build the dataframe once at the end
    rows = []
    for sample in signal2D:
        features = extractHrvFeatures(sample, ma, detrend, m_deternd, low_pass, winsorize, winsorize_value,
                                      hampel_fiter, sampling)
        rows.append(features)
    return pd.DataFrame(rows)


# filter the signal and calculate HRV features in the time domain
def get_HRV_features(_sample, ma=False, detrend=False, m_deternd=False, low_pass=False, winsorize=True,
                     winsorize_value=25, hampel_fiter=True, sampling=1000, featureNames=None):
    if featureNames is not None and len(set(featureNames).intersection(set(hrvFeatureNames))) == 0:
        return dict(), 0, 0, 0, 0

    sample = _sample.copy()

    if low_pass:
        # low-pass filter
        sample = butter_lowpass_filter(sample)
    if m_deternd:
        # moving (windowed) detrending
        sample = moving_detrending(sample, sampling)
    if detrend:
        # overall detrending
        sample = _signal.detrend(sample)
    if ma:
        # moving average
        sample = moving_average(sample)

    # winsorize the signal
    if winsorize:
        sample = winsorize_signal(sample, winsorize_value)

    # if dynamic_threshold:  # find the median of the min-max normalized signal
    #     thres = dynamic_threshold_value * np.median((sample - sample.min()) / (sample.max() - sample.min()))

    rr, timings, peak_indx = detect_RR(sample, sampling)
    if hampel_fiter:
        rr, outlier_indices = hampel_filtering(rr)

    timings, rr = medianFilter(timings, rr)

    bad_signal = False
    if len(rr) < len(sample) / (2 * sampling):  # check whether HR is > 30
        # print("Bad signal. Too few RRs detected.")
        bad_signal = True
    elif len(rr) > len(sample) / (sampling / 4):  # check whether HR is < 240
        # print("Bad signal. Too many RRs detected.")
        bad_signal = True

    hrv_time_features = HRV_time(rr, print_flag=False, badSignal=bad_signal, featureNames=featureNames)

    return hrv_time_features, sample, rr, timings, peak_indx


def butter_lowpass(cutoff, fs, order):
    nyq = 0.5 * fs
    normal_cutoff = cutoff / nyq
    b, a = _signal.butter(order, normal_cutoff, btype='low', analog=False)
    return b, a


def butter_lowpass_filter(data, cutoff=5, fs=64, order=3):
    b, a = butter_lowpass(cutoff, fs, order=order)
    y = _signal.lfilter(b, a, data)
    # drop the initial samples to discard the filter transient
    return pd.Series(y[1000:])


# perform detrending using a sliding window
def moving_detrending(sig_input, sampling_rate=64):
    sig = np.copy(sig_input)
    window_size = 1 * sampling_rate
    window_count = 1
    start = (window_count - 1) * window_size
    end = window_count * window_size
    while end <= len(sig):
        if (len(sig) - end) < window_size:
            # extend the last window so the remaining samples are detrended as well
            end = end + window_size
        sig[start:end] = _signal.detrend(sig[start:end])
        window_count = window_count + 1
        start = (window_count - 1) * window_size
        end = window_count * window_size
    return sig


# perform moving average
def moving_average(sample, ma_size=10):
    sample = pd.Series(sample)
    sample_ma = sample.rolling(ma_size).mean()
    sample_ma = sample_ma.iloc[ma_size:].values
    return sample_ma


def winsorize_signal(sample, winsorize_value):
    p_min = np.percentile(sample, winsorize_value)
    p_max = np.percentile(sample, 100 - winsorize_value)
    sample[sample > p_max] = p_max
    sample[sample < p_min] = p_min
    return sample
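
# Minimal preprocessing sketch (illustration only, not part of the original pipeline):
# get_HRV_features applies the same steps internally when the corresponding flags are set.
# `raw_signal` is a hypothetical 1D array sampled at `sampling` Hz; it has to be longer than
# 1000 samples because butter_lowpass_filter discards the filter transient.
def _preprocess_example(raw_signal, sampling=64, winsorize_value=25):
    sig = butter_lowpass_filter(raw_signal, cutoff=5, fs=sampling, order=3)  # suppress high-frequency noise
    sig = moving_detrending(np.asarray(sig, dtype=float), sampling)          # remove slow drift window by window
    sig = moving_average(sig)                                                # smooth the pulse wave
    sig = winsorize_signal(sig, winsorize_value)                             # clip extreme amplitudes
    return sig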
# https://www.mathworks.com/help/signal/ref/hampel.html
# compute the median and standard deviation of a window composed of the sample
# and its six surrounding samples; if a sample differs from the median by more
# than three standard deviations, it is replaced with the median
# returns the filtered RRs and the outlier indices
def hampel_filtering(sample_rr):
    outlier_indices = []
    filtered_rr = []
    for i in range(len(sample_rr)):
        start = i - 3
        end = i + 3
        if start < 0:
            # for the first 3 samples calculate the median and std using the closest 6 samples
            start = 0
            end = end + 3 - i
        if end > len(sample_rr) - 1:
            # for the last 3 samples calculate the median and std using the last 6 samples
            start = len(sample_rr) - 7
            end = len(sample_rr) - 1

        sample_rr_part = sample_rr[start:end]

        # prevent a "Mean of empty slice" warning
        if len(sample_rr_part) == 0:
            sample_med = np.nan
            sample_std = np.nan
        else:
            sample_med = np.median(sample_rr_part)
            sample_std = np.std(sample_rr_part)

        if abs(sample_rr[i] - sample_med) > 3 * sample_std:
            outlier_indices.append(i)
            filtered_rr.append(sample_med)
            # print('outlier')
        else:
            filtered_rr.append(sample_rr[i])

    return np.array(filtered_rr), outlier_indices


def medianFilter(time, rr):
    percentageBorder = 0.8

    if len(rr) == 0:
        median = np.nan
    else:
        median = np.median(rr)

    # keep only RRs that lie within 20 % of the median RR
    idx = (rr / median >= percentageBorder) & (rr / median <= (2 - percentageBorder))
    # f_rr = rr[(rr/median>=percentageBorder) & (rr/median<=(2-percentageBorder))]
    f_rr = np.copy(rr)
    # f_rr[~idx] = median
    f_rr = f_rr[idx]
    f_time = timestamps_from_RR(f_rr)
    # f_time = time[(rr/median>=percentageBorder) & (rr/median<=(2-percentageBorder))]
    return f_time, f_rr


def detect_RR(sig, sampling_rate):
    # peak_indx = peakutils.indexes(sig, thres=thres, min_dist=sampling_rate/2.5)
    peak_indx, _ = peak_detector(sig, sampling_rate)

    if len(peak_indx) == 0:
        return [], [], []

    time = np.arange(len(sig))
    tmp = time[peak_indx]
    timings1 = tmp[0:]
    timings = tmp[1:]
    # R-R intervals are the differences between consecutive peak times
    RR_intervals = timings - timings1[:len(timings1) - 1]

    return RR_intervals / sampling_rate, timings / sampling_rate, peak_indx
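
# Worked toy example for the time-domain features computed below (sketch, values rounded):
# for RR intervals [0.80, 0.82, 0.78, 0.85] s the successive absolute differences are
# [0.02, 0.04, 0.07] s, so nn20 counts the 2 differences above 0.02 s and nn50 the 1 difference
# above 0.05 s, giving pnn20 ≈ 66.7 % and pnn50 ≈ 33.3 %; rmssd = sqrt(mean of the squared
# differences) ≈ 0.048 s, ibi = mean RR = 0.8125 s, i.e. a mean heart rate of ≈ 73.8 bpm.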
# extract HRV features in the time domain
def HRV_time(RR_intervals, print_flag, badSignal=False, featureNames=None):
    if not badSignal:
        ibi = np.mean(RR_intervals)  # mean of the R-R intervals, i.e. the mean inter-beat interval
        mean_hr = 60 / ibi
        sdnn = np.std(RR_intervals)  # standard deviation of all R-R intervals

        # differences between successive/neighbouring RRs
        RR_diff = []
        RR_sqdiff = []
        for i in range(len(RR_intervals) - 1):
            RR_diff.append(np.absolute(RR_intervals[i + 1] - RR_intervals[i]))
            RR_sqdiff.append(np.power(np.absolute(RR_intervals[i + 1] - RR_intervals[i]), 2))
        RR_diff = np.array(RR_diff)
        RR_sqdiff = np.array(RR_sqdiff)

        sdsd = np.std(RR_diff)  # standard deviation of the differences between subsequent R-R intervals
        rmssd = np.sqrt(np.mean(RR_sqdiff))  # root of the mean of the squared differences

        nn20 = [x for x in RR_diff if (x > 0.02)]  # differences larger than 20 ms
        nn50 = [x for x in RR_diff if (x > 0.05)]  # differences larger than 50 ms
        # proportion of NN20 / NN50 intervals relative to all intervals
        pnn20 = 100 * float(len(nn20)) / float(len(RR_diff)) if len(RR_diff) > 0 else np.nan
        pnn50 = 100 * float(len(nn50)) / float(len(RR_diff)) if len(RR_diff) > 0 else np.nan

        sd1 = np.sqrt(0.5 * sdnn * sdnn)
        sd2 = np.nan
        tmp = 2.0 * sdsd * sdsd - 0.5 * sdnn * sdnn
        if tmp > 0:  # avoid the square root of negative values
            sd2 = np.sqrt(2.0 * sdsd * sdsd - 0.5 * sdnn * sdnn)

        if print_flag:
            print("meanHR:", mean_hr)
            print("IBI:", ibi)
            print("SDNN:", sdnn)
            print("SDSD:", sdsd)
            print("RMSSD:", rmssd)
            print("pNN20:", pnn20)
            print("pNN50:", pnn50)
            print("sd1:", sd1)
            print("sd2:", sd2)
            print("sd1/sd2:", sd1 / sd2)

    out = {}
    if checkForFeature("meanHr", featureNames):
        out['meanHr'] = mean_hr if not badSignal else np.nan
    if checkForFeature("ibi", featureNames):
        out['ibi'] = ibi if not badSignal else np.nan
    if checkForFeature("sdnn", featureNames):
        out['sdnn'] = sdnn if not badSignal else np.nan
    if checkForFeature("sdsd", featureNames):
        out['sdsd'] = sdsd if not badSignal else np.nan
    if checkForFeature("rmssd", featureNames):
        out['rmssd'] = rmssd if not badSignal else np.nan
    if checkForFeature("pnn20", featureNames):
        out['pnn20'] = pnn20 if not badSignal else np.nan
    if checkForFeature("pnn50", featureNames):
        out['pnn50'] = pnn50 if not badSignal else np.nan
    if checkForFeature("sd", featureNames):
        out['sd'] = sd1 if not badSignal else np.nan
    if checkForFeature("sd2", featureNames):
        out['sd2'] = sd2 if not badSignal else np.nan
    if checkForFeature("sd1/sd2", featureNames):
        out['sd1/sd2'] = sd1 / sd2 if not badSignal else np.nan
    if checkForFeature("numRR", featureNames):
        out['numRR'] = len(RR_intervals)

    return out


def timestamps_from_RR(rr_intervals):
    time = []
    current_time = 0.0
    for rr in rr_intervals:
        current_time = current_time + rr
        time.append(current_time)
    return np.array(time)
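

if __name__ == "__main__":
    # Minimal end-to-end usage sketch (illustration only): a 30 s synthetic 1.2 Hz sinusoid
    # sampled at 64 Hz stands in for a real PPG/BVP recording, so the resulting feature values
    # are not physiologically meaningful. Replace `synthetic` with a real signal of the same shape.
    t = np.arange(0, 30, 1 / 64)
    synthetic = np.sin(2 * np.pi * 1.2 * t)  # roughly 72 "beats" per minute

    # all HRV features for a single window (dictionary)
    print(extractHrvFeatures(synthetic, sampling=64))

    # several windows at once: one dataframe row per input row
    windows = np.vstack([synthetic, synthetic])
    print(extractHrvFeatures2D(windows, sampling=64))

    # only selected features, by name; the names must match entries in hrvFeatureNames
    print(extractHrvFeatures(synthetic, sampling=64, featureNames=["meanHr", "rmssd", "sdnn"]))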