rapids/calculatingfeatures/CalculatingFeatures/hrv.py

305 lines
12 KiB
Python

import pandas as pd
import numpy as np
import scipy.signal as _signal
from calculatingfeatures.CalculatingFeatures.helper_functions import checkForFeature, hrvFeatureNames
from calculatingfeatures.CalculatingFeatures.calculate_hrv_peaks import peak_detector
def extractHrvFeatures(_sample, ma=False, detrend=False, m_deternd=False, low_pass=False, winsorize=False,
                       winsorize_value=25, hampel_fiter=True, sampling=64, featureNames=None):
    """ Extract the HRV features of a single signal.

    Thin wrapper around get_HRV_features: every option is forwarded unchanged and only the
    feature dictionary is handed back (the filtered signal, RR intervals, timings and peak
    indices that get_HRV_features also produces are discarded).

    Warning (from the original author): input sample length has to be at least 256!

    :param _sample: array containing the signal the HRV features are computed from
    :param ma: apply the moving average filter before peak detection
    :param detrend: apply overall detrending before peak detection
    :param m_deternd: apply moving (windowed) detrending before peak detection
    :param low_pass: apply the low pass filter before peak detection
    :param winsorize: winsorize the signal before peak detection
    :param winsorize_value: percentile used as the winsorizing limit
    :param hampel_fiter: apply the Hampel filter to the detected RR intervals
    :param sampling: the sampling frequency of the signal
    :param featureNames: names of the features to compute; None is forwarded as-is
    :return: dictionary of HRV features
    """
    features, _filtered, _rr, _timings, _peaks = get_HRV_features(
        _sample,
        ma=ma,
        detrend=detrend,
        m_deternd=m_deternd,
        low_pass=low_pass,
        winsorize=winsorize,
        winsorize_value=winsorize_value,
        hampel_fiter=hampel_fiter,
        sampling=sampling,
        featureNames=featureNames,
    )
    return features
def extractHrvFeatures2D(signal2D, ma=False, detrend=False, m_deternd=False, low_pass=False, winsorize=False,
                         winsorize_value=25, hampel_fiter=True, sampling=64):
    """ Extract the HRV features of every row of a 2D signal.

    Warning (from the original author): input 2D array width (column count) has to be at least 200!

    :param signal2D: array containing the signal in 2D (each row is processed separately)
    :param ma: apply the moving average filter before peak detection
    :param detrend: apply overall detrending before peak detection
    :param m_deternd: apply moving (windowed) detrending before peak detection
    :param low_pass: apply the low pass filter before peak detection
    :param winsorize: winsorize the signal before peak detection
    :param winsorize_value: percentile used as the winsorizing limit
    :param hampel_fiter: apply the Hampel filter to the detected RR intervals
    :param sampling: the sampling frequency of the signal
    :return: pandas dataframe of calculated HRV features, one row per input row
    """
    # Collect the per-row dictionaries and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-allocated the whole frame on every call.
    rows = [
        extractHrvFeatures(sample, ma, detrend, m_deternd, low_pass,
                           winsorize, winsorize_value,
                           hampel_fiter, sampling)
        for sample in signal2D
    ]
    return pd.DataFrame(rows)
# filter the signal and calculate HRV features (only time-domain features are computed here)
def get_HRV_features(_sample, ma=False, detrend=False, m_deternd=False, low_pass=False, winsorize=True,
                     winsorize_value=25, hampel_fiter=True, sampling=1000, featureNames=None):
    """ Filter the signal, detect RR intervals and compute the time-domain HRV features.

    The optional pre-processing steps run in a fixed order: low pass, moving detrending,
    overall detrending, moving average, winsorizing. RR intervals are then detected,
    optionally Hampel-filtered, and finally passed through medianFilter.

    :param _sample: signal to analyse; must provide ``copy()`` (it is copied, not modified)
    :param ma: apply the moving average filter
    :param detrend: apply overall detrending
    :param m_deternd: apply moving (windowed) detrending
    :param low_pass: apply the low pass filter
    :param winsorize: winsorize the signal
    :param winsorize_value: percentile used as the winsorizing limit
    :param hampel_fiter: apply the Hampel filter to the detected RR intervals
    :param sampling: the sampling frequency of the signal
    :param featureNames: names of the requested features, or None
    :return: tuple (features dict, filtered signal, RR intervals, timings, peak indices);
        ``(dict(), 0, 0, 0, 0)`` when featureNames is given but shares nothing with hrvFeatureNames
    """
    # Nothing to do when none of the requested features is an HRV feature.
    if featureNames is not None and set(featureNames).isdisjoint(hrvFeatureNames):
        return dict(), 0, 0, 0, 0

    sample = _sample.copy()
    if low_pass:  # lowpass filter
        # Design the filter for the actual sampling rate; previously the helper's default
        # fs=64 was always used, whatever `sampling` was.
        sample = butter_lowpass_filter(sample, fs=sampling)
    if m_deternd:  # moving detrending
        sample = moving_detrending(sample, sampling)
    if detrend:  # overall detrending
        sample = _signal.detrend(sample)
    if ma:  # moving average
        sample = moving_average(sample)
    if winsorize:  # winsorize the signal
        sample = winsorize_signal(sample, winsorize_value)

    rr, timings, peak_indx = detect_RR(sample, sampling)
    if hampel_fiter:
        rr, outlier_indeces = hampel_filtering(rr)
    # detect_RR may hand back a plain list when no peak is found; the ratio test in
    # medianFilter needs array arithmetic.
    rr = np.asarray(rr, dtype=float)
    timings, rr = medianFilter(timings, rr)

    too_few = len(rr) < len(sample) / (2 * sampling)  # check whether HR is > 30
    too_many = len(rr) > len(sample) / (sampling / 4)  # check whether HR is < 240
    bad_signal = too_few or too_many

    hrv_time_features = HRV_time(rr, print_flag=False, badSignal=bad_signal, featureNames=featureNames)
    return hrv_time_features, sample, rr, timings, peak_indx
def butter_lowpass(cutoff, fs, order):
    """ Design a digital Butterworth low pass filter.

    :param cutoff: cutoff frequency, in the same unit as fs
    :param fs: sampling frequency of the signal the filter will be applied to
    :param order: order of the filter
    :return: tuple (b, a) with the numerator and denominator coefficients
    """
    # scipy expects the cutoff as a fraction of the Nyquist frequency (half the sampling rate)
    nyquist = 0.5 * fs
    return _signal.butter(order, cutoff / nyquist, btype='low', analog=False)
def butter_lowpass_filter(data, cutoff=5, fs=64, order=3, skip=1000):
    """ Low pass filter a signal with a Butterworth filter.

    :param data: signal to filter
    :param cutoff: cutoff frequency, in the same unit as fs
    :param fs: sampling frequency of the signal
    :param order: order of the filter
    :param skip: number of leading output samples to drop; the default of 1000 keeps the
        previous hard-coded behaviour, which returns an empty series for inputs of 1000
        samples or fewer. Pass 0 to keep the whole filtered signal.
    :return: pandas Series with the filtered signal, minus the first ``skip`` samples
    """
    b, a = butter_lowpass(cutoff, fs, order=order)
    y = _signal.lfilter(b, a, data)
    return pd.Series(y[skip:])
# perform detrending using a sliding window
def moving_detrending(sig_input, sampling_rate=64):
    """ Remove the linear trend from a signal window by window.

    The signal is split into consecutive windows of ``sampling_rate`` samples and each one is
    detrended on its own. A trailing remainder shorter than one window is merged into the
    last full window. A signal shorter than one window is returned unchanged (as a copy).

    :param sig_input: signal to detrend; it is not modified
    :param sampling_rate: window length in samples
    :return: float numpy array with the detrended signal
    :raises ValueError: if sampling_rate is not positive
    """
    # Work on a float copy: writing detrended values back into an integer array would
    # silently truncate them.
    sig = np.array(sig_input, dtype=float)
    window_size = int(sampling_rate)
    if window_size <= 0:
        raise ValueError("sampling_rate must be positive")

    total = len(sig)
    start = 0
    while start + window_size <= total:
        end = start + window_size
        if total - end < window_size:
            # not enough samples left for another full window: absorb them into this one
            end = total
        sig[start:end] = _signal.detrend(sig[start:end])
        start += window_size
    return sig
# perform moving average
def moving_average(sample, ma_size=10):
    """ Smooth a signal with a trailing moving average.

    :param sample: signal to smooth
    :param ma_size: number of samples in the averaging window
    :return: numpy array with one average per complete window, i.e.
        ``len(sample) - ma_size + 1`` values (empty when the signal is shorter than the window)
    """
    rolled = pd.Series(sample).rolling(ma_size).mean()
    # The first complete window ends at index ma_size - 1; slicing from ma_size (as before)
    # threw away that first valid average.
    return rolled.iloc[ma_size - 1:].values
def winsorize_signal(sample, winsorize_value):
    """ Clamp a signal to its [winsorize_value, 100 - winsorize_value] percentile range.

    :param sample: signal to winsorize (numpy array, pandas Series or plain sequence);
        it is not modified
    :param winsorize_value: lower percentile; the upper one is ``100 - winsorize_value``
    :return: float copy of the signal with values outside the percentile range replaced by
        the nearest limit; a pandas Series stays a Series, anything else becomes a numpy array
    """
    p_min = np.percentile(sample, winsorize_value)
    p_max = np.percentile(sample, 100 - winsorize_value)

    # Clamp a float copy: the caller's data stays untouched and the limits are not
    # truncated when the input holds integers.
    if isinstance(sample, pd.Series):
        clipped = sample.astype(float)
    else:
        clipped = np.array(sample, dtype=float)
    clipped[clipped > p_max] = p_max
    clipped[clipped < p_min] = p_min
    return clipped
# https://www.mathworks.com/help/signal/ref/hampel.html
# compute median and standard deviation
# of a window composed of the sample and its six surrounding samples
# If a sample differs from the median by more than three standard deviations,
# it is replaced with the median.
# return filtered RRs and outlier indices
def hampel_filtering(sample_rr):
    """ Replace outlying RR intervals with the median of their neighbourhood.

    For every interval a window of neighbouring intervals is taken, and the interval is
    replaced by the window median when it differs from that median by more than three
    window standard deviations.

    :param sample_rr: sequence of RR intervals
    :return: tuple (numpy array of filtered intervals, same length as the input;
        list of the indices that were replaced)
    """
    # NOTE(review): the slice end is exclusive, so the window holds 6 samples
    # (i-3 .. i+2), and near the tail it stops one short of the last sample. The spread is
    # measured with np.std, whereas the referenced MATLAB `hampel` uses a scaled median
    # absolute deviation. Both are kept as they were - confirm whether that is intended.
    count = len(sample_rr)
    outlier_indicies = []
    filtered_rr = []
    for i, value in enumerate(sample_rr):
        start = i - 3
        end = i + 3
        if start < 0:  # first 3 samples: use the first 6 samples
            start = 0
            end = end + 3 - i
        if end > count - 1:  # window runs past the end: use the last window that fits
            # clamp so a short input cannot produce a negative (wrap-around) start
            start = max(0, count - 7)
            end = count - 1
        window = sample_rr[start:end]

        # Prevent "Mean of empty slice" warning
        if len(window) == 0:
            sample_med = np.nan
            sample_std = np.nan
        else:
            sample_med = np.median(window)
            sample_std = np.std(window)

        # Comparisons with NaN are False, so a sample without a usable window is kept.
        if abs(value - sample_med) > 3 * sample_std:
            outlier_indicies.append(i)
            filtered_rr.append(sample_med)
        else:
            # Previously the original value was appended for outliers as well, so the
            # output grew by one element per outlier and the outlier itself survived.
            filtered_rr.append(value)
    return np.array(filtered_rr), outlier_indicies
def medianFilter(time, rr):
    """ Drop RR intervals that deviate more than 20 % from the median interval.

    :param time: unused; kept for interface compatibility. The returned timings are rebuilt
        from the surviving intervals instead.
    :param rr: sequence of RR intervals
    :return: tuple (timings, intervals): the surviving intervals and their cumulative sum
        as timestamps, both as float numpy arrays
    """
    percentageBorder = 0.8
    # Accept plain lists as well as arrays; the ratio test below needs array arithmetic.
    rr = np.asarray(rr, dtype=float)
    if rr.size == 0:
        # nothing to filter; also avoids taking the median of an empty array
        return np.array([]), rr.copy()

    ratio = rr / np.median(rr)
    keep = (ratio >= percentageBorder) & (ratio <= (2 - percentageBorder))
    f_rr = rr[keep]
    # timestamps are the running total of the intervals that were kept
    f_time = np.cumsum(f_rr)
    return f_time, f_rr
def detect_RR(sig, sampling_rate):
    """ Detect peaks in a signal and derive the RR intervals between them.

    :param sig: signal to search for peaks
    :param sampling_rate: sampling frequency of the signal, used to convert samples to time
    :return: tuple (RR intervals, timings of the second and later peaks, peak indices);
        intervals and timings are divided by sampling_rate. When no peak is found the
        first two elements are empty numpy arrays.
    """
    peak_indx, _ = peak_detector(sig, sampling_rate)
    if len(peak_indx) == 0:
        # Return arrays (not lists) so callers can keep using array arithmetic.
        return np.array([]), np.array([]), peak_indx

    peak_samples = np.arange(len(sig))[peak_indx]
    timings = peak_samples[1:]
    # distance, in samples, between each peak and the one before it
    RR_intervals = np.diff(peak_samples)
    return RR_intervals / sampling_rate, timings / sampling_rate, peak_indx
# extract HRV features in time domain
def HRV_time(RR_intervals, print_flag, badSignal=False, featureNames=None):
    """ Compute the time-domain HRV features from a series of RR intervals.

    :param RR_intervals: sequence of RR intervals
    :param print_flag: print the computed values (only when the signal is not bad)
    :param badSignal: when True nothing is computed and every feature except ``numRR`` is NaN
    :param featureNames: passed to checkForFeature to decide which features are returned
    :return: dictionary with the selected features among ``meanHr``, ``ibi``, ``sdnn``,
        ``sdsd``, ``rmssd``, ``pnn20``, ``pnn50``, ``sd``, ``sd2``, ``sd1/sd2`` and ``numRR``
    """
    feature_keys = ("meanHr", "ibi", "sdnn", "sdsd", "rmssd",
                    "pnn20", "pnn50", "sd", "sd2", "sd1/sd2")

    if badSignal:
        # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0
        values = dict.fromkeys(feature_keys, np.nan)
    else:
        rr = np.asarray(RR_intervals, dtype=float)
        ibi = np.mean(rr)  # mean inter beat interval
        mean_hr = 60 / ibi
        sdnn = np.std(rr)  # standard deviation of all RR intervals

        # absolute differences between successive RR intervals
        RR_diff = np.abs(np.diff(rr))
        RR_sqdiff = np.power(RR_diff, 2)
        sdsd = np.std(RR_diff)  # standard deviation of the successive differences
        rmssd = np.sqrt(np.mean(RR_sqdiff))  # root of the mean squared difference

        # share of successive differences above 0.02 and 0.05
        if len(RR_diff) > 0:
            pnn20 = 100 * float(np.count_nonzero(RR_diff > 0.02)) / float(len(RR_diff))
            pnn50 = 100 * float(np.count_nonzero(RR_diff > 0.05)) / float(len(RR_diff))
        else:
            pnn20 = np.nan
            pnn50 = np.nan

        # NOTE(review): the usual Poincare definitions are SD1 = sqrt(0.5 * SDSD^2) and
        # SD2 = sqrt(2 * SDNN^2 - 0.5 * SDSD^2); here SDNN and SDSD sit in the opposite
        # places, and SDSD is taken over absolute differences. Left unchanged so existing
        # feature values stay the same - confirm the intent.
        sd1 = np.sqrt(0.5 * sdnn * sdnn)
        sd2 = np.nan
        tmp = 2.0 * sdsd * sdsd - 0.5 * sdnn * sdnn
        if tmp > 0:  # avoid sqrt of negative values
            sd2 = np.sqrt(tmp)

        if print_flag:
            print("menHR:", mean_hr)
            print("IBI:", ibi)
            print("SDNN:", sdnn)
            print("sdsd", sdsd)
            print("RMSSD:", rmssd)
            print("pNN20:", pnn20)
            print("pNN50:", pnn50)
            print("sd1:", sd1)
            print("sd2:", sd2)
            print("sd1/sd2:", sd1 / sd2)

        values = {
            "meanHr": mean_hr,
            "ibi": ibi,
            "sdnn": sdnn,
            "sdsd": sdsd,
            "rmssd": rmssd,
            "pnn20": pnn20,
            "pnn50": pnn50,
            "sd": sd1,  # the key "sd" carries SD1
            "sd2": sd2,
            "sd1/sd2": sd1 / sd2,
        }

    out = {}
    for key in feature_keys:
        if checkForFeature(key, featureNames):
            out[key] = values[key]
    # the interval count is reported even for a bad signal
    if checkForFeature("numRR", featureNames):
        out['numRR'] = len(RR_intervals)
    return out
def timestamps_from_RR(rr_intervals):
    """ Turn a series of RR intervals into timestamps.

    :param rr_intervals: iterable of RR intervals
    :return: float numpy array in which element i is the sum of the first i + 1 intervals
    """
    # np.fromiter accepts any 1-D iterable (list, array, generator) and always yields floats
    intervals = np.fromiter(rr_intervals, dtype=float)
    return np.cumsum(intervals)