264 lines
9.5 KiB
Python
264 lines
9.5 KiB
Python
|
import pandas as pd
|
||
|
import scipy.signal as scisig
|
||
|
import os
|
||
|
import numpy as np
|
||
|
|
||
|
|
||
|
def get_user_input(prompt):
    '''Show prompt on the console and return the line the user typed.

    raw_input is used when that name exists (Python 2); when looking it up
    raises NameError (Python 3) the builtin input is used instead.

    INPUT:
        prompt: string, text displayed before reading the answer

    OUTPUT:
        the value returned by raw_input / input for that prompt
    '''
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    return read_line(prompt)
|
||
|
|
||
|
|
||
|
def getInputLoadFile():
    '''Asks user for type of file and file path. Loads corresponding data.

    The answer to the data type prompt is stripped of surrounding whitespace
    and lower-cased before it is matched, so " E4 " selects the e4 loader.

    OUTPUT:
        data:             whatever the selected loadData_* function returns. On a
                          successful load this is a DataFrame whose index is a list
                          of timestamps at 8Hz and whose columns include
                          AccelZ, AccelY, AccelX, Temp, EDA, filtered_eda
        filepath_confirm: string, the path that was read; for e4 this is the
                          EDA.csv file inside the directory the user entered

    RAISES:
        ValueError: if the data type entered is not e4, q, shimmer, or misc.
                    (Previously this case fell through to the return statement
                    and failed with an UnboundLocalError.)
    '''
    print("Please enter information about your EDA file... ")
    dataType = get_user_input("\tData Type (e4, q, shimmer, or misc): ").strip().lower()

    # Loaders that take the path of one file; e4 is handled separately because
    # it takes a directory instead.
    single_file_loaders = {
        'q': loadData_Qsensor,
        'shimmer': loadData_shimmer,
        'misc': loadData_misc,
    }

    if dataType == 'e4':
        filepath = get_user_input("\tPath to E4 directory: ")
        filepath_confirm = os.path.join(filepath, "EDA.csv")
        data = loadData_E4(filepath)
    elif dataType in single_file_loaders:
        filepath = get_user_input("\tFile path: ")
        filepath_confirm = filepath
        data = single_file_loaders[dataType](filepath)
    else:
        print("Error: not a valid file choice")
        raise ValueError(
            "Unrecognized data type %r; expected e4, q, shimmer, or misc" % dataType
        )

    return data, filepath_confirm
|
||
|
|
||
|
def getOutputPath():
    '''Asks user for an output file name and directory and combines them.

    OUTPUT:
        string, the directory joined with the file name; '.csv' is appended
        when the joined path does not already end with '.csv'
    '''
    print("")
    print("Where would you like to save the computed output file?")
    file_name = get_user_input('\tFile name: ')
    directory = get_user_input('\tFile directory (./ for this directory): ')

    destination = os.path.join(directory, file_name)
    if not destination.endswith('.csv'):
        destination += '.csv'
    return destination
|
||
|
|
||
|
def loadData_Qsensor(filepath):
    '''
    Loads a Q sensor csv export, brings it to 8Hz and adds a low-pass filtered EDA column.
    Original note: currently assumes sampling rate of 8hz, 16hz, 32hz; if sampling rate
    is 16hz or 32hz the signal is downsampled

    INPUT:
        filepath:  string, path to input file

    OUTPUT:
        data:      DataFrame, index is a list of timestamps at 8Hz, columns include
                   AccelZ, AccelY, AccelX, Temp, EDA, filtered_eda.
                   None is returned (after printing an error) when reading the
                   header raises IOError.
    '''
    # The first rows of the file are metadata; they are read separately from the samples
    try:
        header = pd.read_csv(filepath, nrows=5)
    except IOError:
        print("Error!! Couldn't load file, make sure the filepath is correct and you are using a csv from the q sensor software\n\n")
        return None

    # Sample rate: the integer after the ':' in the first cell of header row 3
    rate_cell = header.iloc[3, 0]
    sampleRate = int(rate_cell.split(":")[1].strip())

    # Sample rows start after the first 7 lines. reset_index() turns the index
    # produced by the parser back into an ordinary column before the six names are assigned.
    samples = pd.read_csv(filepath, skiprows=7).reset_index()
    samples.columns = ['AccelZ', 'AccelY', 'AccelX', 'Battery', 'Temp', 'EDA']

    # Start time: characters [12:-10] of the first cell of header row 4
    start_cell = header.iloc[4, 0]
    startTime = pd.to_datetime(start_cell[12:-10])

    # Bring the samples to 8Hz with a timestamp index
    resampled = interpolateDataTo8Hz(samples, sampleRate, startTime)

    # Keep everything except the Battery column
    data = resampled[['AccelZ', 'AccelY', 'AccelX', 'Temp', 'EDA']]

    # Low-pass butterworth filter (cutoff:1hz, fs:8hz, order:6)
    data['filtered_eda'] = butter_lowpass_filter(data['EDA'], 1.0, 8, 6)

    return data
|
||
|
|
||
|
def _loadSingleFile_E4(filepath, list_of_columns, expected_sample_rate, freq):
    '''
    Loads one E4 csv file and returns its samples at 8Hz.

    The file is read as: header row = start time in seconds since the epoch,
    first data row = sample rate, remaining rows = samples.

    INPUT:
        filepath:             string, path to the csv file
        list_of_columns:      list of strings, names assigned to the sample columns
        expected_sample_rate: number, rate the file is expected to have; a mismatch
                              only prints a warning, loading continues with the
                              rate found in the file
        freq:                 unused, kept so existing callers keep working

    OUTPUT:
        data: DataFrame returned by interpolateDataTo8Hz for the samples
    '''
    # Load data
    data = pd.read_csv(filepath)

    # The start time is stored as the name of the first column
    startTime = pd.to_datetime(float(data.columns.values[0]), unit="s")

    # Purely positional lookup of the first cell. The previous data.iloc[0][0]
    # indexed a string-labelled Series with an integer, which pandas deprecates.
    sampleRate = float(data.iloc[0, 0])

    # Drop the sample-rate row and renumber the remaining rows from 0
    data = data.iloc[1:].reset_index(drop=True)

    # Name the columns
    data.columns = list_of_columns
    if sampleRate != expected_sample_rate:
        print('ERROR, NOT SAMPLED AT {0}HZ. PROBLEMS WILL OCCUR\n'.format(expected_sample_rate))

    # Make sure data has a sample rate of 8Hz
    data = interpolateDataTo8Hz(data, sampleRate, startTime)

    return data
|
||
|
|
||
|
|
||
|
def loadData_E4(filepath):
    '''
    Loads EDA.csv, ACC.csv and TEMP.csv from an E4 directory into one DataFrame.

    INPUT:
        filepath: string, path to the directory holding the three csv files

    OUTPUT:
        DataFrame built by outer-joining the EDA (with filtered_eda), accelerometer
        and temperature frames, cut to the row count of the shortest of the three
    '''
    accel_axes = ["AccelX", "AccelY", "AccelZ"]

    # EDA, plus its low-pass butterworth filtered copy (cutoff:1hz, fs:8hz, order:6)
    eda_path = os.path.join(filepath, 'EDA.csv')
    eda_data = _loadSingleFile_E4(eda_path, ["EDA"], 4, "250L")
    eda_data['filtered_eda'] = butter_lowpass_filter(eda_data['EDA'], 1.0, 8, 6)

    # Accelerometer; values are divided by 64 (original note: scale to +-2g)
    acc_path = os.path.join(filepath, 'ACC.csv')
    acc_data = _loadSingleFile_E4(acc_path, list(accel_axes), 32, "31250U")
    acc_data[accel_axes] = acc_data[accel_axes] / 64.0

    # Temperature
    temp_path = os.path.join(filepath, 'TEMP.csv')
    temperature_data = _loadSingleFile_E4(temp_path, ["Temp"], 4, "250L")

    merged = eda_data.join(acc_data, how='outer')
    merged = merged.join(temperature_data, how='outer')

    # E4 sometimes records different length files - keep only as many rows as the shortest one
    shortest = min(len(acc_data), len(eda_data), len(temperature_data))

    return merged[:shortest]
|
||
|
|
||
|
def loadData_shimmer(filepath):
    '''
    Loads a tab-separated shimmer export, resamples it to 8Hz and adds a
    low-pass filtered EDA column.

    INPUT:
        filepath: string, path to the tab-separated input file

    OUTPUT:
        data: DataFrame indexed by timestamp at 8Hz with columns
              AccelZ, AccelY, AccelX, Temp, EDA, filtered_eda.
              Temp is filled with 0 (see TODO below).

    RAISES:
        KeyError: if no column matching Timestamp, one of the Accel_LN axes or
                  Skin_Conductance is found in the file
    '''
    # Lines 0 and 1 of the file are skipped; the next line supplies the column names
    data = pd.read_csv(filepath, sep='\t', skiprows=(0, 1))

    # Map the first column whose name contains each search text to a standard name
    rename_cols = {}
    for search, new_col in (('Timestamp', 'Timestamp'),
                            ('Accel_LN_X', 'AccelX'),
                            ('Accel_LN_Y', 'AccelY'),
                            ('Accel_LN_Z', 'AccelZ'),
                            ('Skin_Conductance', 'EDA')):
        matches = [c for c in data.columns if search in c]
        if matches:
            rename_cols[matches[0]] = new_col

    data = data.rename(columns=rename_cols)

    # TODO: Assuming no temperature is recorded
    data['Temp'] = 0

    # Drop the units row
    data = data[data['Timestamp'] != 'ms']

    # The units row can leave the timestamps as text, so cast them to numbers
    # before treating them as milliseconds since the epoch. Passing strings
    # together with unit= is deprecated in pandas.
    timestamps = pd.to_datetime(pd.to_numeric(data['Timestamp']), unit='ms')

    # Keep the needed columns and convert them to numeric in one step; this builds
    # a new frame instead of assigning column by column into a slice.
    data = data[['AccelZ', 'AccelY', 'AccelX', 'Temp', 'EDA']].apply(pd.to_numeric)
    data.index = timestamps

    # Convert to 8Hz: average within 125 millisecond bins, then fill empty bins.
    # "ms" replaces the deprecated "L" alias for milliseconds.
    data = data.resample("125ms").mean()
    data = data.interpolate()

    # Get the filtered data using a low-pass butterworth filter (cutoff:1hz, fs:8hz, order:6)
    data['filtered_eda'] = butter_lowpass_filter(data['EDA'], 1.0, 8, 6)

    return data
|
||
|
|
||
|
|
||
|
def _parsePowerOfTwo(text):
    '''Return text as an int when it is a positive integer power of two, else None.'''
    text = text.strip()
    if not text.isdigit():
        return None
    try:
        value = int(text)
    except ValueError:
        # isdigit() accepts some characters (e.g. superscript digits) that int() rejects
        return None
    # A power of two has a single bit set, so clearing its lowest set bit leaves 0.
    # Unlike a floating point log2 comparison this is exact and it rejects 0.
    if value > 0 and value & (value - 1) == 0:
        return value
    return None


def _parseStartTime(text):
    '''Return text parsed by pd.to_datetime, or None when it is not a usable date/time.'''
    try:
        parsed = pd.to_datetime(text)
    except (ValueError, OverflowError):
        return None
    # A string result (input handed back unparsed) or a null result such as NaT
    # both mean the text was not a date/time.
    if isinstance(parsed, str) or pd.isnull(parsed):
        return None
    return parsed


def loadData_getColNames(data_columns):
    '''
    Asks the user which columns hold each data stream, the sample rate and the start time.
    Every question is repeated until a valid answer is given.

    INPUT:
        data_columns: collection of the column names available in the file

    OUTPUT:
        sampleRate:   int, a positive power of two
        startTime:    value returned by pd.to_datetime for the text entered
        new_colnames: list of 5 strings, the columns chosen for EDA, temperature,
                      acceleration X, acceleration Y and acceleration Z, in that order
    '''
    print("Here are the data columns of your file: ")
    print(data_columns)

    # Find the column names for each of the 5 data streams
    colnames = ['EDA data', 'Temperature data', 'Acceleration X', 'Acceleration Y', 'Acceleration Z']
    new_colnames = []

    for description in colnames:
        prompt = "Column name that contains " + description + ": "
        chosen = get_user_input(prompt)
        while chosen not in data_columns:
            print("Column not found. Please try again")
            print("Here are the data columns of your file: ")
            print(data_columns)

            chosen = get_user_input(prompt)
        new_colnames.append(chosen)

    # Get user input on sample rate
    sampleRate = _parsePowerOfTwo(get_user_input("Enter sample rate (must be an integer power of 2): "))
    while sampleRate is None:
        print("Not an integer power of two")
        sampleRate = _parsePowerOfTwo(get_user_input("Enter sample rate (must be a integer power of 2): "))

    # Get user input on start time
    startTime = _parseStartTime(get_user_input("Enter a start time (format: YYYY-MM-DD HH:MM:SS): "))
    while startTime is None:
        print("Not a valid date/time")
        startTime = _parseStartTime(get_user_input("Enter a start time (format: YYYY-MM-DD HH:MM:SS): "))

    return sampleRate, startTime, new_colnames
|
||
|
|
||
|
|
||
|
def loadData_misc(filepath):
    '''
    Loads a csv file whose layout is described interactively by the user.

    INPUT:
        filepath: string, path to the csv file

    OUTPUT:
        data: DataFrame returned by interpolateDataTo8Hz for the columns
              AccelZ, AccelY, AccelX, Temp, EDA, with filtered_eda added
    '''
    raw = pd.read_csv(filepath)

    # Ask which columns hold which stream, plus sample rate and start time
    sampleRate, startTime, new_colnames = loadData_getColNames(raw.columns.values)

    # The answers come back in the order EDA, Temp, AccelX, AccelY, AccelZ
    standard_names = ['EDA', 'Temp', 'AccelX', 'AccelY', 'AccelZ']
    column_map = dict(zip(new_colnames, standard_names))
    raw = raw.rename(columns=column_map)
    selected = raw[['AccelZ', 'AccelY', 'AccelX', 'Temp', 'EDA']]

    # Make sure data has a sample rate of 8Hz
    data = interpolateDataTo8Hz(selected, sampleRate, startTime)

    # Low-pass butterworth filter (cutoff:1hz, fs:8hz, order:6)
    data['filtered_eda'] = butter_lowpass_filter(data['EDA'], 1.0, 8, 6)

    return data
|
||
|
|
||
|
def interpolateDataTo8Hz(data, sample_rate, startTime):
    '''
    Gives data a timestamp index at 8Hz (one row every 125 milliseconds).

    Below 8Hz the rows are stamped at their own rate, placed into 125 millisecond
    bins and the empty bins are interpolated. Every rate below 8Hz is handled this
    way; previously only 2 and 4 were, and e.g. 1Hz failed inside resample.
    Above 8Hz every (int(sample_rate) // 8)-th row is kept, with no filtering
    before the rows are dropped. At 8Hz the rows are only re-indexed.

    INPUT:
        data:        DataFrame of samples in recording order. For rates of 8Hz
                     and below its index is overwritten in place.
        sample_rate: number, samples per second of data
        startTime:   timestamp of the first sample

    OUTPUT:
        data: DataFrame indexed by timestamps 125 milliseconds apart, passed
              through interpolateEmptyValues

    RAISES:
        ValueError: if sample_rate is not greater than zero
    '''
    if sample_rate <= 0:
        raise ValueError("sample_rate must be greater than zero, got %r" % (sample_rate,))

    if sample_rate < 8:
        # Upsample by linear interpolation: stamp each row at the recorded rate
        # (2Hz -> 500ms apart, 4Hz -> 250ms apart, ...), then re-bin at 125ms.
        # "ms" replaces the deprecated "L" alias for milliseconds.
        spacing = pd.Timedelta(seconds=1.0 / sample_rate)
        data.index = pd.date_range(start=startTime, periods=len(data), freq=spacing)
        data = data.resample("125ms").mean()
    else:
        if sample_rate > 8:
            # Downsample by keeping every step-th row
            step = int(sample_rate) // 8
            data = data.iloc[::step]
        # Set the index to be 8Hz
        data.index = pd.date_range(start=startTime, periods=len(data), freq='125ms')

    # Interpolate all empty values
    data = interpolateEmptyValues(data)
    return data
|
||
|
|
||
|
def interpolateEmptyValues(data):
    '''
    Fills missing values in every column using Series.interpolate() with its defaults.

    INPUT:
        data: DataFrame, modified in place

    OUTPUT:
        data: the same DataFrame object that was passed in
    '''
    for column in data.columns.values:
        filled = data[column].interpolate()
        data.loc[:, column] = filled

    return data
|
||
|
|
||
|
def butter_lowpass(cutoff, fs, order=5):
    '''
    Filtering helper: designs a digital low-pass butterworth filter.

    INPUT:
        cutoff: number, cutoff frequency, passed to scipy.signal.butter as Wn
        fs:     number, sampling frequency, passed to scipy.signal.butter as fs
        order:  int, filter order (default 5)

    OUTPUT:
        the second-order-sections (output='sos') array returned by scipy.signal.butter
    '''
    design = dict(btype='low', analog=False, output='sos', fs=fs)
    return scisig.butter(order, cutoff, **design)
|
||
|
|
||
|
def butter_lowpass_filter(data, cutoff, fs, order=5):
    '''
    Filtering helper: low-pass filters data with the design from butter_lowpass.

    INPUT:
        data:   array-like signal to filter
        cutoff: number, cutoff frequency handed to butter_lowpass
        fs:     number, sampling frequency handed to butter_lowpass
        order:  int, filter order handed to butter_lowpass (default 5)

    OUTPUT:
        the array returned by scipy.signal.sosfilt for data
    '''
    sections = butter_lowpass(cutoff, fs, order=order)
    return scisig.sosfilt(sections, data)
|