108 lines
4.6 KiB
Python
108 lines
4.6 KiB
Python
from zipfile import ZipFile
|
|
import warnings
|
|
from pathlib import Path
|
|
import pandas as pd
|
|
from pandas.core import indexing
|
|
import yaml
|
|
import csv
|
|
from collections import OrderedDict
|
|
from io import BytesIO, StringIO
|
|
|
|
def processAcceleration(x, y, z):
    """Build an axis-keyed dict from one accelerometer sample.

    Each of ``x``, ``y`` and ``z`` is passed through ``float`` (in that
    order), so numeric strings are accepted and anything ``float`` rejects
    raises the usual ``ValueError``/``TypeError``.

    Returns a dict with the keys ``'x'``, ``'y'`` and ``'z'``.
    """
    axis_names = ('x', 'y', 'z')
    return {axis: float(raw) for axis, raw in zip(axis_names, (x, y, z))}
|
|
|
|
|
|
def readFile(file, dtype):
    """Parse a raw Empatica sensor CSV buffer into timestamped samples.

    The first non-empty row supplies the start timestamp and the second the
    sampling frequency (both read from the first column). Every following
    row is one sample: the first sample is stamped with the start timestamp
    and each later sample advances the timestamp by ``1.0 / frequency``.

    Args:
        file: An open text buffer used as a context manager; it is closed
            when parsing finishes.
        dtype: Sensor name. Either ``'EMPATICA_ACCELEROMETER'`` (three
            columns per row) or one of the single-value sensors listed in
            ``single_value_sensors`` below (one value per row).

    Returns:
        An ``OrderedDict`` keyed by timestamp. Single-value sensors map to
        the raw string of the first column; the accelerometer maps to the
        dict produced by ``processAcceleration``.

    Raises:
        ValueError: If ``dtype`` is not a supported sensor name, or a header
            value cannot be converted to ``float``.
    """
    single_value_sensors = (
        'EMPATICA_ELECTRODERMAL_ACTIVITY',
        'EMPATICA_TEMPERATURE',
        'EMPATICA_HEARTRATE',
        'EMPATICA_BLOOD_VOLUME_PULSE',
    )
    samples = OrderedDict()

    # file is an in-memory buffer
    with file as csvfile:
        is_accelerometer = dtype == 'EMPATICA_ACCELEROMETER'
        if not is_accelerometer and dtype not in single_value_sensors:
            # Fail with a clear message instead of an unbound `reader` later.
            raise ValueError("sensor has an invalid name: {}".format(dtype))

        # A comma delimiter serves both layouts: single-value files hold one
        # field per line, so row[0] is that line's value. A newline is not a
        # usable field delimiter (recent CPython releases reject it).
        reader = csv.reader(csvfile, delimiter=',')

        timestamp = None
        hertz = None
        first_sample = True
        for row in reader:
            if not row:
                # Blank line: carries no sample and must not advance time.
                continue
            if timestamp is None:
                timestamp = float(row[0])
            elif hertz is None:
                hertz = float(row[0])
            else:
                if first_sample:
                    # The first sample keeps the start timestamp as-is.
                    first_sample = False
                else:
                    timestamp += 1.0 / hertz
                if is_accelerometer:
                    samples[timestamp] = processAcceleration(row[0], row[1], row[2])
                else:
                    samples[timestamp] = row[0]
    return samples
|
|
|
|
|
|
def extract_empatica_data(data, sensor):
    """Turn the raw bytes of one Empatica sensor CSV into a DataFrame.

    Args:
        data: Bytes-like UTF-8 content of the sensor's CSV file.
        sensor: Sensor name. Supported values are the single-value sensors
            listed in ``single_value_sensors`` below,
            ``'EMPATICA_ACCELEROMETER'`` and
            ``'EMPATICA_INTER_BEAT_INTERVAL'``.

    Returns:
        A DataFrame indexed by ``timestamp`` (the parsed timestamps
        multiplied by 1000 and truncated to 64-bit integers). The
        accelerometer yields float columns ``x``, ``y``, ``z``; every other
        sensor yields one float column named after the sensor with the
        ``EMPATICA_`` prefix removed and lower-cased.

    Raises:
        ValueError: If ``sensor`` is not one of the supported names.
    """
    single_value_sensors = (
        'EMPATICA_ELECTRODERMAL_ACTIVITY',
        'EMPATICA_TEMPERATURE',
        'EMPATICA_HEARTRATE',
        'EMPATICA_BLOOD_VOLUME_PULSE',
    )
    sensor_data_file = StringIO(bytes(data).decode('utf-8'))
    column = sensor.replace("EMPATICA_", "").lower()

    # read sensor data
    if sensor in single_value_sensors or sensor == 'EMPATICA_ACCELEROMETER':
        if sensor == 'EMPATICA_ACCELEROMETER':
            value_columns = ['x', 'y', 'z']
        else:
            value_columns = [column]
        samples = readFile(sensor_data_file, sensor)
        df = pd.DataFrame.from_dict(samples, orient='index', columns=value_columns)
        for name in value_columns:
            df[name] = df[name].astype(float)

    elif sensor == 'EMPATICA_INTER_BEAT_INTERVAL':
        try:
            raw = pd.read_csv(sensor_data_file, names=['timestamp', column], header=None)
        except pd.errors.EmptyDataError:
            raw = pd.DataFrame(columns=['timestamp', column])

        if raw.empty:
            # Nothing recorded: return an empty, correctly typed frame
            # instead of failing on the missing first row.
            df = pd.DataFrame({column: pd.Series(dtype=float)})
            df.index = pd.Index([], dtype=float)
        else:
            # Row 0 carries the start timestamp; the remaining rows carry
            # offsets that are added to it.
            timestampstart = float(raw['timestamp'].iloc[0])
            df = raw.iloc[1:].copy()
            df['timestamp'] = df['timestamp'].astype(float) + timestampstart
            df[column] = df[column].astype(float)
            df = df.set_index('timestamp')

    else:
        raise ValueError(
            "sensor has an invalid name: {}".format(sensor))

    # format timestamps: scale by 1000 and truncate to integers. int64 is
    # requested explicitly because plain `int` can be 32-bit on some
    # platforms, which is too small for the scaled values.
    df.index = (df.index * 1000).astype('int64')
    df.index.name = 'timestamp'
    return df
|
|
|
|
def pull_data(data_configuration, device, sensor, container, columns_to_download):
    """Collect one sensor's samples from every zip archive of a device.

    Searches ``data_configuration["FOLDER"] / device`` recursively for
    ``*.zip`` files, reads the member named ``container + '.csv'`` from each
    archive through ``extract_empatica_data`` and stacks the results.

    Args:
        data_configuration: Mapping with a ``"FOLDER"`` entry (root folder).
        device: Device folder name; also written to the ``device_id`` column.
        sensor: Sensor name forwarded to ``extract_empatica_data``.
        container: Base name (without ``.csv``) of the archive member to read.
        columns_to_download: Mapping whose values are the output column
            names; ``'timestamp'`` must be one of them.

    Returns:
        A DataFrame sorted by ``timestamp`` with duplicate timestamps
        removed (first occurrence kept) and ``device_id`` set to ``device``.

    Warns:
        When the device folder contains no zip files, and once for every
        archive that does not contain the expected CSV member.
    """
    sensor_csv = container + '.csv'
    device_folder = Path(data_configuration["FOLDER"]) / Path(device)

    participant_data = pd.DataFrame(columns=list(columns_to_download.values()))
    participant_data.set_index('timestamp', inplace=True)

    # Sorted so archives are processed in a reproducible order.
    available_zipfiles = sorted(device_folder.rglob("*.zip"))
    if not available_zipfiles:
        warnings.warn("There were no zip files in: {}. If you were expecting data for this participant the [EMPATICA][DEVICE_IDS] key in their participant file is missing the pid".format(device_folder))

    extracted = []
    for zip_path in available_zipfiles:
        print("Extracting {} data from {} for {}".format(sensor, zip_path, device))
        with ZipFile(zip_path, 'r') as zipFile:
            matches = [name for name in zipFile.namelist() if name == sensor_csv]
            for fileName in matches:
                extracted.append(extract_empatica_data(zipFile.read(fileName), sensor))
            # Checked per archive so a hit in one zip cannot hide a miss in
            # another.
            if not matches:
                warnings.warn("We could not find a zipped file for {} in {} (we tried to find {})".format(sensor, zipFile, sensor_csv))

    if extracted:
        # One concat instead of growing the frame inside the loop.
        participant_data = pd.concat([participant_data] + extracted, axis=0)

    # Stable sort: among equal timestamps the row read first stays first,
    # so `keep='first'` below picks it deterministically.
    participant_data.sort_index(inplace=True, ascending=True, kind='mergesort')
    participant_data.reset_index(inplace=True)
    participant_data.drop_duplicates(subset='timestamp', keep='first', inplace=True)
    participant_data["device_id"] = device
    return participant_data
|
|
|
|
# Example usage (container is the CSV base name inside each zip, e.g. "ACC" for ACC.csv):
# print(pull_data({'FOLDER': 'data/external/empatica'}, "e01", "EMPATICA_ACCELEROMETER", "ACC", {'TIMESTAMP': 'timestamp', 'DEVICE_ID': 'device_id', 'DOUBLE_VALUES_0': 'x', 'DOUBLE_VALUES_1': 'y', 'DOUBLE_VALUES_2': 'z'}))