# rapids/src/data/streams/empatica_zip/container.py
#
# 108 lines
# 4.6 KiB
# Python

from zipfile import ZipFile
import warnings
from pathlib import Path
import pandas as pd
from pandas.core import indexing
import yaml
import csv
from collections import OrderedDict
from io import BytesIO, StringIO
def processAcceleration(x, y, z):
    """Build one accelerometer sample as a dict of floats keyed by axis.

    Each argument is passed through ``float()``, so numeric strings are
    accepted. The returned dict has the keys 'x', 'y' and 'z', in that order.
    """
    axis_names = ('x', 'y', 'z')
    return {axis: float(raw) for axis, raw in zip(axis_names, (x, y, z))}
def readFile(file, dtype):
    """Parse an Empatica sensor CSV into an ordered ``{timestamp: value}`` map.

    The first non-empty row supplies the starting timestamp and the second the
    sampling rate in Hz (only the first field of each is read). Every later
    row is one sample: the first sample is stamped with the starting
    timestamp and each following one is ``1.0 / hertz`` later than its
    predecessor. Blank rows are skipped.

    Args:
        file: Text buffer (e.g. ``io.StringIO``) holding the CSV. It is used
            as a context manager, so it is closed when parsing finishes.
        dtype: Sensor name. 'EMPATICA_ELECTRODERMAL_ACTIVITY',
            'EMPATICA_TEMPERATURE', 'EMPATICA_HEARTRATE' and
            'EMPATICA_BLOOD_VOLUME_PULSE' store the first field of each row
            as an unconverted string; 'EMPATICA_ACCELEROMETER' stores the
            dict produced by ``processAcceleration`` from the first three
            fields.

    Returns:
        An ``OrderedDict`` mapping float timestamps to sample values, in file
        order.

    Raises:
        ValueError: If ``dtype`` is not one of the sensor names above, or a
            header field cannot be converted to float.
    """
    single_value_sensors = (
        'EMPATICA_ELECTRODERMAL_ACTIVITY',
        'EMPATICA_TEMPERATURE',
        'EMPATICA_HEARTRATE',
        'EMPATICA_BLOOD_VOLUME_PULSE',
    )
    is_single_value = dtype in single_value_sensors
    if not is_single_value and dtype != 'EMPATICA_ACCELEROMETER':
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError("dtype has an invalid name: {}".format(dtype))

    samples = OrderedDict()
    start = None
    hertz = None
    timestamp = None
    sample_count = 0
    with file as csvfile:
        # A comma works for every sensor: single-value files carry one field
        # per line, so row[0] is the whole value. A newline is not a usable
        # field delimiter (records are already split on line endings) and
        # newer CPython versions raise ValueError for it.
        reader = csv.reader(csvfile, delimiter=',')
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing one).
                continue
            if start is None:
                start = timestamp = float(row[0])
            elif hertz is None:
                hertz = float(row[0])
            else:
                # The first sample keeps the starting timestamp; later ones
                # advance by one sampling period each.
                if sample_count > 0:
                    timestamp = timestamp + 1.0 / hertz
                if is_single_value:
                    samples[timestamp] = row[0]
                else:
                    samples[timestamp] = processAcceleration(row[0], row[1], row[2])
                sample_count += 1
    return samples
def extract_empatica_data(data, sensor):
    """Turn the raw bytes of one Empatica sensor CSV into a DataFrame.

    The value column is named after the sensor with the ``EMPATICA_`` prefix
    removed and lower-cased (the accelerometer uses 'x', 'y', 'z' instead).
    The returned frame is indexed by 'timestamp': the parsed timestamps
    multiplied by 1000 and cast to 64-bit integers (the cast truncates any
    remaining fraction).

    Args:
        data: UTF-8 encoded bytes-like content of the CSV file.
        sensor: One of 'EMPATICA_ELECTRODERMAL_ACTIVITY',
            'EMPATICA_TEMPERATURE', 'EMPATICA_HEARTRATE',
            'EMPATICA_BLOOD_VOLUME_PULSE', 'EMPATICA_ACCELEROMETER' or
            'EMPATICA_INTER_BEAT_INTERVAL'.

    Returns:
        A ``pandas.DataFrame`` with float value column(s) and an int64 index
        named 'timestamp'.

    Raises:
        ValueError: If ``sensor`` is not one of the names listed above.
    """
    single_value_sensors = (
        'EMPATICA_ELECTRODERMAL_ACTIVITY',
        'EMPATICA_TEMPERATURE',
        'EMPATICA_HEARTRATE',
        'EMPATICA_BLOOD_VOLUME_PULSE',
    )
    # bytes() accepts any bytes-like input and is a no-op for plain bytes.
    sensor_data_file = StringIO(bytes(data).decode('utf-8'))
    column = sensor.replace("EMPATICA_", "").lower()

    # read sensor data
    if sensor in single_value_sensors or sensor == 'EMPATICA_ACCELEROMETER':
        if sensor == 'EMPATICA_ACCELEROMETER':
            value_columns = ['x', 'y', 'z']
        else:
            value_columns = [column]
        samples = readFile(sensor_data_file, sensor)
        df = pd.DataFrame.from_dict(samples, orient='index', columns=value_columns)
        for name in value_columns:
            df[name] = df[name].astype(float)
        df.index.name = 'timestamp'
    elif sensor == 'EMPATICA_INTER_BEAT_INTERVAL':
        raw = pd.read_csv(sensor_data_file, names=['timestamp', column], header=None)
        # The first row carries the starting timestamp; the remaining rows
        # hold offsets that are added to it.
        start = float(raw['timestamp'].iloc[0])
        df = raw.iloc[1:].copy()
        df['timestamp'] = df['timestamp'].astype(float) + start
        df[column] = df[column].astype(float)
        df = df.set_index('timestamp')
    else:
        raise ValueError(
            "sensor has an invalid name: {}".format(sensor))

    # format timestamps: scale by 1000 and store as integers. The width is
    # spelled out because plain ``int`` can be 32-bit on some platforms,
    # which is too small for the scaled values.
    df.index = (df.index * 1000).astype('int64')
    return df
def pull_data(data_configuration, device, sensor, container, columns_to_download):
    """Collect one sensor's data for a device from all of its zip archives.

    Every ``*.zip`` found recursively under ``<FOLDER>/<device>`` is searched
    for an entry named ``container + '.csv'``; each match is parsed with
    ``extract_empatica_data`` and the results are concatenated, sorted by
    timestamp and de-duplicated on timestamp (first occurrence kept).

    Args:
        data_configuration: Mapping with a "FOLDER" key holding the root data
            directory.
        device: Device id; used as the sub-folder name and written to the
            'device_id' column of the result.
        sensor: Sensor name passed on to ``extract_empatica_data``.
        container: Base name (without '.csv') of the file to read inside each
            archive.
        columns_to_download: Mapping whose values are the column names of the
            initial empty frame; they must include 'timestamp'.

    Returns:
        A ``pandas.DataFrame`` with a 'timestamp' column, the sensor value
        column(s) and a 'device_id' column. It is empty when nothing was
        found.

    Warns:
        UserWarning: When the device folder holds no zip files, and once for
            each archive that does not contain the expected CSV.
    """
    sensor_csv = container + '.csv'
    participant_data = pd.DataFrame(columns=list(columns_to_download.values()))
    participant_data.set_index('timestamp', inplace=True)

    device_folder = Path(data_configuration["FOLDER"]) / Path(device)
    available_zipfiles = list(device_folder.rglob("*.zip"))
    if not available_zipfiles:
        warnings.warn("There were no zip files in: {}. If you were expecting data for this participant the [EMPATICA][DEVICE_IDS] key in their participant file is missing the pid".format(device_folder))

    # Frames are gathered here and concatenated once after the loop, which
    # avoids re-copying the accumulated data for every archive.
    extracted_frames = []
    for zip_path in available_zipfiles:
        print("Extracting {} data from {} for {}".format(sensor, zip_path, device))
        with ZipFile(zip_path, 'r') as zipFile:
            matching_names = [name for name in zipFile.namelist() if name == sensor_csv]
            for fileName in matching_names:
                extracted_frames.append(extract_empatica_data(zipFile.read(fileName), sensor))
            # Report per archive, so one archive that has the file does not
            # hide the fact that another one is missing it.
            if not matching_names:
                warnings.warn("We could not find a zipped file for {} in {} (we tried to find {})".format(sensor, zipFile, sensor_csv))

    if extracted_frames:
        # The empty frame goes first so its columns lead the result.
        participant_data = pd.concat([participant_data] + extracted_frames, axis=0)

    participant_data.sort_index(inplace=True, ascending=True)
    participant_data.reset_index(inplace=True)
    participant_data.drop_duplicates(subset='timestamp', keep='first', inplace=True)
    participant_data["device_id"] = device
    return participant_data
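# Example usage (the fourth argument is the container: the base name of the CSV looked up inside each zip, e.g. "ACC" for ACC.csv):
# print(pull_data({'FOLDER': 'data/external/empatica'}, "e01", "EMPATICA_ACCELEROMETER", "ACC", {'TIMESTAMP': 'timestamp', 'DEVICE_ID': 'device_id', 'DOUBLE_VALUES_0': 'x', 'DOUBLE_VALUES_1': 'y', 'DOUBLE_VALUES_2': 'z'}))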