Add empatica_zip docs

pull/128/head
JulioV 2021-03-11 13:23:56 -05:00
parent 8c4ac1fd43
commit a79997e0ac
8 changed files with 215 additions and 10 deletions

View File

@ -33,14 +33,15 @@ TIME_SEGMENTS: &time_segments
FILE: "data/external/timesegments_periodic.csv" FILE: "data/external/timesegments_periodic.csv"
INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs
# See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study
TIMEZONE: TIMEZONE:
TYPE: MULTIPLE TYPE: SINGLE
SINGLE: SINGLE:
TZCODE: America/New_York TZCODE: America/New_York
MULTIPLE: MULTIPLE:
TZCODES_FILE: data/external/multiple_timezones_example.csv TZCODES_FILE: data/external/multiple_timezones_example.csv
IF_MISSING_TZCODE: USE_DEFAULT IF_MISSING_TZCODE: STOP
DEFAULT_TZCODE: America/Los_Angeles DEFAULT_TZCODE: America/New_York
FITBIT: FITBIT:
ALLOW_MULTIPLE_TZ_PER_DEVICE: False ALLOW_MULTIPLE_TZ_PER_DEVICE: False
INFER_FROM_SMARTPHONE_TZ: False INFER_FROM_SMARTPHONE_TZ: False

View File

@ -0,0 +1,136 @@
# `empatica_zip`
This [data stream](../../datastreams/data-streams-introduction) handles Empatica sensor data downloaded as zip files from [E4 Connect](https://support.empatica.com/hc/en-us/articles/201608896-Data-export-and-formatting-from-E4-connect-).
## Container
For every participant, you need to create a subfolder named after their `pid` in the path specified by `[EMPATICA_DATA_STREAMS][empatica_zipfiles][FOLDER]`. You can add one or more Empatica zip files to any subfolder.
The script to connect and download data from this container is at:
```bash
src/data/streams/empatica_zip/container.R
```
## Format
The `format.yaml` maps and transforms columns in your raw data stream to the [mandatory columns RAPIDS needs for Empatica sensors](../mandatory-empatica-format). This file is at:
```bash
src/data/streams/empatica_zip/format.yaml
```
All columns are mutated from the raw data in the zip files so you don't need to modify any column mappings.
??? info "EMPATICA_ACCELEROMETER"
**RAPIDS_COLUMN_MAPPINGS**
| RAPIDS column | Stream column |
|-----------------|-----------------|
| TIMESTAMP | timestamp|
| DEVICE_ID | device_id|
| DOUBLE_VALUES_0 | double_values_0|
| DOUBLE_VALUES_1 | double_values_1|
| DOUBLE_VALUES_2 | double_values_2|
**MUTATION**
- **COLUMN_MAPPINGS** (None)
- **SCRIPTS** (None)
??? info "EMPATICA_HEARTRATE"
**RAPIDS_COLUMN_MAPPINGS**
| RAPIDS column | Stream column |
|-----------------|-----------------|
|TIMESTAMP | timestamp|
|DEVICE_ID | device_id|
|HEARTRATE | heartrate|
**MUTATION**
- **COLUMN_MAPPINGS** (None)
- **SCRIPTS** (None)
??? info "EMPATICA_TEMPERATURE"
**RAPIDS_COLUMN_MAPPINGS**
| RAPIDS column | Stream column |
|-----------------|-----------------|
|TIMESTAMP | timestamp|
|DEVICE_ID | device_id|
|TEMPERATURE | temperature|
**MUTATION**
- **COLUMN_MAPPINGS** (None)
- **SCRIPTS** (None)
??? info "EMPATICA_ELECTRODERMAL_ACTIVITY"
**RAPIDS_COLUMN_MAPPINGS**
| RAPIDS column | Stream column |
|-----------------|-----------------|
|TIMESTAMP | timestamp|
|DEVICE_ID | device_id|
|ELECTRODERMAL_ACTIVITY | electrodermal_activity|
**MUTATION**
- **COLUMN_MAPPINGS** (None)
- **SCRIPTS** (None)
??? info "EMPATICA_BLOOD_VOLUME_PULSE"
**RAPIDS_COLUMN_MAPPINGS**
| RAPIDS column | Stream column |
|-----------------|-----------------|
|TIMESTAMP | timestamp|
|DEVICE_ID | device_id|
|BLOOD_VOLUME_PULSE | blood_volume_pulse|
**MUTATION**
- **COLUMN_MAPPINGS** (None)
- **SCRIPTS** (None)
??? info "EMPATICA_INTER_BEAT_INTERVAL"
**RAPIDS_COLUMN_MAPPINGS**
| RAPIDS column | Stream column |
|-----------------|-----------------|
|TIMESTAMP | timestamp|
|DEVICE_ID | device_id|
|INTER_BEAT_INTERVAL | inter_beat_interval|
**MUTATION**
- **COLUMN_MAPPINGS** (None)
- **SCRIPTS** (None)
??? info "EMPATICA_TAGS"
**RAPIDS_COLUMN_MAPPINGS**
| RAPIDS column | Stream column |
|-----------------|-----------------|
|TIMESTAMP | timestamp|
|DEVICE_ID | device_id|
|TAGS | tags|
**MUTATION**
- **COLUMN_MAPPINGS** (None)
- **SCRIPTS** (None)

View File

@ -0,0 +1,61 @@
# Mandatory Empatica Format
This is a description of the format RAPIDS needs to process data for the following Empatica sensors.
??? info "EMPATICA_ACCELEROMETER"
| RAPIDS column | Description |
|-----------------|--------------------------------------------------------------|
| TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged |
| DEVICE_ID | A string that uniquely identifies a device |
| DOUBLE_VALUES_0 | x axis of acceleration |
| DOUBLE_VALUES_1 | y axis of acceleration |
| DOUBLE_VALUES_2 | z axis of acceleration |
??? info "EMPATICA_HEARTRATE"
| RAPIDS column | Description |
|-----------------|-----------------|
| TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) |
| DEVICE_ID | A string that uniquely identifies a device |
| HEARTRATE | Intraday heartrate |
??? info "EMPATICA_TEMPERATURE"
| RAPIDS column | Description |
|-----------------|-----------------|
| TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) |
| DEVICE_ID | A string that uniquely identifies a device |
| TEMPERATURE | temperature |
??? info "EMPATICA_ELECTRODERMAL_ACTIVITY"
| RAPIDS column | Description |
|-----------------|-----------------|
| TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) |
| DEVICE_ID | A string that uniquely identifies a device |
| ELECTRODERMAL_ACTIVITY | electrical conductance |
??? info "EMPATICA_BLOOD_VOLUME_PULSE"
| RAPIDS column | Description |
|-----------------|-----------------|
| TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) |
| DEVICE_ID | A string that uniquely identifies a device |
| BLOOD_VOLUME_PULSE | blood volume pulse |
??? info "EMPATICA_INTER_BEAT_INTERVAL"
| RAPIDS column | Description |
|-----------------|-----------------|
| TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) |
| DEVICE_ID | A string that uniquely identifies a device |
| INTER_BEAT_INTERVAL | inter beat interval |
??? info "EMPATICA_TAGS"
| RAPIDS column | Description |
|-----------------|-----------------|
| TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) |
| DEVICE_ID | A string that uniquely identifies a device |
| TAGS | tags |

View File

@ -88,10 +88,13 @@ nav:
- Mandatory Phone Format: datastreams/mandatory-phone-format.md - Mandatory Phone Format: datastreams/mandatory-phone-format.md
- Fitbit: - Fitbit:
- fitbitjson_mysql: datastreams/fitbitjson-mysql.md - fitbitjson_mysql: datastreams/fitbitjson-mysql.md
- fitbitparsed_mysql: datastreams/fitbitparsed-mysql.md
- fitbitjson_csv: datastreams/fitbitjson-csv.md - fitbitjson_csv: datastreams/fitbitjson-csv.md
- fitbitparsed_mysql: datastreams/fitbitparsed-mysql.md
- fitbitparsed_csv: datastreams/fitbitparsed-csv.md - fitbitparsed_csv: datastreams/fitbitparsed-csv.md
- Mandatory Fitbit Format: datastreams/mandatory-fitbit-format.md - Mandatory Fitbit Format: datastreams/mandatory-fitbit-format.md
- Empatica:
- empatica_zip: datastreams/empatica-zip.md
- Mandatory Empatica Format: datastreams/mandatory-empatica-format.md
- Add New Data Streams: datastreams/add-new-data-streams.md - Add New Data Streams: datastreams/add-new-data-streams.md
- Behavioral Features: - Behavioral Features:
- Introduction: features/feature-introduction.md - Introduction: features/feature-introduction.md

View File

@ -45,11 +45,12 @@ def readFile(file, dtype):
def extract_empatica_data(data, sensor): def extract_empatica_data(data, sensor):
sensor_data_file = BytesIO(data).getvalue().decode('utf-8') sensor_data_file = BytesIO(data).getvalue().decode('utf-8')
sensor_data_file = StringIO(sensor_data_file) sensor_data_file = StringIO(sensor_data_file)
column = sensor.replace("EMPATICA_", "").lower()
# read sensor data # read sensor data
if sensor in ('EMPATICA_ELECTRODERMAL_ACTIVITY', 'EMPATICA_TEMPERATURE', 'EMPATICA_HEARTRATE', 'EMPATICA_BLOOD_VOLUME_PULSE'): if sensor in ('EMPATICA_ELECTRODERMAL_ACTIVITY', 'EMPATICA_TEMPERATURE', 'EMPATICA_HEARTRATE', 'EMPATICA_BLOOD_VOLUME_PULSE'):
ddict = readFile(sensor_data_file, sensor) ddict = readFile(sensor_data_file, sensor)
df = pd.DataFrame.from_dict(ddict, orient='index', columns=[sensor]) df = pd.DataFrame.from_dict(ddict, orient='index', columns=[column])
df[sensor] = df[sensor].astype(float) df[column] = df[column].astype(float)
df.index.name = 'timestamp' df.index.name = 'timestamp'
elif sensor == 'EMPATICA_ACCELEROMETER': elif sensor == 'EMPATICA_ACCELEROMETER':
@ -61,16 +62,16 @@ def extract_empatica_data(data, sensor):
df.index.name = 'timestamp' df.index.name = 'timestamp'
elif sensor == 'EMPATICA_INTER_BEAT_INTERVAL': elif sensor == 'EMPATICA_INTER_BEAT_INTERVAL':
df = pd.read_csv(sensor_data_file, names=['timestamp', sensor], header=None) df = pd.read_csv(sensor_data_file, names=['timestamp', column], header=None)
timestampstart = float(df['timestamp'][0]) timestampstart = float(df['timestamp'][0])
df['timestamp'] = (df['timestamp'][1:len(df)]).astype(float) + timestampstart df['timestamp'] = (df['timestamp'][1:len(df)]).astype(float) + timestampstart
df = df.drop([0]) df = df.drop([0])
df[sensor] = df[sensor].astype(float) df[column] = df[column].astype(float)
df = df.set_index('timestamp') df = df.set_index('timestamp')
else: else:
raise ValueError( raise ValueError(
"sensor can only be one of ['electrodermal_activity','temperature','heartrate','blood_volume_pulse','accelerometer','inter_beat_interval'].") "sensor has an invalid name: {}".format(sensor))
# format timestamps # format timestamps
df.index *= 1000 df.index *= 1000

View File

@ -119,6 +119,9 @@ pull_phone_data <- function(){
device_oss <- replace(device_oss, device_oss == "multiple", "infer") # support multiple for retro compatibility device_oss <- replace(device_oss, device_oss == "multiple", "infer") # support multiple for retro compatibility
validate_deviceid_platforms(devices, device_oss, participant_file) validate_deviceid_platforms(devices, device_oss, participant_file)
if(length(devices) == 0)
stop("There were no PHONE device ids in this participant file:", participant_file)
if(length(device_oss) == 1) if(length(device_oss) == 1)
device_oss <- rep(device_oss, length(devices)) device_oss <- rep(device_oss, length(devices))

View File

@ -86,7 +86,7 @@ pull_wearable_data_main <- function(){
rapids_schema <- read_yaml(rapids_schema_file) rapids_schema <- read_yaml(rapids_schema_file)
devices <- participant_data[[toupper(device_type)]]$DEVICE_IDS devices <- participant_data[[toupper(device_type)]]$DEVICE_IDS
if(length(devices) == 0) if(length(devices) == 0)
devices <- c(pid) stop("There were no ", device_type ," device ids in this participant file: ", participant_file)
validate_expected_columns_mapping(stream_schema, rapids_schema, sensor, rapids_schema_file, stream_format) validate_expected_columns_mapping(stream_schema, rapids_schema, sensor, rapids_schema_file, stream_format)
expected_columns <- tolower(names(stream_schema[[sensor]][["RAPIDS_COLUMN_MAPPINGS"]])) expected_columns <- tolower(names(stream_schema[[sensor]][["RAPIDS_COLUMN_MAPPINGS"]]))
participant_data <- setNames(data.frame(matrix(ncol = length(expected_columns), nrow = 0)), expected_columns) participant_data <- setNames(data.frame(matrix(ncol = length(expected_columns), nrow = 0)), expected_columns)