From a79997e0ac98569d174def0047035b2b0a29e9cd Mon Sep 17 00:00:00 2001 From: JulioV Date: Thu, 11 Mar 2021 13:23:56 -0500 Subject: [PATCH] Add empatica_zip docs --- config.yaml | 7 +- docs/datastreams/empatica-zip.md | 136 ++++++++++++++++++ docs/datastreams/mandatory-empatica-format.md | 61 ++++++++ mkdocs.yml | 5 +- .../container.py | 11 +- .../format.yaml | 0 src/data/streams/pull_phone_data.R | 3 + src/data/streams/pull_wearable_data.R | 2 +- 8 files changed, 215 insertions(+), 10 deletions(-) create mode 100644 docs/datastreams/empatica-zip.md create mode 100644 docs/datastreams/mandatory-empatica-format.md rename src/data/streams/{empatica_zipfiles => empatica_zip}/container.py (91%) rename src/data/streams/{empatica_zipfiles => empatica_zip}/format.yaml (100%) diff --git a/config.yaml b/config.yaml index 5b6750f0..51f81372 100644 --- a/config.yaml +++ b/config.yaml @@ -33,14 +33,15 @@ TIME_SEGMENTS: &time_segments FILE: "data/external/timesegments_periodic.csv" INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs +# See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study TIMEZONE: - TYPE: MULTIPLE + TYPE: SINGLE SINGLE: TZCODE: America/New_York MULTIPLE: TZCODES_FILE: data/external/multiple_timezones_example.csv - IF_MISSING_TZCODE: USE_DEFAULT - DEFAULT_TZCODE: America/Los_Angeles + IF_MISSING_TZCODE: STOP + DEFAULT_TZCODE: America/New_York FITBIT: ALLOW_MULTIPLE_TZ_PER_DEVICE: False INFER_FROM_SMARTPHONE_TZ: False diff --git a/docs/datastreams/empatica-zip.md b/docs/datastreams/empatica-zip.md new file mode 100644 index 00000000..1d7a54d2 --- /dev/null +++ b/docs/datastreams/empatica-zip.md @@ -0,0 +1,136 @@ +# `empatica_zip` +This [data stream](../../datastreams/data-streams-introduction) handles Empatica sensor data downloaded as zip files using the [E4 Connect](https://support.empatica.com/hc/en-us/articles/201608896-Data-export-and-formatting-from-E4-connect-). 
+ +## Container + +You need to create a subfolder for every participant named after their `pid` in the path specified by `[EMPATICA_DATA_STREAMS][empatica_zipfiles][FOLDER]`. You can add one or more Empatica zip files to any subfolder. + +The script to connect and download data from this container is at: +```bash +src/data/streams/empatica_zip/container.py +``` + +## Format + + +The `format.yaml` maps and transforms columns in your raw data stream to the [mandatory columns RAPIDS needs for Empatica sensors](../mandatory-empatica-format). This file is at: + +```bash +src/data/streams/empatica_zip/format.yaml +``` + +All columns are mutated from the raw data in the zip files so you don't need to modify any column mappings. + +??? info "EMPATICA_ACCELEROMETER" + + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + | TIMESTAMP | timestamp| + | DEVICE_ID | device_id| + | DOUBLE_VALUES_0 | double_values_0| + | DOUBLE_VALUES_1 | double_values_1| + | DOUBLE_VALUES_2 | double_values_2| + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + +??? info "EMPATICA_HEARTRATE" + + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + |TIMESTAMP | timestamp| + |DEVICE_ID | device_id| + |HEARTRATE | heartrate| + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + +??? info "EMPATICA_TEMPERATURE" + + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + |TIMESTAMP | timestamp| + |DEVICE_ID | device_id| + |TEMPERATURE | temperature| + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + +??? 
info "EMPATICA_ELECTRODERMAL_ACTIVITY" + + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + |TIMESTAMP | timestamp| + |DEVICE_ID | device_id| + |ELECTRODERMAL_ACTIVITY | electrodermal_activity| + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + +??? info "EMPATICA_BLOOD_VOLUME_PULSE" + + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + |TIMESTAMP | timestamp| + |DEVICE_ID | device_id| + |BLOOD_VOLUME_PULSE | blood_volume_pulse| + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + +??? info "EMPATICA_INTER_BEAT_INTERVAL" + + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + |TIMESTAMP | timestamp| + |DEVICE_ID | device_id| + |INTER_BEAT_INTERVAL | inter_beat_interval| + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + +??? info "EMPATICA_TAGS" + + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + |TIMESTAMP | timestamp| + |DEVICE_ID | device_id| + |TAGS | tags| + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) \ No newline at end of file diff --git a/docs/datastreams/mandatory-empatica-format.md b/docs/datastreams/mandatory-empatica-format.md new file mode 100644 index 00000000..1b69044c --- /dev/null +++ b/docs/datastreams/mandatory-empatica-format.md @@ -0,0 +1,61 @@ +# Mandatory Empatica Format + +This is a description of the format RAPIDS needs to process data for the following Empatica sensors. + +??? 
info "EMPATICA_ACCELEROMETER" + + | RAPIDS column | Description | + |-----------------|--------------------------------------------------------------| + | TIMESTAMP | An UNIX timestamp (13 digits) when a row of data was logged | + | DEVICE_ID | A string that uniquely identifies a device | + | DOUBLE_VALUES_0 | x axis of acceleration | + | DOUBLE_VALUES_1 | y axis of acceleration | + | DOUBLE_VALUES_2 | z axis of acceleration | + +??? info "EMPATICA_HEARTRATE" + + | RAPIDS column | Description | + |-----------------|-----------------| + | TIMESTAMP | An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) | + | DEVICE_ID | A string that uniquely identifies a device | + | HEARTRATE | Intraday heartrate | + +??? info "EMPATICA_TEMPERATURE" + + | RAPIDS column | Description | + |-----------------|-----------------| + | TIMESTAMP | An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) | + | DEVICE_ID | A string that uniquely identifies a device | + | TEMPERATURE | temperature | + +??? info "EMPATICA_ELECTRODERMAL_ACTIVITY" + + | RAPIDS column | Description | + |-----------------|-----------------| + | TIMESTAMP | An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) | + | DEVICE_ID | A string that uniquely identifies a device | + | ELECTRODERMAL_ACTIVITY | electrical conductance | + +??? info "EMPATICA_BLOOD_VOLUME_PULSE" + + | RAPIDS column | Description | + |-----------------|-----------------| + | TIMESTAMP | An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) | + | DEVICE_ID | A string that uniquely identifies a device | + | BLOOD_VOLUME_PULSE | blood volume pulse | + +??? 
info "EMPATICA_INTER_BEAT_INTERVAL" + + | RAPIDS column | Description | + |-----------------|-----------------| + | TIMESTAMP | An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) | + | DEVICE_ID | A string that uniquely identifies a device | + | INTER_BEAT_INTERVAL | inter beat interval | + +??? info "EMPATICA_TAGS" + + | RAPIDS column | Description | + |-----------------|-----------------| + | TIMESTAMP | An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) | + | DEVICE_ID | A string that uniquely identifies a device | + | TAGS | tags | diff --git a/mkdocs.yml b/mkdocs.yml index 4a5a70e5..bb84cc52 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -88,10 +88,13 @@ nav: - Mandatory Phone Format: datastreams/mandatory-phone-format.md - Fitbit: - fitbitjson_mysql: datastreams/fitbitjson-mysql.md - - fitbitparsed_mysql: datastreams/fitbitparsed-mysql.md - fitbitjson_csv: datastreams/fitbitjson-csv.md + - fitbitparsed_mysql: datastreams/fitbitparsed-mysql.md - fitbitparsed_csv: datastreams/fitbitparsed-csv.md - Mandatory Fitbit Format: datastreams/mandatory-fitbit-format.md + - Empatica: + - empatica_zip: datastreams/empatica-zip.md + - Mandatory Empatica Format: datastreams/mandatory-empatica-format.md - Add New Data Streams: datastreams/add-new-data-streams.md - Behavioral Features: - Introduction: features/feature-introduction.md diff --git a/src/data/streams/empatica_zipfiles/container.py b/src/data/streams/empatica_zip/container.py similarity index 91% rename from src/data/streams/empatica_zipfiles/container.py rename to src/data/streams/empatica_zip/container.py index 3c87bb2b..a1e3a993 100644 --- a/src/data/streams/empatica_zipfiles/container.py +++ b/src/data/streams/empatica_zip/container.py @@ -45,11 +45,12 @@ def readFile(file, dtype): def extract_empatica_data(data, sensor): sensor_data_file = BytesIO(data).getvalue().decode('utf-8') sensor_data_file = StringIO(sensor_data_file) + 
column = sensor.replace("EMPATICA_", "").lower() # read sensor data if sensor in ('EMPATICA_ELECTRODERMAL_ACTIVITY', 'EMPATICA_TEMPERATURE', 'EMPATICA_HEARTRATE', 'EMPATICA_BLOOD_VOLUME_PULSE'): ddict = readFile(sensor_data_file, sensor) - df = pd.DataFrame.from_dict(ddict, orient='index', columns=[sensor]) - df[sensor] = df[sensor].astype(float) + df = pd.DataFrame.from_dict(ddict, orient='index', columns=[column]) + df[column] = df[column].astype(float) df.index.name = 'timestamp' elif sensor == 'EMPATICA_ACCELEROMETER': @@ -61,16 +62,16 @@ def extract_empatica_data(data, sensor): df.index.name = 'timestamp' elif sensor == 'EMPATICA_INTER_BEAT_INTERVAL': - df = pd.read_csv(sensor_data_file, names=['timestamp', sensor], header=None) + df = pd.read_csv(sensor_data_file, names=['timestamp', column], header=None) timestampstart = float(df['timestamp'][0]) df['timestamp'] = (df['timestamp'][1:len(df)]).astype(float) + timestampstart df = df.drop([0]) - df[sensor] = df[sensor].astype(float) + df[column] = df[column].astype(float) df = df.set_index('timestamp') else: raise ValueError( - "sensor can only be one of ['electrodermal_activity','temperature','heartrate','blood_volume_pulse','accelerometer','inter_beat_interval'].") + "sensor has an invalid name: {}".format(sensor)) # format timestamps df.index *= 1000 diff --git a/src/data/streams/empatica_zipfiles/format.yaml b/src/data/streams/empatica_zip/format.yaml similarity index 100% rename from src/data/streams/empatica_zipfiles/format.yaml rename to src/data/streams/empatica_zip/format.yaml diff --git a/src/data/streams/pull_phone_data.R b/src/data/streams/pull_phone_data.R index 0126a8bd..e9478a93 100644 --- a/src/data/streams/pull_phone_data.R +++ b/src/data/streams/pull_phone_data.R @@ -119,6 +119,9 @@ pull_phone_data <- function(){ device_oss <- replace(device_oss, device_oss == "multiple", "infer") # support multiple for retro compatibility validate_deviceid_platforms(devices, device_oss, participant_file) + 
if(length(devices) == 0) + stop("There were no PHONE device ids in this participant file:", participant_file) + if(length(device_oss) == 1) device_oss <- rep(device_oss, length(devices)) diff --git a/src/data/streams/pull_wearable_data.R b/src/data/streams/pull_wearable_data.R index 09bf5fa3..990bba41 100644 --- a/src/data/streams/pull_wearable_data.R +++ b/src/data/streams/pull_wearable_data.R @@ -86,7 +86,7 @@ pull_wearable_data_main <- function(){ rapids_schema <- read_yaml(rapids_schema_file) devices <- participant_data[[toupper(device_type)]]$DEVICE_IDS if(length(devices) == 0) - devices <- c(pid) + stop("There were no ", device_type ," device ids in this participant file: ", participant_file) validate_expected_columns_mapping(stream_schema, rapids_schema, sensor, rapids_schema_file, stream_format) expected_columns <- tolower(names(stream_schema[[sensor]][["RAPIDS_COLUMN_MAPPINGS"]])) participant_data <- setNames(data.frame(matrix(ncol = length(expected_columns), nrow = 0)), expected_columns)