Merge branch 'feature/dbdp-empatica' into develop

feature/plugin_sentimental
JulioV 2021-02-21 17:33:33 -05:00
commit 4819e22fd5
33 changed files with 1427 additions and 56 deletions

View File

@@ -288,6 +288,99 @@ for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys():
# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
for provider in config["EMPATICA_ACCELEROMETER"]["PROVIDERS"].keys():
if config["EMPATICA_ACCELEROMETER"]["PROVIDERS"][provider]["COMPUTE"]:
for pid in config["PIDS"]:
suffixes = get_zip_suffixes(pid)
files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_unzipped_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_raw_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_joined.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_ACCELEROMETER"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_accelerometer.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
for provider in config["EMPATICA_HEARTRATE"]["PROVIDERS"].keys():
if config["EMPATICA_HEARTRATE"]["PROVIDERS"][provider]["COMPUTE"]:
for pid in config["PIDS"]:
suffixes = get_zip_suffixes(pid)
files_to_compute.extend(expand("data/raw/{pid}/empatica_heartrate_unzipped_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_heartrate_raw_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_heartrate_joined.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/empatica_heartrate_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_HEARTRATE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_heartrate.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
for provider in config["EMPATICA_TEMPERATURE"]["PROVIDERS"].keys():
if config["EMPATICA_TEMPERATURE"]["PROVIDERS"][provider]["COMPUTE"]:
for pid in config["PIDS"]:
suffixes = get_zip_suffixes(pid)
files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_unzipped_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_raw_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_joined.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/empatica_temperature_features/empatica_temperature_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_TEMPERATURE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_temperature.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
for provider in config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"].keys():
if config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"][provider]["COMPUTE"]:
for pid in config["PIDS"]:
suffixes = get_zip_suffixes(pid)
files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_unzipped_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_raw_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_joined.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_electrodermal_activity.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
for provider in config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"].keys():
if config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"][provider]["COMPUTE"]:
for pid in config["PIDS"]:
suffixes = get_zip_suffixes(pid)
files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_unzipped_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_raw_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_joined.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_blood_volume_pulse.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
for provider in config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"].keys():
if config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"][provider]["COMPUTE"]:
for pid in config["PIDS"]:
suffixes = get_zip_suffixes(pid)
files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_unzipped_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_raw_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_joined.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_inter_beat_interval.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
if isinstance(config["EMPATICA_TAGS"]["PROVIDERS"], dict):
for provider in config["EMPATICA_TAGS"]["PROVIDERS"].keys():
if config["EMPATICA_TAGS"]["PROVIDERS"][provider]["COMPUTE"]:
for pid in config["PIDS"]:
suffixes = get_zip_suffixes(pid)
files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_unzipped_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_raw_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_joined.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/empatica_tags_features/empatica_tags_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_TAGS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_tags.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
# Visualization for Data Exploration
if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]:
files_to_compute.append("reports/data_exploration/histogram_phone_data_yield.html")

View File

@@ -21,9 +21,11 @@ CREATE_PARTICIPANT_FILES:
DEVICE_ID_COLUMN: device_id # column name
IGNORED_DEVICE_IDS: []
FITBIT_SECTION:
ADD: TRUE
ADD: FALSE
DEVICE_ID_COLUMN: device_id # column name
IGNORED_DEVICE_IDS: []
EMPATICA_SECTION:
ADD: FALSE
# See https://www.rapids.science/latest/setup/configuration/#time-segments
TIME_SEGMENTS: &time_segments
@@ -408,6 +410,78 @@ FITBIT_STEPS_INTRADAY:
# FEATURES: []
########################################################################################################################
# EMPATICA #
########################################################################################################################
EMPATICA_DATA_CONFIGURATION:
SOURCE:
TYPE: ZIP_FILE
FOLDER: data/external/empatica
TIMEZONE:
TYPE: SINGLE # Empatica devices don't support time zones, so we read this data in the timezone indicated by VALUE
VALUE: *timezone
# Sensors ------
EMPATICA_ACCELEROMETER:
TABLE: ACC
PROVIDERS:
DBDP:
COMPUTE: False
FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
SRC_FOLDER: "dbdp" # inside src/features/empatica_accelerometer
SRC_LANGUAGE: "python"
EMPATICA_HEARTRATE:
TABLE: HR
PROVIDERS:
DBDP:
COMPUTE: False
FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr"]
SRC_FOLDER: "dbdp" # inside src/features/empatica_heartrate
SRC_LANGUAGE: "python"
EMPATICA_TEMPERATURE:
TABLE: TEMP
PROVIDERS:
DBDP:
COMPUTE: False
FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
SRC_FOLDER: "dbdp" # inside src/features/empatica_heartrate
SRC_LANGUAGE: "python"
EMPATICA_ELECTRODERMAL_ACTIVITY:
TABLE: EDA
PROVIDERS:
DBDP:
COMPUTE: False
FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"]
SRC_FOLDER: "dbdp" # inside src/features/empatica_electrodermal_activity
SRC_LANGUAGE: "python"
EMPATICA_BLOOD_VOLUME_PULSE:
TABLE: BVP
PROVIDERS:
DBDP:
COMPUTE: False
FEATURES: ["maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp", "diffminmodebvp", "entropybvp"]
SRC_FOLDER: "dbdp" # inside src/features/empatica_blood_volume_pulse
SRC_LANGUAGE: "python"
EMPATICA_INTER_BEAT_INTERVAL:
TABLE: IBI
PROVIDERS:
DBDP:
COMPUTE: False
FEATURES: ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi", "diffminmodeibi", "entropyibi"]
SRC_FOLDER: "dbdp" # inside src/features/inter_beat_interval
SRC_LANGUAGE: "python"
EMPATICA_TAGS:
TABLE: TAGS
PROVIDERS: # None implemented yet
########################################################################################################################
# PLOTS #

View File

@@ -1,5 +1,9 @@
# Change Log
## Next release
- Add support for Empatica devices (all sensors)
- Add logo
- Move Citation page to the Setup section
## v0.4.3
- Fix bug when any of the rows from any sensor do not belong to a time segment
## v0.4.2

View File

@@ -14,6 +14,14 @@ If you used RAPIDS, please cite [this paper](https://preprints.jmir.org/preprint
DOI: 10.2196/preprints.23246
URL: https://preprints.jmir.org/preprint/23246
## DBDP (all Empatica sensors)
If you computed features using the provider `[DBDP]` of any of the Empatica sensors (accelerometer, heart rate, temperature, EDA, BVP, IBI, tags), cite [this paper](https://www.cambridge.org/core/journals/journal-of-clinical-and-translational-science/article/digital-biomarker-discovery-pipeline-an-open-source-software-platform-for-the-development-of-digital-biomarkers-using-mhealth-and-wearables-data/A6696CEF138247077B470F4800090E63) in addition to RAPIDS.
!!! cite "Bent et al. citation"
Bent, B., Wang, K., Grzesiak, E., Jiang, C., Qi, Y., Jiang, Y., Cho, P., Zingler, K., Ogbeide, F.I., Zhao, A., Runge, R., Sim, I., Dunn, J. (2020). The Digital Biomarker Discovery Pipeline: An open source software platform for the development of digital biomarkers using mHealth and wearables data. Journal of Clinical and Translational Science, 1-28. doi:10.1017/cts.2020.511
## Panda (accelerometer)
If you computed accelerometer features using the provider `[PHONE_ACCELEROMETER][PANDA]`, cite [this paper](https://pubmed.ncbi.nlm.nih.gov/31657854/) in addition to RAPIDS.

View File

@@ -1,10 +1,9 @@
# Add New Features
!!! hint
We recommend reading the [Behavioral Features Introduction](../feature-introduction/) before reading this page
!!! hint
You won't have to deal with time zones, dates, times, data cleaning or preprocessing. The data that RAPIDS pipes to your feature extraction code is ready to process.
- We recommend reading the [Behavioral Features Introduction](../feature-introduction/) before reading this page.
- You can implement new features in Python or R scripts (a minimal Python skeleton is sketched after this list).
- You won't have to deal with time zones, dates, times, data cleaning or preprocessing. The data that RAPIDS pipes to your feature extraction code is ready to process.
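For orientation, a feature provider script boils down to one function that returns a row of features per time segment. A minimal Python sketch follows; the function name matches the `VEGA` tutorial below, but the parameter names and the behavior of `filter_data_by_segment` are assumptions, so defer to the tutorial for the authoritative form:
```python
import pandas as pd

def vega_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    data = pd.read_csv(sensor_data_files["sensor_data"])
    # filter_data_by_segment is supplied by RAPIDS: assumed to keep only the rows
    # that fall inside `time_segment` and to add a `local_segment` column
    data = filter_data_by_segment(data, time_segment)
    features = pd.DataFrame(columns=["local_segment"])
    if not data.empty:
        features = data.groupby("local_segment").size().to_frame("rowcount").reset_index()
    return features
```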
## New Features for Existing Sensors
@@ -19,6 +18,8 @@ As a tutorial, we will add a new provider for `PHONE_ACCELEROMETER` called `VEGA
??? info "Existing Sensors"
An existing sensor is any of the phone, Fitbit, or Empatica sensors with a configuration entry in `config.yaml`:
Smartphone (AWARE)
- Phone Accelerometer
- Phone Activity Recognition
- Phone Applications Foreground
@@ -33,6 +34,9 @@ As a tutorial, we will add a new provider for `PHONE_ACCELEROMETER` called `VEGA
- Phone Screen
- Phone WiFi Connected
- Phone WiFi Visible
Fitbit
- Fitbit Data Yield
- Fitbit Heart Rate Summary
- Fitbit Heart Rate Intraday
@@ -40,6 +44,16 @@ As a tutorial, we will add a new provider for `PHONE_ACCELEROMETER` called `VEGA
- Fitbit Steps Summary
- Fitbit Steps Intraday
Empatica
- Empatica Accelerometer
- Empatica Heart Rate
- Empatica Temperature
- Empatica Electrodermal Activity
- Empatica Blood Volume Pulse
- Empatica Inter Beat Interval
- Empatica Tags
### Modify the `config.yaml` file

View File

@@ -0,0 +1,44 @@
# Empatica Accelerometer
Sensor parameters description for `[EMPATICA_ACCELEROMETER]`:
|Key                              | Description |
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|`[TABLE]`| Name of the CSV file containing accelerometer data that is compressed inside an Empatica zip file. Since these zip files are created [automatically](https://support.empatica.com/hc/en-us/articles/201608896-Data-export-and-formatting-from-E4-connect-) by Empatica, there is no need to change the value of this attribute.
## DBDP provider
!!! info "Available time segments and platforms"
- Available for all time segments
!!! info "File Sequence"
```bash
- data/raw/{pid}/empatica_accelerometer_unzipped_{zip-file}.csv # one per zip file
- data/raw/{pid}/empatica_accelerometer_raw_{zip-file}.csv # one per zip file
- data/raw/{pid}/empatica_accelerometer_joined.csv
- data/raw/{pid}/empatica_accelerometer_with_datetime.csv
- data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_{language}_{provider_key}.csv
- data/processed/features/{pid}/empatica_accelerometer.csv
```
Parameters description for `[EMPATICA_ACCELEROMETER][PROVIDERS][DBDP]`:
|Key                              | Description |
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|`[COMPUTE]`| Set to `True` to extract `EMPATICA_ACCELEROMETER` features from the `DBDP` provider|
|`[FEATURES]` | Features to be computed, see table below
Features description for `[EMPATICA_ACCELEROMETER][PROVIDERS][DBDP]`:
|Feature |Units |Description|
|-------------------------- |---------- |---------------------------|
|maxmagnitude |m/s^2^ |The maximum magnitude of acceleration ($\|acceleration\| = \sqrt{x^2 + y^2 + z^2}$).
|minmagnitude |m/s^2^ |The minimum magnitude of acceleration.
|avgmagnitude |m/s^2^ |The average magnitude of acceleration.
|medianmagnitude |m/s^2^ |The median magnitude of acceleration.
|stdmagnitude |m/s^2^ |The standard deviation of the magnitude of acceleration.
!!! note "Assumptions/Observations"
1. Analyzing accelerometer data is a memory-intensive task. If RAPIDS crashes, it is likely because the accelerometer dataset for a participant is too big to fit in memory. We are considering different alternatives to overcome this problem; if this is something you need, get in touch and we can discuss how to implement it.
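The five features above are plain aggregations of the acceleration magnitude defined in the table. A minimal pandas sketch, assuming the segment's samples arrive in a DataFrame with `x`, `y`, `z` columns (the column names are an assumption; the pipeline's intermediate files may use different ones):
```python
import numpy as np
import pandas as pd

def magnitude_features(data: pd.DataFrame) -> dict:
    # |acceleration| = sqrt(x^2 + y^2 + z^2), computed per sample
    magnitude = np.sqrt(data["x"] ** 2 + data["y"] ** 2 + data["z"] ** 2)
    return {
        "maxmagnitude": magnitude.max(),
        "minmagnitude": magnitude.min(),
        "avgmagnitude": magnitude.mean(),
        "medianmagnitude": magnitude.median(),
        "stdmagnitude": magnitude.std(),
    }
```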

View File

@@ -0,0 +1,48 @@
# Empatica Blood Volume Pulse
Sensor parameters description for `[EMPATICA_BLOOD_VOLUME_PULSE]`:
|Key                              | Description |
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|`[TABLE]`| Name of the CSV file containing blood volume pulse data that is compressed inside an Empatica zip file. Since these zip files are created [automatically](https://support.empatica.com/hc/en-us/articles/201608896-Data-export-and-formatting-from-E4-connect-) by Empatica, there is no need to change the value of this attribute.
## DBDP provider
!!! info "Available time segments and platforms"
- Available for all time segments
!!! info "File Sequence"
```bash
- data/raw/{pid}/empatica_blood_volume_pulse_unzipped_{zip-file}.csv # one per zip file
- data/raw/{pid}/empatica_blood_volume_pulse_raw_{zip-file}.csv # one per zip file
- data/raw/{pid}/empatica_blood_volume_pulse_joined.csv
- data/raw/{pid}/empatica_blood_volume_pulse_with_datetime.csv
- data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_{language}_{provider_key}.csv
- data/processed/features/{pid}/empatica_blood_volume_pulse.csv
```
Parameters description for `[EMPATICA_BLOOD_VOLUME_PULSE][PROVIDERS][DBDP]`:
|Key                              | Description |
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|`[COMPUTE]` | Set to `True` to extract `EMPATICA_BLOOD_VOLUME_PULSE` features from the `DBDP` provider|
|`[FEATURES]` | Features to be computed from blood volume pulse intraday data, see table below |
Features description for `[EMPATICA_BLOOD_VOLUME_PULSE][PROVIDERS][DBDP]`:
|Feature |Units |Description|
|-------------------------- |-------------- |---------------------------|
|maxbvp |- |The maximum blood volume pulse during a time segment.
|minbvp |- |The minimum blood volume pulse during a time segment.
|avgbvp |- |The average blood volume pulse during a time segment.
|medianbvp |- |The median of blood volume pulse during a time segment.
|modebvp |- |The mode of blood volume pulse during a time segment.
|stdbvp |- |The standard deviation of blood volume pulse during a time segment.
|diffmaxmodebvp |- |The difference between the maximum and mode blood volume pulse during a time segment.
|diffminmodebvp |- |The difference between the mode and minimum blood volume pulse during a time segment.
|entropybvp |nats |Shannon's entropy measurement based on blood volume pulse during a time segment.
!!! note "Assumptions/Observations"
For more information about BVP, read [this](https://support.empatica.com/hc/en-us/articles/360029719792-E4-data-BVP-expected-signal).

View File

@@ -0,0 +1,48 @@
# Empatica Electrodermal Activity
Sensor parameters description for `[EMPATICA_ELECTRODERMAL_ACTIVITY]`:
|Key                              | Description |
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|`[TABLE]`| Name of the CSV file containing electrodermal activity data that is compressed inside an Empatica zip file. Since these zip files are created [automatically](https://support.empatica.com/hc/en-us/articles/201608896-Data-export-and-formatting-from-E4-connect-) by Empatica, there is no need to change the value of this attribute.
## DBDP provider
!!! info "Available time segments and platforms"
- Available for all time segments
!!! info "File Sequence"
```bash
- data/raw/{pid}/empatica_electrodermal_activity_unzipped_{zip-file}.csv # one per zip file
- data/raw/{pid}/empatica_electrodermal_activity_raw_{zip-file}.csv # one per zip file
- data/raw/{pid}/empatica_electrodermal_activity_joined.csv
- data/raw/{pid}/empatica_electrodermal_activity_with_datetime.csv
- data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_{language}_{provider_key}.csv
- data/processed/features/{pid}/empatica_electrodermal_activity.csv
```
Parameters description for `[EMPATICA_ELECTRODERMAL_ACTIVITY][PROVIDERS][DBDP]`:
|Key                              | Description |
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|`[COMPUTE]` | Set to `True` to extract `EMPATICA_ELECTRODERMAL_ACTIVITY` features from the `DBDP` provider|
|`[FEATURES]` | Features to be computed from electrodermal activity intraday data, see table below |
Features description for `[EMPATICA_ELECTRODERMAL_ACTIVITY][PROVIDERS][DBDP]`:
|Feature |Units |Description|
|-------------------------- |-------------- |---------------------------|
|maxeda |microsiemens |The maximum electrical conductance during a time segment.
|mineda |microsiemens |The minimum electrical conductance during a time segment.
|avgeda |microsiemens |The average electrical conductance during a time segment.
|medianeda |microsiemens |The median of electrical conductance during a time segment.
|modeeda |microsiemens |The mode of electrical conductance during a time segment.
|stdeda |microsiemens |The standard deviation of electrical conductance during a time segment.
|diffmaxmodeeda |microsiemens |The difference between the maximum and mode electrical conductance during a time segment.
|diffminmodeeda |microsiemens |The difference between the mode and minimum electrical conductance during a time segment.
|entropyeda |nats |Shannon's entropy measurement based on electrical conductance during a time segment.
!!! note "Assumptions/Observations"
None

View File

@@ -0,0 +1,48 @@
# Empatica Heart Rate
Sensor parameters description for `[EMPATICA_HEARTRATE]`:
|Key                              | Description |
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|`[TABLE]`| Name of the CSV file containing heart rate data that is compressed inside an Empatica zip file. Since these zip files are created [automatically](https://support.empatica.com/hc/en-us/articles/201608896-Data-export-and-formatting-from-E4-connect-) by Empatica, there is no need to change the value of this attribute.
## DBDP provider
!!! info "Available time segments and platforms"
- Available for all time segments
!!! info "File Sequence"
```bash
- data/raw/{pid}/empatica_heartrate_unzipped_{zip-file}.csv # one per zip file
- data/raw/{pid}/empatica_heartrate_raw_{zip-file}.csv # one per zip file
- data/raw/{pid}/empatica_heartrate_joined.csv
- data/raw/{pid}/empatica_heartrate_with_datetime.csv
- data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_{language}_{provider_key}.csv
- data/processed/features/{pid}/empatica_heartrate.csv
```
Parameters description for `[EMPATICA_HEARTRATE][PROVIDERS][DBDP]`:
|Key                              | Description |
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|`[COMPUTE]` | Set to `True` to extract `EMPATICA_HEARTRATE` features from the `DBDP` provider|
|`[FEATURES]` | Features to be computed from heart rate intraday data, see table below |
Features description for `[EMPATICA_HEARTRATE][PROVIDERS][DBDP]`:
|Feature |Units |Description|
|-------------------------- |-------------- |---------------------------|
|maxhr |beats/min |The maximum heart rate during a time segment.
|minhr |beats/min |The minimum heart rate during a time segment.
|avghr |beats/min |The average heart rate during a time segment.
|medianhr |beats/min |The median of heart rate during a time segment.
|modehr |beats/min |The mode of heart rate during a time segment.
|stdhr |beats/min |The standard deviation of heart rate during a time segment.
|diffmaxmodehr |beats/min |The difference between the maximum and mode heart rate during a time segment.
|diffminmodehr |beats/min |The difference between the mode and minimum heart rate during a time segment.
|entropyhr |nats |Shannon's entropy measurement based on heart rate during a time segment.
!!! note "Assumptions/Observations"
We extract the features above from the average heart rate values that Empatica computes in [10-second windows](https://support.empatica.com/hc/en-us/articles/360029469772-E4-data-HR-csv-explanation).
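These features reduce to descriptive statistics over the segment's heart rate series. A minimal pandas sketch of one plausible implementation (the `hr` Series, the tie-breaking for the mode, and the per-unique-value entropy estimate are assumptions; the DBDP provider's actual code may differ):
```python
import numpy as np
import pandas as pd

def heartrate_features(hr: pd.Series) -> dict:
    counts = hr.value_counts()                 # each unique HR reading is one bin
    mode = counts.index[0]                     # most frequent value (ties broken arbitrarily)
    probs = counts / counts.sum()
    entropy = float(-(probs * np.log(probs)).sum())  # Shannon entropy in nats
    return {
        "maxhr": hr.max(), "minhr": hr.min(), "avghr": hr.mean(),
        "medianhr": hr.median(), "modehr": mode, "stdhr": hr.std(),
        "diffmaxmodehr": hr.max() - mode, "diffminmodehr": mode - hr.min(),
        "entropyhr": entropy,
    }
```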

View File

@@ -0,0 +1,48 @@
# Empatica Inter Beat Interval
Sensor parameters description for `[EMPATICA_INTER_BEAT_INTERVAL]`:
|Key                              | Description |
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|`[TABLE]`| Name of the CSV file containing inter beat interval data that is compressed inside an Empatica zip file. Since these zip files are created [automatically](https://support.empatica.com/hc/en-us/articles/201608896-Data-export-and-formatting-from-E4-connect-) by Empatica, there is no need to change the value of this attribute.
## DBDP provider
!!! info "Available time segments and platforms"
- Available for all time segments
!!! info "File Sequence"
```bash
- data/raw/{pid}/empatica_inter_beat_interval_unzipped_{zip-file}.csv # one per zip file
- data/raw/{pid}/empatica_inter_beat_interval_raw_{zip-file}.csv # one per zip file
- data/raw/{pid}/empatica_inter_beat_interval_joined.csv
- data/raw/{pid}/empatica_inter_beat_interval_with_datetime.csv
- data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_{language}_{provider_key}.csv
- data/processed/features/{pid}/empatica_inter_beat_interval.csv
```
Parameters description for `[EMPATICA_INTER_BEAT_INTERVAL][PROVIDERS][DBDP]`:
|Key                              | Description |
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|`[COMPUTE]` | Set to `True` to extract `EMPATICA_INTER_BEAT_INTERVAL` features from the `DBDP` provider|
|`[FEATURES]` | Features to be computed from inter beat interval intraday data, see table below |
Features description for `[EMPATICA_INTER_BEAT_INTERVAL][PROVIDERS][DBDP]`:
|Feature |Units |Description|
|-------------------------- |-------------- |---------------------------|
|maxibi |seconds |The maximum inter beat interval during a time segment.
|minibi |seconds |The minimum inter beat interval during a time segment.
|avgibi |seconds |The average inter beat interval during a time segment.
|medianibi |seconds |The median of inter beat interval during a time segment.
|modeibi |seconds |The mode of inter beat interval during a time segment.
|stdibi |seconds |The standard deviation of inter beat interval during a time segment.
|diffmaxmodeibi |seconds |The difference between the maximum and mode inter beat interval during a time segment.
|diffminmodeibi |seconds |The difference between the mode and minimum inter beat interval during a time segment.
|entropyibi |nats |Shannon's entropy measurement based on inter beat interval during a time segment.
!!! note "Assumptions/Observations"
For more information about IBI, read [this](https://support.empatica.com/hc/en-us/articles/360030058011-E4-data-IBI-expected-signal).

View File

@@ -0,0 +1,11 @@
# Empatica Tags
Sensor parameters description for `[EMPATICA_TAGS]`:
|Key                              | Description |
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|`[TABLE]`| Name of the CSV file containing tags data that is compressed inside an Empatica zip file. Since these zip files are created [automatically](https://support.empatica.com/hc/en-us/articles/201608896-Data-export-and-formatting-from-E4-connect-) by Empatica, there is no need to change the value of this attribute.
!!! note
- No feature providers have been implemented for this sensor yet; however, you can [implement your own features](../add-new-features).
- To learn more about tags, read [this](https://support.empatica.com/hc/en-us/articles/204578699-Event-Marking-with-the-E4-wristband).

View File

@@ -0,0 +1,48 @@
# Empatica Temperature
Sensor parameters description for `[EMPATICA_TEMPERATURE]`:
|Key                              | Description |
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|`[TABLE]`| Name of the CSV file containing temperature data that is compressed inside an Empatica zip file. Since these zip files are created [automatically](https://support.empatica.com/hc/en-us/articles/201608896-Data-export-and-formatting-from-E4-connect-) by Empatica, there is no need to change the value of this attribute.
## DBDP provider
!!! info "Available time segments and platforms"
- Available for all time segments
!!! info "File Sequence"
```bash
- data/raw/{pid}/empatica_temperature_unzipped_{zip-file}.csv # one per zip file
- data/raw/{pid}/empatica_temperature_raw_{zip-file}.csv # one per zip file
- data/raw/{pid}/empatica_temperature_joined.csv
- data/raw/{pid}/empatica_temperature_with_datetime.csv
- data/interim/{pid}/empatica_temperature_features/empatica_temperature_{language}_{provider_key}.csv
- data/processed/features/{pid}/empatica_temperature.csv
```
Parameters description for `[EMPATICA_TEMPERATURE][PROVIDERS][DBDP]`:
|Key                              | Description |
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|`[COMPUTE]` | Set to `True` to extract `EMPATICA_TEMPERATURE` features from the `DBDP` provider|
|`[FEATURES]` | Features to be computed from temperature intraday data, see table below |
Features description for `[EMPATICA_TEMPERATURE][PROVIDERS][DBDP]`:
|Feature |Units |Description|
|-------------------------- |-------------- |---------------------------|
|maxtemp |degrees C |The maximum temperature during a time segment.
|mintemp |degrees C |The minimum temperature during a time segment.
|avgtemp |degrees C |The average temperature during a time segment.
|mediantemp |degrees C |The median of temperature during a time segment.
|modetemp |degrees C |The mode of temperature during a time segment.
|stdtemp |degrees C |The standard deviation of temperature during a time segment.
|diffmaxmodetemp |degrees C |The difference between the maximum and mode temperature during a time segment.
|diffminmodetemp |degrees C |The difference between the mode and minimum temperature during a time segment.
|entropytemp |nats |Shannon's entropy measurement based on temperature during a time segment.
!!! note "Assumptions/Observations"
None

View File

@@ -1,6 +1,6 @@
# Behavioral Features Introduction
Every phone or Fitbit sensor has a corresponding config section in `config.yaml`, these sections follow a similar structure and we'll use `PHONE_ACCELEROMETER` as an example to explain this structure.
Every device sensor has a corresponding config section in `config.yaml`; these sections follow a similar structure, and we'll use `PHONE_ACCELEROMETER` as an example to explain this structure.
!!! hint
- We recommend reading this page if you are using RAPIDS for the first time

BIN docs/img/logo.png 100644 (new binary file, not shown; 12 KiB)

View File

@@ -2,14 +2,14 @@
Reproducible Analysis Pipeline for Data Streams (RAPIDS) allows you to process smartphone and wearable data to [extract](features/feature-introduction.md) and [create](features/add-new-features.md) **behavioral features** (a.k.a. digital biomarkers), [visualize](visualizations/data-quality-visualizations.md) mobile sensor data and [structure](workflow-examples/analysis.md) your analysis into reproducible workflows.
RAPIDS is open source, documented, modular, tested, and reproducible. At the moment we support smartphone data collected with [AWARE](https://awareframework.com/) and wearable data from Fitbit devices.
RAPIDS is open source, documented, modular, tested, and reproducible. At the moment we support smartphone data collected with [AWARE](https://awareframework.com/), wearable data from Fitbit devices, and wearable data from Empatica devices (in collaboration with the [DBDP](https://dbdp.org/)).
!!! tip
:material-slack: Questions or feedback can be posted on the #rapids channel in AWARE Framework's [slack](http://awareframework.com:3000/).
:material-github: Bugs and feature requests should be posted on [Github](https://github.com/carissalow/rapids/issues).
:fontawesome-solid-tasks: Join our discussions on our algorithms and assumptions for feature [processing](https://github.com/carissalow/rapids/issues?q=is%3Aissue+is%3Aopen+label%3Adiscussion).
:fontawesome-solid-tasks: Join our discussions on our algorithms and assumptions for feature [processing](https://github.com/carissalow/rapids/discussions).
:fontawesome-solid-play: Ready to start? Go to [Installation](setup/installation/), then to [Configuration](setup/configuration/), and then to [Execution](setup/execution/)

View File

@@ -18,6 +18,8 @@ When you are done with this configuration, go to [executing RAPIDS](../execution
---
## Database credentials
Only follow this step if you are processing smartphone or Fitbit data stored in a database. For reference, we list below the data sources RAPIDS supports for each type of device.
1. Create an empty file called `#!bash .env` in your RAPIDS root directory
2. Add the following lines and replace your database-specific credentials (user, password, host, and database):
@@ -30,17 +32,27 @@ When you are done with this configuration, go to [executing RAPIDS](../execution
database=MY_DATABASE
```
!!! warning
The label `MY_GROUP` is arbitrary but it has to match the following `config.yaml` key:
??? warning "What is `[MY_GROUP]`?"
The label `[MY_GROUP]` is arbitrary but it has to match the following `config.yaml` key:
```yaml
DATABASE_GROUP: &database_group
MY_GROUP
```
!!! hint
??? hint "Connecting to localhost (host machine) from inside our docker container"
If you are using RAPIDS' docker container and Docker-for-mac or Docker-for-Windows 18.03+, you can connect to a MySQL database on your host machine using `host.docker.internal` instead of `127.0.0.1` or `localhost`. On a Linux host, you need to run our docker container using `docker run --network="host" -d moshiresearch/rapids:latest` and then `127.0.0.1` will point to your host machine.
!!! note
- You can ignore this step if you are only processing Fitbit data in CSV files.
??? hint "Data sources supported for each device type"
| Device | Database | CSV Files | Zip files |
|--|--|--|--|
| Smartphone | Yes (MySQL) | No | No |
| Fitbit | Yes (MySQL) | Yes | No |
| Empatica | No | No | Yes |
- RAPIDS only supports MySQL/MariaDB databases. If you would like to add support for a different database engine, get in touch and we can discuss how to implement it.
- Fitbit data can be processed as the JSON object produced by Fitbit's API (recommended) or in a parsed tabular fashion.
- Empatica devices produce a zip file with a CSV file per sensor, which RAPIDS can process directly (see the sketch below).
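As a concrete illustration of that layout, the sketch below reads one sensor CSV out of an E4 zip; the two header rows (initial unix timestamp, then sampling rate in Hz) follow Empatica's documented export format, but the helper itself is illustrative and not part of RAPIDS:
```python
import zipfile
import pandas as pd

def read_e4_sensor(zip_path: str, csv_name: str = "TEMP.csv") -> pd.DataFrame:
    # Single-column sensors only (TEMP, HR, EDA, BVP); ACC.csv has x, y, z columns
    with zipfile.ZipFile(zip_path) as z, z.open(csv_name) as f:
        raw = pd.read_csv(f, header=None)
    start_ts = float(raw.iloc[0, 0])     # row 1: initial unix timestamp (UTC)
    sample_rate = float(raw.iloc[1, 0])  # row 2: sampling rate in Hz
    values = raw.iloc[2:, 0].reset_index(drop=True)
    timestamps = start_ts + values.index / sample_rate
    return pd.DataFrame({"timestamp": timestamps, "value": values})
```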
---
## Timezone of your study
@@ -64,16 +76,13 @@ Support coming soon.
Participant files link together multiple devices (smartphones and wearables) to specific participants and identify them throughout RAPIDS. You can create these files manually or [automatically](#automatic-creation-of-participant-files). Participant files are stored in `data/external/participant_files/pxx.yaml` and follow a unified [structure](#structure-of-participants-files).
!!! note
??? important "Remember to modify the `config.yaml` file with your PIDS"
The list `PIDS` in `config.yaml` needs to have the participant file names of the people you want to process. For example, if you created `p01.yaml`, `p02.yaml` and `p03.yaml` files in `data/external/participant_files/`, then `PIDS` should be:
```yaml
PIDS: [p01, p02, p03]
```
!!! tip
Attribute *values* of the `[PHONE]` and `[FITBIT]` sections in every participant file are optional which allows you to analyze data from participants that only carried smartphones, only Fitbit devices, or both.
??? hint "Optional: Migrating participants files with the old format"
??? info "Optional: Migrating participants files with the old format"
If you were using the pre-release version of RAPIDS with participant files in plain text (as opposed to yaml), you can run the following command and your old files will be converted into yaml files stored in `data/external/participant_files/`
```bash
@@ -82,9 +91,11 @@ Participant files link together multiple devices (smartphones and wearables) to
### Structure of participants files
!!! example "Example of the structure of a participant file"
??? example "Example of the structure of a participant file"
In this example, the participant used an android phone, an ios phone, and a fitbit device throughout the study between Apr 23rd 2020 and Oct 28th 2020
In this example, the participant used an android phone, an ios phone, a fitbit device, and an Empatica device throughout the study between Apr 23rd 2020 and Oct 28th 2020
If your participants didn't use a `[PHONE]`, `[FITBIT]`, or `[EMPATICA]` device, it is not necessary to include that section in their participant file. In other words, you can analyse data from one or more devices per participant.
```yaml
PHONE:
@@ -98,30 +109,41 @@ Participant files link together multiple devices (smartphones and wearables) to
LABEL: test01
START_DATE: 2020-04-23
END_DATE: 2020-10-28
EMPATICA: # Empatica doesn't have a device_id because the devices produce zip files per participant
LABEL: test01
START_DATE: 2020-04-23
END_DATE: 2020-10-28
```
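Since participant files are plain YAML, they can also be inspected programmatically; a small sketch assuming the example above is saved as `data/external/participant_files/p01.yaml` (PyYAML is an assumed dependency):
```python
import yaml

with open("data/external/participant_files/p01.yaml") as f:
    participant = yaml.safe_load(f)

# Device sections are optional, so use .get() for devices a participant didn't carry
empatica = participant.get("EMPATICA", {})
print(empatica.get("LABEL"))       # test01
print(empatica.get("START_DATE"))  # 2020-04-23
```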
**For `[PHONE]`**
=== "[PHONE]"
| Key                      | Description |
|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `[DEVICE_IDS]` | An array of the strings that uniquely identify each smartphone; you can have more than one when participants changed phones in the middle of the study, in which case data from all their devices will be joined and relabeled with the last one in this list. |
| `[PLATFORMS]` | An array that specifies the OS of each smartphone in `[DEVICE_IDS]`; use a combination of `android` or `ios` (we support participants that changed platforms in the middle of your study!). If you have an `aware_device` table in your database you can set `[PLATFORMS]: [multiple]` and RAPIDS will infer them automatically. |
| `[LABEL]` | A string that is used in reports and visualizations. |
| `[START_DATE]` | A string with format `YYYY-MM-DD`. Only data collected *after* this date will be included in the analysis |
| `[END_DATE]` | A string with format `YYYY-MM-DD`. Only data collected *before* this date will be included in the analysis |
| Key                      | Description |
|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `[DEVICE_IDS]` | An array of the strings that uniquely identify each smartphone; you can have more than one when participants changed phones in the middle of the study, in which case data from all their devices will be joined and relabeled with the last one in this list. |
| `[PLATFORMS]` | An array that specifies the OS of each smartphone in `[DEVICE_IDS]`; use a combination of `android` or `ios` (we support participants that changed platforms in the middle of your study!). If you have an `aware_device` table in your database you can set `[PLATFORMS]: [multiple]` and RAPIDS will infer them automatically. |
| `[LABEL]` | A string that is used in reports and visualizations. |
| `[START_DATE]` | A string with format `YYYY-MM-DD`. Only data collected *after* this date will be included in the analysis |
| `[END_DATE]` | A string with format `YYYY-MM-DD`. Only data collected *before* this date will be included in the analysis |
**For `[FITBIT]`**
=== "[FITBIT]"
| Key                      | Description |
|------------------|-----------------------------------------------------------------------------------------------------------|
| `[DEVICE_IDS]` | An array of the strings that uniquely identify each Fitbit; you can have more than one in case the participant changed devices in the middle of the study, in which case data from all devices will be joined and relabeled with the last `device_id` in this list. |
| `[LABEL]` | A string that is used in reports and visualizations. |
| `[START_DATE]` | A string with format `YYYY-MM-DD`. Only data collected *after* this date will be included in the analysis |
| `[END_DATE]` | A string with format `YYYY-MM-DD`. Only data collected *before* this date will be included in the analysis |
| Key                      | Description |
|------------------|-----------------------------------------------------------------------------------------------------------|
| `[DEVICE_IDS]` | An array of the strings that uniquely identify each Fitbit; you can have more than one in case the participant changed devices in the middle of the study, in which case data from all devices will be joined and relabeled with the last `device_id` in this list. |
| `[LABEL]` | A string that is used in reports and visualizations. |
| `[START_DATE]` | A string with format `YYYY-MM-DD`. Only data collected *after* this date will be included in the analysis |
| `[END_DATE]` | A string with format `YYYY-MM-DD`. Only data collected *before* this date will be included in the analysis |
=== "[EMPATICA]"
| Key                      | Description |
|------------------|-----------------------------------------------------------------------------------------------------------|
| `[LABEL]` | A string that is used in reports and visualizations. |
| `[START_DATE]` | A string with format `YYYY-MM-DD`. Only data collected *after* this date will be included in the analysis |
| `[END_DATE]` | A string with format `YYYY-MM-DD`. Only data collected *before* this date will be included in the analysis |
### Automatic creation of participant files
You have two options a) use the `aware_device` table in your database or b) use a CSV file. In either case, in your `config.yaml`, set `[PHONE_SECTION][ADD]` or `[FITBIT_SECTION][ADD]` to `TRUE` depending on what devices you used in your study. Set `[DEVICE_ID_COLUMN]` to the name of the column that uniquely identifies each device and include any device ids you want to ignore in `[IGNORED_DEVICE_IDS]`.
You have two options: a) use the `aware_device` table in your database or b) use a CSV file. In either case, in your `config.yaml`, set the `[ADD]` flag of each device section (`PHONE_SECTION`, `FITBIT_SECTION`, `EMPATICA_SECTION`) to `TRUE` depending on what devices you used in your study. Set `[DEVICE_ID_COLUMN]` to the name of the column that uniquely identifies each device (only for the phone and Fitbit sections).
=== "aware_device table"
@@ -139,9 +161,11 @@ You have two options a) use the `aware_device` table in your database or b) use
DEVICE_ID_COLUMN: device_id # column name
IGNORED_DEVICE_IDS: []
FITBIT_SECTION:
ADD: TRUE # or FALSE
ADD: FALSE # or TRUE
DEVICE_ID_COLUMN: fitbit_id # column name
IGNORED_DEVICE_IDS: []
EMPATICA_SECTION: # Empatica doesn't have a device_id column because the devices produce zip files per participant
ADD: FALSE # or TRUE
```
Then run
@@ -166,9 +190,11 @@ You have two options a) use the `aware_device` table in your database or b) use
DEVICE_ID_COLUMN: device_id # column name
IGNORED_DEVICE_IDS: []
FITBIT_SECTION:
ADD: TRUE # or FALSE
ADD: FALSE # or TRUE
DEVICE_ID_COLUMN: fitbit_id # column name
IGNORED_DEVICE_IDS: []
EMPATICA_SECTION: # Empatica doesn't have a device_id column because the devices produce zip files per participant
ADD: FALSE # or TRUE
```
Your CSV file (`[SOURCE][CSV_FILE_PATH]`) should have the following columns, but you can omit any values you don't have in each column:
@@ -370,7 +396,7 @@ Time segments (or epochs) are the time windows on which you want to extract beha
---
## Device Data Source Configuration
You might need to modify the following config keys in your `config.yaml` depending on what devices your participants used and where you are storing your data. You can ignore `[PHONE_DATA_CONFIGURATION]` or `[FITBIT_DATA_CONFIGURATION]` if you are not working with either devices.
You might need to modify the following config keys in your `config.yaml` depending on what devices your participants used and where you are storing your data (ignore the sections of devices you did not use).
=== "Phone"
@@ -426,6 +452,46 @@ You might need to modify the following config keys in your `config.yaml` dependi
| `[TIMEZONE]` `[TYPE]` | Only `SINGLE` is supported (Fitbit devices always store data in local time). |
| `[TIMEZONE]` `[VALUE]` | `*timezone` points to the value defined before in [Timezone of your study](#timezone-of-your-study) |
=== "Empatica"
The relevant `config.yaml` section looks like this by default:
```yaml
SOURCE:
TYPE: ZIP_FILE
FOLDER: data/external/empatica
TIMEZONE:
TYPE: SINGLE # Empatica devices don't support time zones, so we read this data in the timezone indicated by VALUE
VALUE: *timezone
```
**Parameters for `[EMPATICA_DATA_CONFIGURATION]`**
| Key | Description |
|---------------------|----------------------------------------------------------------------------------------------------------------------------|
| `[SOURCE] [TYPE]` | Only `ZIP_FILE` is supported (Empatica devices save sensor data in CSV files that are zipped together).|
| `[SOURCE] [FOLDER]` | The relative path to a folder containing one folder per participant. The name of a participant folder should match their pid in `config[PIDS]`, for example `p01`. Each participant folder can have one or more zip files with any name; in other words, the sensor data contained in those zip files belongs to a single participant. The zip files are [automatically](https://support.empatica.com/hc/en-us/articles/201608896-Data-export-and-formatting-from-E4-connect-) generated by Empatica and have a CSV file per sensor (`ACC`, `HR`, `TEMP`, `EDA`, `BVP`, `IBI`, `TAGS`). All CSV files of the same type contained in one or more zip files are uncompressed, parsed, sorted by timestamp, and joined together.|
| `[TIMEZONE] [TYPE]` | Only `SINGLE` is supported for now |
| `[TIMEZONE] [VALUE]` | `*timezone` points to the value defined before in [Timezone of your study](#timezone-of-your-study) |
??? example "Example of an EMPATICA FOLDER"
In the file tree below, we want to process the data of three participants: `p01`, `p02`, and `p03`. `p01` has two zip files, `p02` has only one zip file, and `p03` has three zip files. Each zip file contains a CSV file per sensor; these are joined together and processed by RAPIDS. These zip files are generated by Empatica.
```bash
data/ # this folder exists in the root RAPIDS folder
external/
empatica/
p01/
file1.zip
file2.zip
p02/
aaaa.zip
p03/
t1.zip
t2.zip
t3.zip
```
---
## Sensor and Features to Process

View File

@@ -9,28 +9,28 @@ After you have [installed](../installation) and [configured](../configuration) R
!!! done "Ready to extract behavioral features"
If you are ready to extract features head over to the [Behavioral Features Introduction](../../features/feature-introduction/)
!!! info
!!! hint "We wrap Snakemake"
The script `#!bash ./rapids` is a wrapper around Snakemake so you can pass any parameters that Snakemake accepts (e.g. `-j1`).
!!! hint "Updating RAPIDS output after modifying `config.yaml`"
Any changes to the `config.yaml` file will be applied automatically and only the relevant files will be updated. This means that after modifying the features list for `PHONE_MESSAGE` for example, RAPIDS will update the output file with the correct features.
Any changes to the `config.yaml` file will be applied automatically and only the relevant files will be updated. This means that after modifying the features list for `PHONE_MESSAGE`, for example, RAPIDS will execute the script that computes `PHONE_MESSAGE` features and update its output file.
!!! hint "Multi-core"
You can run RAPIDS over multiple cores by modifying the `-j` argument (e.g. use `-j8` to use 8 cores). **However**, take into account that this means multiple sensor datasets for different participants will be load in memory at the same time. If RAPIDS crashes because it ran out of memory reduce the number of cores and try again.
You can run RAPIDS over multiple cores by modifying the `-j` argument (e.g. use `-j8` to use 8 cores). **However**, take into account that this means multiple sensor datasets for different participants will be loaded in memory at the same time. If RAPIDS crashes because it ran out of memory reduce the number of cores and try again.
As a reference, we have run RAPIDS on 12 cores and 32 GB of RAM without problems for a study with 200 participants with 14 days of low-frequency smartphone data (no accelerometer, gyroscope, or magnetometer).
!!! hint "Forcing a complete rerun"
If you want to update your data from your database or rerun the whole pipeline from scratch run one or both of the following commands depending on the devices you are using:
```bash
./rapids -j1 -R download_phone_data
./rapids -j1 -R download_fitbit_data
```
!!! hint "Deleting RAPIDS output"
If you want to delete all the output files RAPIDS produces you can execute the following command:
```bash
./rapids -j1 --delete-all-output
```
!!! hint "Forcing a complete rerun"
If you want to update your raw data or rerun the whole pipeline from scratch, run the following commands:
```bash
./rapids -j1 --delete-all-output
./rapids -j1
```

View File

@@ -68,6 +68,16 @@ If you are interested in contributing feel free to submit a pull request or cont
??? abstract "About"
- [Linkedin Profile](https://www.linkedin.com/in/ysefidgar/)
### Joe Kim
??? abstract "About"
- [Personal Website](https://www.juseongjoekim.com/)
### Brinnae Bent
??? abstract "About"
- [Personal Website](https://runsdata.org/)
## Advisors
### Afsaneh Doryab

View File

@@ -46,8 +46,8 @@ repo_url: 'https://github.com/carissalow/rapids'
copyright: 'Released under AGPL'
theme:
name: material
icon:
logo: material/air-filter
logo: img/logo.png
favicon: img/logo.png
palette:
- scheme: default
primary: blue
@@ -74,6 +74,7 @@ nav:
- Installation: 'setup/installation.md'
- Configuration: setup/configuration.md
- Execution: setup/execution.md
- Citation: citation.md
- Example Workflows:
- Minimal: workflow-examples/minimal.md
- Analysis: workflow-examples/analysis.md
@@ -105,6 +106,14 @@ nav:
- Fitbit Sleep Summary: features/fitbit-sleep-summary.md
- Fitbit Steps Summary: features/fitbit-steps-summary.md
- Fitbit Steps Intraday: features/fitbit-steps-intraday.md
- Empatica:
- Empatica Accelerometer: features/empatica-accelerometer.md
- Empatica Heart Rate: features/empatica-heartrate.md
- Empatica Temperature: features/empatica-temperature.md
- Empatica Electrodermal Activity: features/empatica-electrodermal-activity.md
- Empatica Blood Volume Pulse: features/empatica-blood-volume-pulse.md
- Empatica Inter Beat Interval: features/empatica-inter-beat-interval.md
- Empatica Tags: features/empatica-tags.md
- Add New Features: features/add-new-features.md
- Visualizations:
- Data Quality: visualizations/data-quality-visualizations.md
@@ -122,4 +131,3 @@ nav:
- FAQ: faq.md
- Team: team.md
- Change Log: change-log.md
- Citation: citation.md

View File

@@ -14,6 +14,7 @@ local({
# signal that we're loading renv during R startup
Sys.setenv("RENV_R_INITIALIZING" = "true")
on.exit(Sys.unsetenv("RENV_R_INITIALIZING"), add = TRUE)
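# Pointing TZDIR at R's bundled timezone database (the line below) presumably keeps
# date-time conversions working on systems without a system zoneinfo directory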
Sys.setenv("TZDIR" = file.path(R.home(), "share", "zoneinfo"))
# signal that we've consented to use renv
options(renv.consent = TRUE)

View File

@@ -15,7 +15,7 @@ def optional_steps_sleep_input(wildcards):
def input_merge_sensor_features_for_individual_participants(wildcards):
feature_files = []
for config_key in config.keys():
if config_key.startswith(("PHONE", "FITBIT")) and "PROVIDERS" in config[config_key] and isinstance(config[config_key]["PROVIDERS"], dict):
if config_key.startswith(("PHONE", "FITBIT", "EMPATICA")) and "PROVIDERS" in config[config_key] and isinstance(config[config_key]["PROVIDERS"], dict):
for provider_key, provider in config[config_key]["PROVIDERS"].items():
if "COMPUTE" in provider.keys() and provider["COMPUTE"]:
feature_files.append("data/processed/features/{pid}/" + config_key.lower() + ".csv")
@@ -30,3 +30,15 @@ def get_phone_sensor_names():
phone_sensor_names.append(config_key)
return phone_sensor_names
from pathlib import Path
def get_zip_suffixes(pid):
# Collect the stem (file name without ".zip") of every zip file in this participant's Empatica folder
zipfiles = list((Path("data/external/empatica/") / Path(pid)).rglob("*.zip"))
suffixes = []
for zipfile in zipfiles:
suffixes.append(zipfile.stem)
return suffixes
def get_all_raw_empatica_sensor_files(wildcards):
# One raw CSV per zip file for the given participant and sensor
suffixes = get_zip_suffixes(wildcards.pid)
files = ["data/raw/{}/empatica_{}_raw_{}.csv".format(wildcards.pid, wildcards.sensor, suffix) for suffix in suffixes]
return files
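# Usage sketch with the hypothetical layout from the configuration docs: if
# data/external/empatica/p01/ contains file1.zip and file2.zip, then
# get_zip_suffixes("p01") returns ["file1", "file2"] (filesystem order), and
# get_all_raw_empatica_sensor_files(wildcards) with pid="p01", sensor="accelerometer"
# returns ["data/raw/p01/empatica_accelerometer_raw_file1.csv",
#          "data/raw/p01/empatica_accelerometer_raw_file2.csv"]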

View File

@@ -2,7 +2,7 @@ rule join_features_from_providers:
input:
sensor_features = find_features_files
wildcard_constraints:
sensor_key = '(phone|fitbit).*'
sensor_key = '(phone|fitbit|empatica).*'
output:
"data/processed/features/{pid}/{sensor_key}.csv"
script:
@@ -675,3 +675,185 @@ rule merge_sensor_features_for_all_participants:
"data/processed/features/all_participants/all_sensor_features.csv"
script:
"../src/features/utils/merge_sensor_features_for_all_participants.R"
rule empatica_accelerometer_python_features:
input:
sensor_data = "data/raw/{pid}/empatica_accelerometer_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["EMPATICA_ACCELEROMETER"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "empatica_accelerometer"
output:
"data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}.csv"
script:
"../src/features/entry.py"
rule empatica_accelerometer_r_features:
input:
sensor_data = "data/raw/{pid}/empatica_accelerometer_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["EMPATICA_ACCELEROMETER"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "empatica_accelerometer"
output:
"data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_r_{provider_key}.csv"
script:
"../src/features/entry.R"
rule empatica_heartrate_python_features:
input:
sensor_data = "data/raw/{pid}/empatica_heartrate_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["EMPATICA_HEARTRATE"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "empatica_heartrate"
output:
"data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}.csv"
script:
"../src/features/entry.py"
rule empatica_heartrate_r_features:
input:
sensor_data = "data/raw/{pid}/empatica_heartrate_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["EMPATICA_HEARTRATE"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "empatica_heartrate"
output:
"data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_r_{provider_key}.csv"
script:
"../src/features/entry.R"
rule empatica_temperature_python_features:
input:
sensor_data = "data/raw/{pid}/empatica_temperature_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["EMPATICA_TEMPERATURE"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "empatica_temperature"
output:
"data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}.csv"
script:
"../src/features/entry.py"
rule empatica_temperature_r_features:
input:
sensor_data = "data/raw/{pid}/empatica_temperature_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["EMPATICA_TEMPERATURE"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "empatica_temperature"
output:
"data/interim/{pid}/empatica_temperature_features/empatica_temperature_r_{provider_key}.csv"
script:
"../src/features/entry.R"
rule empatica_electrodermal_activity_python_features:
input:
sensor_data = "data/raw/{pid}/empatica_electrodermal_activity_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "empatica_electrodermal_activity"
output:
"data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}.csv"
script:
"../src/features/entry.py"
rule empatica_electrodermal_activity_r_features:
input:
sensor_data = "data/raw/{pid}/empatica_electrodermal_activity_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "empatica_electrodermal_activity"
output:
"data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_r_{provider_key}.csv"
script:
"../src/features/entry.R"
rule empatica_blood_volume_pulse_python_features:
input:
sensor_data = "data/raw/{pid}/empatica_blood_volume_pulse_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "empatica_blood_volume_pulse"
output:
"data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}.csv"
script:
"../src/features/entry.py"
rule empatica_blood_volume_pulse_r_features:
input:
sensor_data = "data/raw/{pid}/empatica_blood_volume_pulse_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "empatica_blood_volume_pulse"
output:
"data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_r_{provider_key}.csv"
script:
"../src/features/entry.R"
rule empatica_inter_beat_interval_python_features:
input:
sensor_data = "data/raw/{pid}/empatica_inter_beat_interval_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "empatica_inter_beat_interval"
output:
"data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}.csv"
script:
"../src/features/entry.py"
rule empatica_inter_beat_interval_r_features:
input:
sensor_data = "data/raw/{pid}/empatica_inter_beat_interval_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "empatica_inter_beat_interval"
output:
"data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_r_{provider_key}.csv"
script:
"../src/features/entry.R"
rule empatica_tags_python_features:
input:
sensor_data = "data/raw/{pid}/empatica_tags_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["EMPATICA_TAGS"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "empatica_tags"
output:
"data/interim/{pid}/empatica_tags_features/empatica_tags_python_{provider_key}.csv"
script:
"../src/features/entry.py"
rule empatica_tags_r_features:
input:
sensor_data = "data/raw/{pid}/empatica_tags_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["EMPATICA_TAGS"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "empatica_tags"
output:
"data/interim/{pid}/empatica_tags_features/empatica_tags_r_{provider_key}.csv"
script:
"../src/features/entry.R"

View File

@ -242,3 +242,51 @@ rule fitbit_readable_datetime:
"data/raw/{pid}/fitbit_{sensor}_{fitbit_data_type}_parsed_with_datetime.csv"
script:
"../src/data/readable_datetime.R"
from pathlib import Path
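# Pipeline sketch for each participant: unzip_empatica_data extracts one sensor CSV
# per zip in the source folder, extract_empatica_data converts it into timestamped
# rows, join_empatica_data stacks the per-zip files, and empatica_readable_datetime
# adds local date/time columns using the same timezone settings as the phone data.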
rule unzip_empatica_data:
input:
input_file = Path(config["EMPATICA_DATA_CONFIGURATION"]["SOURCE"]["FOLDER"]) / Path("{pid}") / Path("{suffix}.zip"),
participant_file = "data/external/participant_files/{pid}.yaml"
params:
sensor = "{sensor}"
output:
sensor_output = "data/raw/{pid}/empatica_{sensor}_unzipped_{suffix}.csv"
script:
"../src/data/empatica/unzip_empatica_data.py"
rule extract_empatica_data:
input:
input_file = "data/raw/{pid}/empatica_{sensor}_unzipped_{suffix}.csv",
participant_file = "data/external/participant_files/{pid}.yaml"
params:
data_configuration = config["EMPATICA_DATA_CONFIGURATION"],
sensor = "{sensor}",
table = lambda wildcards: config["EMPATICA_" + str(wildcards.sensor).upper()]["TABLE"],
output:
sensor_output = "data/raw/{pid}/empatica_{sensor}_raw_{suffix}.csv"
script:
"../src/data/empatica/extract_empatica_data.py"
rule join_empatica_data:
input:
input_files = get_all_raw_empatica_sensor_files,
output:
sensor_output = "data/raw/{pid}/empatica_{sensor}_joined.csv"
script:
"../src/data/empatica/join_empatica_data.R"
rule empatica_readable_datetime:
input:
sensor_input = "data/raw/{pid}/empatica_{sensor}_joined.csv",
time_segments = "data/interim/time_segments/{pid}_time_segments.csv"
params:
timezones = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["TYPE"],
fixed_timezone = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"],
time_segments_type = config["TIME_SEGMENTS"]["TYPE"],
include_past_periodic_segments = config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
output:
"data/raw/{pid}/empatica_{sensor}_with_datetime.csv"
script:
"../src/data/readable_datetime.R"

View File

@ -13,6 +13,7 @@ phone_device_id_column = config$PHONE_SECTION$DEVICE_ID_COLUMN
fitbit_device_id_column = config$FITBIT_SECTION$DEVICE_ID_COLUMN
add_phone_section = config$PHONE_SECTION$ADD
add_fitbit_section = config$FITBIT_SECTION$ADD
add_empatica_section = config$EMPATICA_SECTION$ADD
phone_ignored = config$PHONE_SECTION$IGNORED_DEVICE_IDS
fitbit_ignored = config$FITBIT_SECTION$IGNORED_DEVICE_IDS
@ -54,6 +55,7 @@ participants %>%
pwalk(function(add_phone_section, add_fitbit_section, phone_device_id_column, fitbit_device_id_column, ...) {
empty_phone <- c("PHONE:", " DEVICE_IDS:", " PLATFORMS:"," LABEL:", " START_DATE:", " END_DATE:")
empty_fitbit <- c("FITBIT:", " DEVICE_IDS:", " LABEL:", " START_DATE:", " END_DATE:")
empty_empatica <- c("EMPATICA:", " LABEL:", " START_DATE:", " END_DATE:")
row <- tibble(...)
lines <- c()
start_date = if_else(is.na(row$start_date), "", row$start_date)
@ -71,6 +73,12 @@ participants %>%
} else
lines <- append(lines, empty_fitbit)
if(add_empatica_section == TRUE){
lines <- append(lines, c("EMPATICA:",
paste(" LABEL:",row$label), paste(" START_DATE:", start_date), paste(" END_DATE:", end_date)))
} else
lines <- append(lines, empty_empatica)
file_connection <- file(paste0("./data/external/participant_files/", row$pid, ".yaml"))
writeLines(lines, file_connection)
close(file_connection)
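    # Illustrative EMPATICA block written to data/external/participant_files/<pid>.yaml
    # (hypothetical values):
    # EMPATICA:
    #   LABEL: p01
    #   START_DATE: 2020-03-01
    #   END_DATE: 2020-03-15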

View File

@ -0,0 +1,94 @@
import pandas as pd
import yaml
import csv
from collections import OrderedDict
def processAcceleration(x, y, z):
x = float(x)
y = float(y)
z = float(z)
return {'x': x, 'y': y, 'z': z}
def readFile(file, dtype):
    data = OrderedDict()
    with open(file, 'rt') as csvfile:
        if dtype in ('electrodermal_activity', 'temperature', 'heartrate', 'blood_volume_pulse'):
            reader = csv.reader(csvfile, delimiter='\n')
        elif dtype == 'accelerometer':
            reader = csv.reader(csvfile, delimiter=',')
        i = 0
        for row in reader:
            if i == 0:
                # the first line of an Empatica export is the initial UTC timestamp
                timestamp = float(row[0])
            elif i == 1:
                # the second line is the sampling rate in Hz
                hertz = float(row[0])
            else:
                if i > 2:
                    # the first sample (i == 2) keeps the initial timestamp; later
                    # samples advance by one sampling period
                    timestamp = timestamp + 1.0 / hertz
                if dtype in ('electrodermal_activity', 'temperature', 'heartrate', 'blood_volume_pulse'):
                    data[timestamp] = row[0]
                elif dtype == 'accelerometer':
                    data[timestamp] = processAcceleration(row[0], row[1], row[2])
            i += 1
    return data
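# Illustrative raw Empatica export (e.g. TEMP.csv, hypothetical values): the first
# line is the initial UTC timestamp, the second the sampling rate in Hz, and every
# later line one sample:
#   1583172968.000000
#   4.000000
#   31.13
#   31.15
# readFile() stamps the first sample with 1583172968.0 and advances by 1/4 s per row.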
def extract_empatica_data(sensor_data_file, output_file, start_date, end_date, timezone, sensor):
# read sensor data
if sensor in ('electrodermal_activity', 'temperature', 'heartrate', 'blood_volume_pulse'):
ddict = readFile(sensor_data_file, sensor)
df = pd.DataFrame.from_dict(ddict, orient='index', columns=[sensor])
df[sensor] = df[sensor].astype(float)
df.index.name = 'timestamp'
elif sensor == 'accelerometer':
ddict = readFile(sensor_data_file, sensor)
df = pd.DataFrame.from_dict(ddict, orient='index', columns=['x', 'y', 'z'])
df['x'] = df['x'].astype(float)
df['y'] = df['y'].astype(float)
df['z'] = df['z'].astype(float)
df.index.name = 'timestamp'
elif sensor == 'inter_beat_interval':
df = pd.read_csv(sensor_data_file, names=['timestamp', sensor], header=None)
timestampstart = float(df['timestamp'][0])
df['timestamp'] = (df['timestamp'][1:len(df)]).astype(float) + timestampstart
df = df.drop([0])
df[sensor] = df[sensor].astype(float)
df = df.set_index('timestamp')
else:
raise ValueError(
"sensor can only be one of ['electrodermal_activity','temperature','heartrate','blood_volume_pulse','accelerometer','inter_beat_interval'].")
# filter based on given start and end date
start_date_utc = pd.Timestamp(start_date, tz=timezone).timestamp()
end_date_utc = pd.Timestamp(end_date, tz=timezone).timestamp()
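    # worked example: pd.Timestamp("2020-03-02", tz="America/New_York").timestamp()
    # returns 1583125200.0, i.e. 2020-03-02 00:00 EST expressed in epoch seconds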
df = df[start_date_utc:end_date_utc]
    # convert the epoch-second index to integer milliseconds
    df.index *= 1000
    df.index = df.index.astype(int)
# output csv file
df.to_csv(output_file)
sensor_data_file = snakemake.input[0]
output_file = snakemake.output[0]
with open(snakemake.input[1], "r", encoding="utf-8") as f:
participant_file = yaml.safe_load(f)
start_date = participant_file["EMPATICA"]["START_DATE"]
end_date = participant_file["EMPATICA"]["END_DATE"]
timezone = snakemake.params["data_configuration"]["TIMEZONE"]["VALUE"]
sensor = snakemake.params["sensor"]
extract_empatica_data(sensor_data_file, output_file, start_date, end_date, timezone, sensor)

View File

@ -0,0 +1,17 @@
source("renv/activate.R")
library("tidyr")
library("dplyr", warn.conflicts = F)
empatica_files <- snakemake@input[["input_files"]]
empatica_data <- setNames(data.frame(matrix(ncol = 1, nrow = 0)), c("timestamp"))
for(file in empatica_files){
data <- read.csv(file)
if(! ("timestamp" %in% colnames(data)))
stop(paste("This file does not have a timestamp column, something might have gone wrong while unzipping it:", file))
empatica_data <- merge(empatica_data, data, all = TRUE)
}
write.csv(empatica_data, snakemake@output[[1]], row.names = FALSE)
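# Because every per-zip file shares the same columns, merge(..., all = TRUE) acts
# here as a row-wise union, keeping each timestamp that appears in any of the files.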

View File

@ -0,0 +1,21 @@
from zipfile import ZipFile
import warnings
sensor_short_name = {"accelerometer":"ACC",
"temperature":"TEMP",
"tags":"tags",
"heartrate":"HR",
"inter_beat_interval":"IBI",
"blood_volume_pulse":"BVP",
"electrodermal_activity":"EDA"}
sensor_csv = sensor_short_name[snakemake.params["sensor"]] + '.csv'
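# Example: for sensor = "heartrate", sensor_csv is "HR.csv", so the loop below pulls
# HR.csv out of the participant's zip into data/raw/{pid}/empatica_heartrate_unzipped_{suffix}.csv.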
found = False
with ZipFile(snakemake.input[0], 'r') as zipFile:
    listOfFileNames = zipFile.namelist()
    for fileName in listOfFileNames:
        if fileName == sensor_csv:
            with open(snakemake.output[0], 'wb') as outputFile:
                outputFile.write(zipFile.read(fileName))
            found = True
if not found:
    warnings.warn("We could not find {} (for sensor {}) inside the zip file {}".format(sensor_csv, snakemake.params["sensor"], snakemake.input[0]))

View File

@ -0,0 +1,36 @@
import pandas as pd
import numpy as np
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
acc_data = pd.read_csv(sensor_data_files["sensor_data"])
requested_features = provider["FEATURES"]
# name of the features this function can compute
base_features_names = ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
# the subset of requested features this function can compute
features_to_compute = list(set(requested_features) & set(base_features_names))
acc_features = pd.DataFrame(columns=["local_segment"] + features_to_compute)
if not acc_data.empty:
acc_data = filter_data_by_segment(acc_data, time_segment)
if not acc_data.empty:
acc_features = pd.DataFrame()
# get magnitude related features: magnitude = sqrt(x^2+y^2+z^2)
magnitude = acc_data.apply(lambda row: np.sqrt(row["x"] ** 2 + row["y"] ** 2 + row["z"] ** 2), axis=1)
acc_data = acc_data.assign(magnitude = magnitude.values)
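            # a vectorized alternative (equivalent result, shown for reference):
            #   acc_data["magnitude"] = np.sqrt((acc_data[["x", "y", "z"]] ** 2).sum(axis=1))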
if "maxmagnitude" in features_to_compute:
acc_features["maxmagnitude"] = acc_data.groupby(["local_segment"])["magnitude"].max()
if "minmagnitude" in features_to_compute:
acc_features["minmagnitude"] = acc_data.groupby(["local_segment"])["magnitude"].min()
if "avgmagnitude" in features_to_compute:
acc_features["avgmagnitude"] = acc_data.groupby(["local_segment"])["magnitude"].mean()
if "medianmagnitude" in features_to_compute:
acc_features["medianmagnitude"] = acc_data.groupby(["local_segment"])["magnitude"].median()
if "stdmagnitude" in features_to_compute:
acc_features["stdmagnitude"] = acc_data.groupby(["local_segment"])["magnitude"].std()
acc_features = acc_features.reset_index()
return acc_features

View File

@ -0,0 +1,76 @@
import pandas as pd
from scipy.stats import entropy
def statsFeatures(bvp_data, features, bvp_features):
    col_name = "blood_volume_pulse"
    grouped = bvp_data[["local_segment", col_name]].groupby(["local_segment"])[col_name]
    if "sumbvp" in features:
        bvp_features["sumbvp"] = grouped.sum()
    if "maxbvp" in features:
        bvp_features["maxbvp"] = grouped.max()
    if "minbvp" in features:
        bvp_features["minbvp"] = grouped.min()
    if "avgbvp" in features:
        bvp_features["avgbvp"] = grouped.mean()
    if "medianbvp" in features:
        bvp_features["medianbvp"] = grouped.median()
    if "modebvp" in features:
        bvp_features["modebvp"] = grouped.agg(lambda x: pd.Series.mode(x)[0])
    if "stdbvp" in features:
        bvp_features["stdbvp"] = grouped.std()
    if "diffmaxmodebvp" in features:
        bvp_features["diffmaxmodebvp"] = grouped.max() - grouped.agg(lambda x: pd.Series.mode(x)[0])
    if "diffminmodebvp" in features:
        bvp_features["diffminmodebvp"] = grouped.agg(lambda x: pd.Series.mode(x)[0]) - grouped.min()
    if "entropybvp" in features:
        bvp_features["entropybvp"] = grouped.agg(entropy)
    return bvp_features
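# Worked example (illustrative): for a segment with blood_volume_pulse values
# [1, 2, 2, 5], mode = 2, so diffmaxmodebvp = 5 - 2 = 3 and diffminmodebvp = 2 - 1 = 1.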
def extractBVPFeaturesFromIntradayData(bvp_intraday_data, features, time_segment, filter_data_by_segment):
bvp_intraday_features = pd.DataFrame(columns=["local_segment"] + features)
if not bvp_intraday_data.empty:
bvp_intraday_data = filter_data_by_segment(bvp_intraday_data, time_segment)
if not bvp_intraday_data.empty:
bvp_intraday_features = pd.DataFrame()
# get stats of bvp
bvp_intraday_features = statsFeatures(bvp_intraday_data, features, bvp_intraday_features)
bvp_intraday_features.reset_index(inplace=True)
return bvp_intraday_features
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
bvp_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
requested_intraday_features = provider["FEATURES"]
# name of the features this function can compute
base_intraday_features_names = ["maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp",
"diffminmodebvp", "entropybvp"]
# the subset of requested features this function can compute
intraday_features_to_compute = list(set(requested_intraday_features) & set(base_intraday_features_names))
# extract features from intraday data
bvp_intraday_features = extractBVPFeaturesFromIntradayData(bvp_intraday_data,
intraday_features_to_compute, time_segment,
filter_data_by_segment)
return bvp_intraday_features

View File

@ -0,0 +1,76 @@
import pandas as pd
from scipy.stats import entropy
def statsFeatures(eda_data, features, eda_features):
    col_name = "electrodermal_activity"
    grouped = eda_data[["local_segment", col_name]].groupby(["local_segment"])[col_name]
    if "sumeda" in features:
        eda_features["sumeda"] = grouped.sum()
    if "maxeda" in features:
        eda_features["maxeda"] = grouped.max()
    if "mineda" in features:
        eda_features["mineda"] = grouped.min()
    if "avgeda" in features:
        eda_features["avgeda"] = grouped.mean()
    if "medianeda" in features:
        eda_features["medianeda"] = grouped.median()
    if "modeeda" in features:
        eda_features["modeeda"] = grouped.agg(lambda x: pd.Series.mode(x)[0])
    if "stdeda" in features:
        eda_features["stdeda"] = grouped.std()
    if "diffmaxmodeeda" in features:
        eda_features["diffmaxmodeeda"] = grouped.max() - grouped.agg(lambda x: pd.Series.mode(x)[0])
    if "diffminmodeeda" in features:
        eda_features["diffminmodeeda"] = grouped.agg(lambda x: pd.Series.mode(x)[0]) - grouped.min()
    if "entropyeda" in features:
        eda_features["entropyeda"] = grouped.agg(entropy)
    return eda_features
def extractEDAFeaturesFromIntradayData(eda_intraday_data, features, time_segment, filter_data_by_segment):
eda_intraday_features = pd.DataFrame(columns=["local_segment"] + features)
if not eda_intraday_data.empty:
eda_intraday_data = filter_data_by_segment(eda_intraday_data, time_segment)
if not eda_intraday_data.empty:
eda_intraday_features = pd.DataFrame()
# get stats of eda
eda_intraday_features = statsFeatures(eda_intraday_data, features, eda_intraday_features)
eda_intraday_features.reset_index(inplace=True)
return eda_intraday_features
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
eda_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
requested_intraday_features = provider["FEATURES"]
# name of the features this function can compute
base_intraday_features_names = ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda",
"diffminmodeeda", "entropyeda"]
# the subset of requested features this function can compute
intraday_features_to_compute = list(set(requested_intraday_features) & set(base_intraday_features_names))
# extract features from intraday data
eda_intraday_features = extractEDAFeaturesFromIntradayData(eda_intraday_data,
intraday_features_to_compute, time_segment,
filter_data_by_segment)
return eda_intraday_features

View File

@ -0,0 +1,76 @@
import pandas as pd
from scipy.stats import entropy
def statsFeatures(heartrate_data, features, heartrate_features):
    col_name = "heartrate"
    grouped = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[col_name]
    if "sumhr" in features:
        heartrate_features["sumhr"] = grouped.sum()
    if "maxhr" in features:
        heartrate_features["maxhr"] = grouped.max()
    if "minhr" in features:
        heartrate_features["minhr"] = grouped.min()
    if "avghr" in features:
        heartrate_features["avghr"] = grouped.mean()
    if "medianhr" in features:
        heartrate_features["medianhr"] = grouped.median()
    if "modehr" in features:
        heartrate_features["modehr"] = grouped.agg(lambda x: pd.Series.mode(x)[0])
    if "stdhr" in features:
        heartrate_features["stdhr"] = grouped.std()
    if "diffmaxmodehr" in features:
        heartrate_features["diffmaxmodehr"] = grouped.max() - grouped.agg(lambda x: pd.Series.mode(x)[0])
    if "diffminmodehr" in features:
        heartrate_features["diffminmodehr"] = grouped.agg(lambda x: pd.Series.mode(x)[0]) - grouped.min()
    if "entropyhr" in features:
        heartrate_features["entropyhr"] = grouped.agg(entropy)
    return heartrate_features
def extractHRFeaturesFromIntradayData(heartrate_intraday_data, features, time_segment, filter_data_by_segment):
heartrate_intraday_features = pd.DataFrame(columns=["local_segment"] + features)
if not heartrate_intraday_data.empty:
heartrate_intraday_data = filter_data_by_segment(heartrate_intraday_data, time_segment)
if not heartrate_intraday_data.empty:
heartrate_intraday_features = pd.DataFrame()
# get stats of heartrate
heartrate_intraday_features = statsFeatures(heartrate_intraday_data, features, heartrate_intraday_features)
heartrate_intraday_features.reset_index(inplace=True)
return heartrate_intraday_features
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
heartrate_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
requested_intraday_features = provider["FEATURES"]
# name of the features this function can compute
base_intraday_features_names = ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr",
"diffminmodehr", "entropyhr"]
# the subset of requested features this function can compute
intraday_features_to_compute = list(set(requested_intraday_features) & set(base_intraday_features_names))
# extract features from intraday data
heartrate_intraday_features = extractHRFeaturesFromIntradayData(heartrate_intraday_data,
intraday_features_to_compute, time_segment,
filter_data_by_segment)
return heartrate_intraday_features

View File

@ -0,0 +1,76 @@
import pandas as pd
from scipy.stats import entropy
def statsFeatures(ibi_data, features, ibi_features):
    col_name = "inter_beat_interval"
    grouped = ibi_data[["local_segment", col_name]].groupby(["local_segment"])[col_name]
    if "sumibi" in features:
        ibi_features["sumibi"] = grouped.sum()
    if "maxibi" in features:
        ibi_features["maxibi"] = grouped.max()
    if "minibi" in features:
        ibi_features["minibi"] = grouped.min()
    if "avgibi" in features:
        ibi_features["avgibi"] = grouped.mean()
    if "medianibi" in features:
        ibi_features["medianibi"] = grouped.median()
    if "modeibi" in features:
        ibi_features["modeibi"] = grouped.agg(lambda x: pd.Series.mode(x)[0])
    if "stdibi" in features:
        ibi_features["stdibi"] = grouped.std()
    if "diffmaxmodeibi" in features:
        ibi_features["diffmaxmodeibi"] = grouped.max() - grouped.agg(lambda x: pd.Series.mode(x)[0])
    if "diffminmodeibi" in features:
        ibi_features["diffminmodeibi"] = grouped.agg(lambda x: pd.Series.mode(x)[0]) - grouped.min()
    if "entropyibi" in features:
        ibi_features["entropyibi"] = grouped.agg(entropy)
    return ibi_features
def extractIBIFeaturesFromIntradayData(ibi_intraday_data, features, time_segment, filter_data_by_segment):
ibi_intraday_features = pd.DataFrame(columns=["local_segment"] + features)
if not ibi_intraday_data.empty:
ibi_intraday_data = filter_data_by_segment(ibi_intraday_data, time_segment)
if not ibi_intraday_data.empty:
ibi_intraday_features = pd.DataFrame()
# get stats of ibi
ibi_intraday_features = statsFeatures(ibi_intraday_data, features, ibi_intraday_features)
ibi_intraday_features.reset_index(inplace=True)
return ibi_intraday_features
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
ibi_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
requested_intraday_features = provider["FEATURES"]
# name of the features this function can compute
base_intraday_features_names = ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi",
"diffminmodeibi", "entropyibi"]
# the subset of requested features this function can compute
intraday_features_to_compute = list(set(requested_intraday_features) & set(base_intraday_features_names))
# extract features from intraday data
ibi_intraday_features = extractIBIFeaturesFromIntradayData(ibi_intraday_data,
intraday_features_to_compute, time_segment,
filter_data_by_segment)
return ibi_intraday_features

View File

@ -0,0 +1,76 @@
import pandas as pd
from scipy.stats import entropy
def statsFeatures(temperature_data, features, temperature_features):
    col_name = "temperature"
    grouped = temperature_data[["local_segment", col_name]].groupby(["local_segment"])[col_name]
    if "sumtemp" in features:
        temperature_features["sumtemp"] = grouped.sum()
    if "maxtemp" in features:
        temperature_features["maxtemp"] = grouped.max()
    if "mintemp" in features:
        temperature_features["mintemp"] = grouped.min()
    if "avgtemp" in features:
        temperature_features["avgtemp"] = grouped.mean()
    if "mediantemp" in features:
        temperature_features["mediantemp"] = grouped.median()
    if "modetemp" in features:
        temperature_features["modetemp"] = grouped.agg(lambda x: pd.Series.mode(x)[0])
    if "stdtemp" in features:
        temperature_features["stdtemp"] = grouped.std()
    if "diffmaxmodetemp" in features:
        temperature_features["diffmaxmodetemp"] = grouped.max() - grouped.agg(lambda x: pd.Series.mode(x)[0])
    if "diffminmodetemp" in features:
        temperature_features["diffminmodetemp"] = grouped.agg(lambda x: pd.Series.mode(x)[0]) - grouped.min()
    if "entropytemp" in features:
        temperature_features["entropytemp"] = grouped.agg(entropy)
    return temperature_features
def extractTempFeaturesFromIntradayData(temperature_intraday_data, features, time_segment, filter_data_by_segment):
temperature_intraday_features = pd.DataFrame(columns=["local_segment"] + features)
if not temperature_intraday_data.empty:
temperature_intraday_data = filter_data_by_segment(temperature_intraday_data, time_segment)
if not temperature_intraday_data.empty:
temperature_intraday_features = pd.DataFrame()
# get stats of temperature
temperature_intraday_features = statsFeatures(temperature_intraday_data, features, temperature_intraday_features)
temperature_intraday_features.reset_index(inplace=True)
return temperature_intraday_features
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
temperature_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
requested_intraday_features = provider["FEATURES"]
# name of the features this function can compute
base_intraday_features_names = ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp",
"diffminmodetemp", "entropytemp"]
# the subset of requested features this function can compute
intraday_features_to_compute = list(set(requested_intraday_features) & set(base_intraday_features_names))
# extract features from intraday data
temperature_intraday_features = extractTempFeaturesFromIntradayData(temperature_intraday_data,
intraday_features_to_compute, time_segment,
filter_data_by_segment)
return temperature_intraday_features