Merge branch 'empatica_calculating_features'

sociality-task
Primoz 2022-07-07 15:35:47 +00:00
commit a65a85cce9
8 changed files with 126 additions and 49 deletions

7
.gitignore vendored
View File

@ -114,7 +114,7 @@ sn_profile_*/
settings.dcf settings.dcf
tests/fakedata_generation/ tests/fakedata_generation/
site/ site/
!credentials.yaml credentials.yaml
# Docker container and other files # Docker container and other files
.devcontainer .devcontainer
@ -122,6 +122,5 @@ site/
# Calculating features module # Calculating features module
calculatingfeatures/ calculatingfeatures/
# Temp folder for rapids data/external
# Temp data rapids_temp_data/
rapids_temp_data/

View File

@ -16,11 +16,55 @@ By [MoSHI](https://www.moshi.pitt.edu/), [University of Pittsburgh](https://www.
For RAPIDS installation refer to the [documentation](https://www.rapids.science/1.8/setup/installation/) For RAPIDS installation refer to the [documentation](https://www.rapids.science/1.8/setup/installation/)
## For the installation of the Docker version
1. Follow the [instructions](https://www.rapids.science/1.8/setup/installation/) to setup RAPIDS via Docker (from scratch).
2. Delete the current contents of the /rapids/ folder while in a container session.
```
cd ..
rm -rf rapids/{*,.*}
cd rapids
```
3. Clone RAPIDS workspace from Git and checkout a specific branch.
```
git clone "https://repo.ijs.si/junoslukan/rapids.git" .
git checkout <branch_name>
```
4. Install missing “libpq-dev” dependency with bash.
```
apt-get update -y
apt-get install -y libpq-dev
```
5. Restore R venv.
Type R to go to the interactive R session and then:
```
renv::restore()
```
6. Install cr-features module
From: https://repo.ijs.si/matjazbostic/calculatingfeatures.git -> branch calculations_for_rapids.
Then follow the CalculatingFeatures section below.
7. Install all required packages from environment.yml; the `--prune` option also removes conda packages that are not present in the environment file.
conda env update --file environment.yml --prune
8. If you wish to update your R or Python venvs.
```
R (in an interactive session): renv::snapshot()
Python: conda env export --no-builds | sed 's/^.*libgfortran.*$/ - libgfortran/' | sed 's/^.*mkl=.*$/ - mkl/' > environment.yml
```
## CalculatingFeatures ## CalculatingFeatures
This RAPIDS extension uses CalculatingFeatures library accessible [here](https://repo.ijs.si/matjazbostic/calculatingfeatures). This RAPIDS extension uses CalculatingFeatures library accessible [here](https://repo.ijs.si/matjazbostic/calculatingfeatures).
To use CalculatingFeatures library: To use CalculatingFeatures library:
- For now, use the "modifications_for_rapids" branch to get the newest version of cr-features that is functional for RAPIDS-STRAW analysis.
- Follow the installation instructions in the [README.md](https://repo.ijs.si/matjazbostic/calculatingfeatures/-/blob/master/README.md). - Follow the installation instructions in the [README.md](https://repo.ijs.si/matjazbostic/calculatingfeatures/-/blob/master/README.md).
- Copy built calculatingfeatures folder into the RAPIDS workspace. - Copy built calculatingfeatures folder into the RAPIDS workspace.
@ -28,5 +72,6 @@ To use CalculatingFeatures library:
- Install the CalculatingFeatures package by: - Install the CalculatingFeatures package by:
``` ```
pip install "path/to/the/calculatingfeatures/folder" pip install "path/to/the/calculatingfeatures/folder"
e.g. "./calculatingfeatures" if the folder is copied to main parent directory
CalculatingFeatures package has to be built and installed every time to get the newest version. CalculatingFeatures package has to be built and installed every time to get the newest version.
``` ```

View File

@ -5,6 +5,7 @@ include: "rules/common.smk"
include: "rules/renv.smk" include: "rules/renv.smk"
include: "rules/preprocessing.smk" include: "rules/preprocessing.smk"
include: "rules/features.smk" include: "rules/features.smk"
include: "rules/models.smk"
include: "rules/reports.smk" include: "rules/reports.smk"
import itertools import itertools

View File

@ -3,16 +3,17 @@
######################################################################################################################## ########################################################################################################################
# See https://www.rapids.science/latest/setup/configuration/#participant-files # See https://www.rapids.science/latest/setup/configuration/#participant-files
PIDS: [p031] #p01, p02, p03] PIDS: ['p031', 'p032', 'p033', 'p034', 'p035', 'p036', 'p037', 'p038', 'p039', 'p040', 'p042', 'p043', 'p044', 'p045', 'p046', 'p049', 'p050', 'p052', 'p053', 'p054', 'p055', 'p057', 'p058', 'p059', 'p060', 'p061', 'p062', 'p064', 'p067', 'p068', 'p069', 'p070', 'p071', 'p072', 'p073', 'p074', 'p075', 'p076', 'p077', 'p078', 'p079', 'p080', 'p081', 'p082', 'p083', 'p084', 'p085', 'p086', 'p088', 'p089', 'p090', 'p091', 'p092', 'p093', 'p106', 'p107']
# See https://www.rapids.science/latest/setup/configuration/#automatic-creation-of-participant-files # See https://www.rapids.science/latest/setup/configuration/#automatic-creation-of-participant-files
CREATE_PARTICIPANT_FILES: CREATE_PARTICIPANT_FILES:
CSV_FILE_PATH: "data/external/example_participants.csv" # see docs for required format USERNAMES_CSV: "data/external/main_study_usernames.csv"
CSV_FILE_PATH: "data/external/main_study_participants.csv" # see docs for required format
PHONE_SECTION: PHONE_SECTION:
ADD: True ADD: True
IGNORED_DEVICE_IDS: [] IGNORED_DEVICE_IDS: []
FITBIT_SECTION: FITBIT_SECTION:
ADD: True ADD: False
IGNORED_DEVICE_IDS: [] IGNORED_DEVICE_IDS: []
EMPATICA_SECTION: EMPATICA_SECTION:
ADD: True ADD: True
@ -21,16 +22,17 @@ CREATE_PARTICIPANT_FILES:
# See https://www.rapids.science/latest/setup/configuration/#time-segments # See https://www.rapids.science/latest/setup/configuration/#time-segments
TIME_SEGMENTS: &time_segments TIME_SEGMENTS: &time_segments
TYPE: PERIODIC # FREQUENCY, PERIODIC, EVENT TYPE: PERIODIC # FREQUENCY, PERIODIC, EVENT
FILE: "data/external/timesegments_periodic.csv" FILE: "data/external/timesegments_daily.csv"
INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs INCLUDE_PAST_PERIODIC_SEGMENTS: TRUE # Only relevant if TYPE=PERIODIC, see docs
# See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study # See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study
TIMEZONE: TIMEZONE:
TYPE: SINGLE TYPE: MULTIPLE
SINGLE: SINGLE:
TZCODE: Europe/Ljubljana TZCODE: Europe/Ljubljana
MULTIPLE: MULTIPLE:
TZCODES_FILE: data/external/multiple_timezones_example.csv TZ_FILE: data/external/timezone.csv
TZCODES_FILE: data/external/multiple_timezones.csv
IF_MISSING_TZCODE: USE_DEFAULT IF_MISSING_TZCODE: USE_DEFAULT
DEFAULT_TZCODE: Europe/Ljubljana DEFAULT_TZCODE: Europe/Ljubljana
FITBIT: FITBIT:
@ -85,7 +87,7 @@ PHONE_ACTIVITY_RECOGNITION:
EPISODE_THRESHOLD_BETWEEN_ROWS: 5 # minutes. Max time difference for two consecutive rows to be considered within the same AR episode. EPISODE_THRESHOLD_BETWEEN_ROWS: 5 # minutes. Max time difference for two consecutive rows to be considered within the same AR episode.
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: True
FEATURES: ["count", "mostcommonactivity", "countuniqueactivities", "durationstationary", "durationmobile", "durationvehicle"] FEATURES: ["count", "mostcommonactivity", "countuniqueactivities", "durationstationary", "durationmobile", "durationvehicle"]
ACTIVITY_CLASSES: ACTIVITY_CLASSES:
STATIONARY: ["still", "tilting"] STATIONARY: ["still", "tilting"]
@ -114,7 +116,7 @@ PHONE_APPLICATIONS_FOREGROUND:
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: True
INCLUDE_EPISODE_FEATURES: True INCLUDE_EPISODE_FEATURES: True
SINGLE_CATEGORIES: ["all", "email"] SINGLE_CATEGORIES: ["all", "email"]
MULTIPLE_CATEGORIES: MULTIPLE_CATEGORIES:
@ -149,7 +151,7 @@ PHONE_BATTERY:
EPISODE_THRESHOLD_BETWEEN_ROWS: 30 # minutes. Max time difference for two consecutive rows to be considered within the same battery episode. EPISODE_THRESHOLD_BETWEEN_ROWS: 30 # minutes. Max time difference for two consecutive rows to be considered within the same battery episode.
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: True
FEATURES: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"] FEATURES: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"]
SRC_SCRIPT: src/features/phone_battery/rapids/main.py SRC_SCRIPT: src/features/phone_battery/rapids/main.py
@ -158,12 +160,12 @@ PHONE_BLUETOOTH:
CONTAINER: bluetooth CONTAINER: bluetooth
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: True
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"] FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
SRC_SCRIPT: src/features/phone_bluetooth/rapids/main.R SRC_SCRIPT: src/features/phone_bluetooth/rapids/main.R
DORYAB: DORYAB:
COMPUTE: False COMPUTE: True
FEATURES: FEATURES:
ALL: ALL:
DEVICES: ["countscans", "uniquedevices", "meanscans", "stdscans"] DEVICES: ["countscans", "uniquedevices", "meanscans", "stdscans"]
@ -184,7 +186,7 @@ PHONE_CALLS:
CONTAINER: call CONTAINER: call
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: True
FEATURES_TYPE: EPISODES # EVENTS or EPISODES FEATURES_TYPE: EPISODES # EVENTS or EPISODES
CALL_TYPES: [missed, incoming, outgoing] CALL_TYPES: [missed, incoming, outgoing]
FEATURES: FEATURES:
@ -227,7 +229,7 @@ PHONE_DATA_YIELD:
PHONE_WIFI_VISIBLE] PHONE_WIFI_VISIBLE]
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: True
FEATURES: [ratiovalidyieldedminutes, ratiovalidyieldedhours] FEATURES: [ratiovalidyieldedminutes, ratiovalidyieldedhours]
MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1, minimum percentage of valid minutes in an hour to be considered valid. MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1, minimum percentage of valid minutes in an hour to be considered valid.
SRC_SCRIPT: src/features/phone_data_yield/rapids/main.R SRC_SCRIPT: src/features/phone_data_yield/rapids/main.R
@ -255,7 +257,7 @@ PHONE_LIGHT:
CONTAINER: light_sensor CONTAINER: light_sensor
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: True
FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"] FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
SRC_SCRIPT: src/features/phone_light/rapids/main.py SRC_SCRIPT: src/features/phone_light/rapids/main.py
@ -269,7 +271,7 @@ PHONE_LOCATIONS:
PROVIDERS: PROVIDERS:
DORYAB: DORYAB:
COMPUTE: False COMPUTE: True
FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome", "homelabel"] FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome", "homelabel"]
DBSCAN_EPS: 100 # meters DBSCAN_EPS: 100 # meters
DBSCAN_MINSAMPLES: 5 DBSCAN_MINSAMPLES: 5
@ -284,7 +286,7 @@ PHONE_LOCATIONS:
SRC_SCRIPT: src/features/phone_locations/doryab/main.py SRC_SCRIPT: src/features/phone_locations/doryab/main.py
BARNETT: BARNETT:
COMPUTE: False COMPUTE: True
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"] FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates gruped by minute) were used to compute features MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates gruped by minute) were used to compute features
@ -302,7 +304,7 @@ PHONE_MESSAGES:
CONTAINER: sms CONTAINER: sms
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: True
MESSAGES_TYPES : [received, sent] MESSAGES_TYPES : [received, sent]
FEATURES: FEATURES:
received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact] received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
@ -314,7 +316,7 @@ PHONE_SCREEN:
CONTAINER: screen CONTAINER: screen
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: True
REFERENCE_HOUR_FIRST_USE: 0 REFERENCE_HOUR_FIRST_USE: 0
IGNORE_EPISODES_SHORTER_THAN: 0 # in minutes, set to 0 to disable IGNORE_EPISODES_SHORTER_THAN: 0 # in minutes, set to 0 to disable
IGNORE_EPISODES_LONGER_THAN: 360 # in minutes, set to 0 to disable IGNORE_EPISODES_LONGER_THAN: 360 # in minutes, set to 0 to disable
@ -336,13 +338,12 @@ PHONE_WIFI_VISIBLE:
CONTAINER: wifi CONTAINER: wifi
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: True
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"] FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
SRC_SCRIPT: src/features/phone_wifi_visible/rapids/main.R SRC_SCRIPT: src/features/phone_wifi_visible/rapids/main.R
######################################################################################################################## ########################################################################################################################
# FITBIT # # FITBIT #
######################################################################################################################## ########################################################################################################################
@ -484,6 +485,7 @@ FITBIT_STEPS_INTRADAY:
INCLUDE_ZERO_STEP_ROWS: False INCLUDE_ZERO_STEP_ROWS: False
SRC_SCRIPT: src/features/fitbit_steps_intraday/rapids/main.py SRC_SCRIPT: src/features/fitbit_steps_intraday/rapids/main.py
######################################################################################################################## ########################################################################################################################
# EMPATICA # # EMPATICA #
######################################################################################################################## ########################################################################################################################
@ -506,7 +508,7 @@ EMPATICA_ACCELEROMETER:
FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"] FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py
CR: CR:
COMPUTE: False COMPUTE: True
FEATURES: ["totalMagnitudeBand", "absoluteMeanBand", "varianceBand"] # Acc features FEATURES: ["totalMagnitudeBand", "absoluteMeanBand", "varianceBand"] # Acc features
WINDOWS: WINDOWS:
COMPUTE: True COMPUTE: True
@ -534,7 +536,7 @@ EMPATICA_TEMPERATURE:
FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"] FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py
CR: CR:
COMPUTE: False COMPUTE: True
FEATURES: ["maximum", "minimum", "meanAbsChange", "longestStrikeAboveMean", "longestStrikeBelowMean", FEATURES: ["maximum", "minimum", "meanAbsChange", "longestStrikeAboveMean", "longestStrikeBelowMean",
"stdDev", "median", "meanChange", "sumSquared", "squareSumOfComponent", "sumOfSquareComponents"] "stdDev", "median", "meanChange", "sumSquared", "squareSumOfComponent", "sumOfSquareComponents"]
WINDOWS: WINDOWS:
@ -595,7 +597,7 @@ EMPATICA_INTER_BEAT_INTERVAL:
FEATURES: ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi", "diffminmodeibi", "entropyibi"] FEATURES: ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi", "diffminmodeibi", "entropyibi"]
SRC_SCRIPT: src/features/empatica_inter_beat_interval/dbdp/main.py SRC_SCRIPT: src/features/empatica_inter_beat_interval/dbdp/main.py
CR: CR:
COMPUTE: False COMPUTE: True
FEATURES: ['meanHr', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50', 'sd', 'sd2', 'sd1/sd2', 'numRR', # Time features FEATURES: ['meanHr', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50', 'sd', 'sd2', 'sd1/sd2', 'numRR', # Time features
'VLF', 'LF', 'LFnorm', 'HF', 'HFnorm', 'LF/HF', 'fullIntegral'] # Freq features 'VLF', 'LF', 'LFnorm', 'HF', 'HFnorm', 'LF/HF', 'fullIntegral'] # Freq features
PATCH_WITH_BVP: True PATCH_WITH_BVP: True
@ -612,7 +614,6 @@ EMPATICA_TAGS:
PROVIDERS: # None implemented yet PROVIDERS: # None implemented yet
######################################################################################################################## ########################################################################################################################
# PLOTS # # PLOTS #
######################################################################################################################## ########################################################################################################################
@ -654,17 +655,17 @@ HEATMAP_FEATURE_CORRELATION_MATRIX:
ALL_CLEANING_INDIVIDUAL: ALL_CLEANING_INDIVIDUAL:
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: True
IMPUTE_SELECTED_EVENT_FEATURES: IMPUTE_SELECTED_EVENT_FEATURES:
COMPUTE: True COMPUTE: True
MIN_DATA_YIELDED_MINUTES_TO_IMPUTE: 0.33 MIN_DATA_YIELDED_MINUTES_TO_IMPUTE: 0.33
COLS_NAN_THRESHOLD: 0.3 # set to 1 to disable COLS_NAN_THRESHOLD: 0.3 # set to 1 to disable
COLS_VAR_THRESHOLD: True COLS_VAR_THRESHOLD: True
ROWS_NAN_THRESHOLD: 0.3 # set to 1 to disable ROWS_NAN_THRESHOLD: 1 # set to 1 to disable
DATA_YIELD_FEATURE: RATIO_VALID_YIELDED_HOURS # RATIO_VALID_YIELDED_HOURS or RATIO_VALID_YIELDED_MINUTES DATA_YIELD_FEATURE: RATIO_VALID_YIELDED_HOURS # RATIO_VALID_YIELDED_HOURS or RATIO_VALID_YIELDED_MINUTES
DATA_YIELD_RATIO_THRESHOLD: 0.5 # set to 0 to disable DATA_YIELD_RATIO_THRESHOLD: 0.3 # set to 0 to disable
DROP_HIGHLY_CORRELATED_FEATURES: DROP_HIGHLY_CORRELATED_FEATURES:
COMPUTE: True COMPUTE: False
MIN_OVERLAP_FOR_CORR_THRESHOLD: 0.5 MIN_OVERLAP_FOR_CORR_THRESHOLD: 0.5
CORR_THRESHOLD: 0.95 CORR_THRESHOLD: 0.95
SRC_SCRIPT: src/features/all_cleaning_individual/rapids/main.R SRC_SCRIPT: src/features/all_cleaning_individual/rapids/main.R
@ -672,17 +673,17 @@ ALL_CLEANING_INDIVIDUAL:
ALL_CLEANING_OVERALL: ALL_CLEANING_OVERALL:
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: True
IMPUTE_SELECTED_EVENT_FEATURES: IMPUTE_SELECTED_EVENT_FEATURES:
COMPUTE: True COMPUTE: True
MIN_DATA_YIELDED_MINUTES_TO_IMPUTE: 0.33 MIN_DATA_YIELDED_MINUTES_TO_IMPUTE: 0.33
COLS_NAN_THRESHOLD: 0.3 # set to 1 to disable COLS_NAN_THRESHOLD: 0.3 # set to 1 to disable
COLS_VAR_THRESHOLD: True COLS_VAR_THRESHOLD: True
ROWS_NAN_THRESHOLD: 0.3 # set to 1 to disable ROWS_NAN_THRESHOLD: 1 # set to 1 to disable
DATA_YIELD_FEATURE: RATIO_VALID_YIELDED_HOURS # RATIO_VALID_YIELDED_HOURS or RATIO_VALID_YIELDED_MINUTES DATA_YIELD_FEATURE: RATIO_VALID_YIELDED_HOURS # RATIO_VALID_YIELDED_HOURS or RATIO_VALID_YIELDED_MINUTES
DATA_YIELD_RATIO_THRESHOLD: 0.5 # set to 0 to disable DATA_YIELD_RATIO_THRESHOLD: 0.3 # set to 0 to disable
DROP_HIGHLY_CORRELATED_FEATURES: DROP_HIGHLY_CORRELATED_FEATURES:
COMPUTE: True COMPUTE: False
MIN_OVERLAP_FOR_CORR_THRESHOLD: 0.5 MIN_OVERLAP_FOR_CORR_THRESHOLD: 0.5
CORR_THRESHOLD: 0.95 CORR_THRESHOLD: 0.95
SRC_SCRIPT: src/features/all_cleaning_overall/rapids/main.R SRC_SCRIPT: src/features/all_cleaning_overall/rapids/main.R
@ -691,12 +692,14 @@ ALL_CLEANING_OVERALL:
######################################################################################################################## ########################################################################################################################
# Z-score standardization # # Z-score standardization #
######################################################################################################################## ########################################################################################################################
STANDARDIZATION: STANDARDIZATION:
PROVIDERS: PROVIDERS:
CR: CR:
COMPUTE: True COMPUTE: True
SRC_SCRIPT: src/features/standardization/main.py SRC_SCRIPT: src/features/standardization/main.py
######################################################################################################################## ########################################################################################################################
# Baseline # # Baseline #
######################################################################################################################## ########################################################################################################################
@ -716,4 +719,3 @@ PARAMS_FOR_ANALYSIS:
TARGET: TARGET:
COMPUTE: True COMPUTE: True
LABEL: PANAS_negative_affect_mean LABEL: PANAS_negative_affect_mean

View File

@ -111,7 +111,7 @@ dependencies:
- biosppy==0.8.0 - biosppy==0.8.0
- cached-property==1.5.2 - cached-property==1.5.2
- configargparse==0.15.1 - configargparse==0.15.1
- cr-features==0.1.15 - cr-features==0.1.20
- cycler==0.11.0 - cycler==0.11.0
- decorator==4.4.2 - decorator==4.4.2
- fonttools==4.33.2 - fonttools==4.33.2

View File

@ -111,7 +111,7 @@
"Package": "Rcpp", "Package": "Rcpp",
"Version": "1.0.7", "Version": "1.0.7",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "dab19adae4440ae55aa8a9d238b246bb" "Hash": "dab19adae4440ae55aa8a9d238b246bb"
}, },
"RcppArmadillo": { "RcppArmadillo": {
@ -356,7 +356,7 @@
"Package": "dbplyr", "Package": "dbplyr",
"Version": "2.1.1", "Version": "2.1.1",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "1f37fa4ab2f5f7eded42f78b9a887182" "Hash": "1f37fa4ab2f5f7eded42f78b9a887182"
}, },
"desc": { "desc": {
@ -405,7 +405,7 @@
"Package": "ellipsis", "Package": "ellipsis",
"Version": "0.3.2", "Version": "0.3.2",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "bb0eec2fe32e88d9e2836c2f73ea2077" "Hash": "bb0eec2fe32e88d9e2836c2f73ea2077"
}, },
"entropy": { "entropy": {
@ -566,7 +566,7 @@
"Package": "gower", "Package": "gower",
"Version": "0.2.2", "Version": "0.2.2",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "be6a2b3529928bd803d1c437d1d43152" "Hash": "be6a2b3529928bd803d1c437d1d43152"
}, },
"graphlayouts": { "graphlayouts": {
@ -615,7 +615,7 @@
"Package": "hms", "Package": "hms",
"Version": "1.1.1", "Version": "1.1.1",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "5b8a2dd0fdbe2ab4f6081e6c7be6dfca" "Hash": "5b8a2dd0fdbe2ab4f6081e6c7be6dfca"
}, },
"htmlTable": { "htmlTable": {
@ -888,7 +888,7 @@
"Package": "parallelly", "Package": "parallelly",
"Version": "1.29.0", "Version": "1.29.0",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "b5f399c9ce96977e22ef32c20b6cfe87" "Hash": "b5f399c9ce96977e22ef32c20b6cfe87"
}, },
"pbapply": { "pbapply": {
@ -1000,7 +1000,7 @@
"Package": "progressr", "Package": "progressr",
"Version": "0.9.0", "Version": "0.9.0",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "ca0d80ecc29903f7579edbabd91f4199" "Hash": "ca0d80ecc29903f7579edbabd91f4199"
}, },
"promises": { "promises": {
@ -1343,7 +1343,7 @@
"Package": "vctrs", "Package": "vctrs",
"Version": "0.3.8", "Version": "0.3.8",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "ecf749a1b39ea72bd9b51b76292261f1" "Hash": "ecf749a1b39ea72bd9b51b76292261f1"
}, },
"viridis": { "viridis": {

View File

@ -4,6 +4,36 @@ rule create_example_participant_files:
shell: shell:
"echo 'PHONE:\n DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524]\n PLATFORMS: [android]\n LABEL: test-01\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\nFITBIT:\n DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524]\n LABEL: test-01\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\n' >> ./data/external/participant_files/example01.yaml && echo 'PHONE:\n DEVICE_IDS: [13dbc8a3-dae3-4834-823a-4bc96a7d459d]\n PLATFORMS: [ios]\n LABEL: test-02\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\nFITBIT:\n DEVICE_IDS: [13dbc8a3-dae3-4834-823a-4bc96a7d459d]\n LABEL: test-02\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\n' >> ./data/external/participant_files/example02.yaml" "echo 'PHONE:\n DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524]\n PLATFORMS: [android]\n LABEL: test-01\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\nFITBIT:\n DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524]\n LABEL: test-01\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\n' >> ./data/external/participant_files/example01.yaml && echo 'PHONE:\n DEVICE_IDS: [13dbc8a3-dae3-4834-823a-4bc96a7d459d]\n PLATFORMS: [ios]\n LABEL: test-02\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\nFITBIT:\n DEVICE_IDS: [13dbc8a3-dae3-4834-823a-4bc96a7d459d]\n LABEL: test-02\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\n' >> ./data/external/participant_files/example02.yaml"
# rule query_usernames_device_empatica_ids:
# params:
# baseline_folder = "/mnt/e/STRAWbaseline/"
# output:
# usernames_file = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"],
# timezone_file = config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"]
# script:
# "../../participants/prepare_usernames_file.py"
# Rule: runs ../tools/create_multi_timezones_file.py, taking the file at
# config TIMEZONE.MULTIPLE.TZ_FILE as input and declaring the file at
# config TIMEZONE.MULTIPLE.TZCODES_FILE as its output.
# NOTE(review): the script itself is not visible here; presumably it converts
# the raw timezone table into the TZCODES_FILE format - confirm.
rule prepare_tzcodes_file:
input:
timezone_file = config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"]
output:
tzcodes_file = config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]
script:
"../tools/create_multi_timezones_file.py"
# Rule: runs ../src/data/translate_usernames_into_participants_data.R, taking
# the usernames CSV (config CREATE_PARTICIPANT_FILES.USERNAMES_CSV) as input
# and declaring the participants CSV (config CREATE_PARTICIPANT_FILES.CSV_FILE_PATH)
# as output. That same config path is the input of rule create_participants_files.
rule prepare_participants_csv:
input:
username_list = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"]
params:
# Configuration of the phone data stream selected by PHONE_DATA_STREAMS.USE.
data_configuration = config["PHONE_DATA_STREAMS"][config["PHONE_DATA_STREAMS"]["USE"]],
# Table names handed to the script; device ids and start/end dates both use "esm".
# NOTE(review): presumably these are database table names - confirm against the script.
participants_table = "participants",
device_id_table = "esm",
start_end_date_table = "esm"
output:
participants_file = config["CREATE_PARTICIPANT_FILES"]["CSV_FILE_PATH"]
script:
"../src/data/translate_usernames_into_participants_data.R"
rule create_participants_files: rule create_participants_files:
input: input:
participants_file = config["CREATE_PARTICIPANT_FILES"]["CSV_FILE_PATH"] participants_file = config["CREATE_PARTICIPANT_FILES"]["CSV_FILE_PATH"]
@ -218,4 +248,4 @@ rule empatica_readable_datetime:
output: output:
"data/raw/{pid}/empatica_{sensor}_with_datetime.csv" "data/raw/{pid}/empatica_{sensor}_with_datetime.csv"
script: script:
"../src/data/datetime/readable_datetime.R" "../src/data/datetime/readable_datetime.R"

View File

@ -58,7 +58,7 @@ participants %>%
lines <- append(lines, empty_fitbit) lines <- append(lines, empty_fitbit)
if(add_empatica_section == TRUE && !is.na(row[empatica_device_id_column])){ if(add_empatica_section == TRUE && !is.na(row[empatica_device_id_column])){
lines <- append(lines, c("EMPATICA:", paste0(" DEVICE_IDS: [",row[empatica_device_id_column],"]"), lines <- append(lines, c("EMPATICA:", paste0(" DEVICE_IDS: [",row$label,"]"),
paste(" LABEL:",row$label), paste(" START_DATE:", start_date), paste(" END_DATE:", end_date))) paste(" LABEL:",row$label), paste(" START_DATE:", start_date), paste(" END_DATE:", end_date)))
} else } else
lines <- append(lines, empty_empatica) lines <- append(lines, empty_empatica)