Working version that integrates both phone and empatica feature calculations.
parent
0425403951
commit
5a777ac79f
|
@ -114,10 +114,14 @@ sn_profile_*/
|
|||
settings.dcf
|
||||
tests/fakedata_generation/
|
||||
site/
|
||||
!credentials.yaml
|
||||
credentials.yaml
|
||||
|
||||
# Docker container and other files
|
||||
.devcontainer
|
||||
|
||||
# Calculating features module
|
||||
calculatingfeatures/
|
||||
|
||||
|
||||
# Temp folder for rapids data/external
|
||||
rapids_temp_data/
|
||||
|
|
|
@ -5,6 +5,7 @@ include: "rules/common.smk"
|
|||
include: "rules/renv.smk"
|
||||
include: "rules/preprocessing.smk"
|
||||
include: "rules/features.smk"
|
||||
include: "rules/models.smk"
|
||||
include: "rules/reports.smk"
|
||||
|
||||
import itertools
|
||||
|
|
70
config.yaml
70
config.yaml
|
@ -3,16 +3,17 @@
|
|||
########################################################################################################################
|
||||
|
||||
# See https://www.rapids.science/latest/setup/configuration/#participant-files
|
||||
PIDS: [p031] #p01, p02, p03]
|
||||
PIDS: ['p031', 'p032', 'p033', 'p034', 'p035', 'p036', 'p037', 'p038', 'p039', 'p040', 'p042', 'p043', 'p044', 'p045', 'p046', 'p049', 'p050', 'p052', 'p053', 'p054', 'p055', 'p057', 'p058', 'p059', 'p060', 'p061', 'p062', 'p064', 'p067', 'p068', 'p069', 'p070', 'p071', 'p072', 'p073', 'p074', 'p075', 'p076', 'p077', 'p078', 'p079', 'p080', 'p081', 'p082', 'p083', 'p084', 'p085', 'p086', 'p088', 'p089', 'p090', 'p091', 'p092', 'p093', 'p106', 'p107']
|
||||
|
||||
# See https://www.rapids.science/latest/setup/configuration/#automatic-creation-of-participant-files
|
||||
CREATE_PARTICIPANT_FILES:
|
||||
CSV_FILE_PATH: "data/external/example_participants.csv" # see docs for required format
|
||||
USERNAMES_CSV: "data/external/main_study_usernames.csv"
|
||||
CSV_FILE_PATH: "data/external/main_study_participants.csv" # see docs for required format
|
||||
PHONE_SECTION:
|
||||
ADD: True
|
||||
IGNORED_DEVICE_IDS: []
|
||||
FITBIT_SECTION:
|
||||
ADD: True
|
||||
ADD: False
|
||||
IGNORED_DEVICE_IDS: []
|
||||
EMPATICA_SECTION:
|
||||
ADD: True
|
||||
|
@ -21,16 +22,17 @@ CREATE_PARTICIPANT_FILES:
|
|||
# See https://www.rapids.science/latest/setup/configuration/#time-segments
|
||||
TIME_SEGMENTS: &time_segments
|
||||
TYPE: PERIODIC # FREQUENCY, PERIODIC, EVENT
|
||||
FILE: "data/external/timesegments_periodic.csv"
|
||||
INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs
|
||||
FILE: "data/external/timesegments_daily.csv"
|
||||
INCLUDE_PAST_PERIODIC_SEGMENTS: TRUE # Only relevant if TYPE=PERIODIC, see docs
|
||||
|
||||
# See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study
|
||||
TIMEZONE:
|
||||
TYPE: SINGLE
|
||||
TYPE: MULTIPLE
|
||||
SINGLE:
|
||||
TZCODE: Europe/Ljubljana
|
||||
MULTIPLE:
|
||||
TZCODES_FILE: data/external/multiple_timezones_example.csv
|
||||
TZ_FILE: data/external/timezone.csv
|
||||
TZCODES_FILE: data/external/multiple_timezones.csv
|
||||
IF_MISSING_TZCODE: USE_DEFAULT
|
||||
DEFAULT_TZCODE: Europe/Ljubljana
|
||||
FITBIT:
|
||||
|
@ -85,7 +87,7 @@ PHONE_ACTIVITY_RECOGNITION:
|
|||
EPISODE_THRESHOLD_BETWEEN_ROWS: 5 # minutes. Max time difference for two consecutive rows to be considered within the same AR episode.
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ["count", "mostcommonactivity", "countuniqueactivities", "durationstationary", "durationmobile", "durationvehicle"]
|
||||
ACTIVITY_CLASSES:
|
||||
STATIONARY: ["still", "tilting"]
|
||||
|
@ -114,7 +116,7 @@ PHONE_APPLICATIONS_FOREGROUND:
|
|||
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
INCLUDE_EPISODE_FEATURES: True
|
||||
SINGLE_CATEGORIES: ["all", "email"]
|
||||
MULTIPLE_CATEGORIES:
|
||||
|
@ -149,7 +151,7 @@ PHONE_BATTERY:
|
|||
EPISODE_THRESHOLD_BETWEEN_ROWS: 30 # minutes. Max time difference for two consecutive rows to be considered within the same battery episode.
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"]
|
||||
SRC_SCRIPT: src/features/phone_battery/rapids/main.py
|
||||
|
||||
|
@ -158,12 +160,12 @@ PHONE_BLUETOOTH:
|
|||
CONTAINER: bluetooth
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
|
||||
SRC_SCRIPT: src/features/phone_bluetooth/rapids/main.R
|
||||
|
||||
DORYAB:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES:
|
||||
ALL:
|
||||
DEVICES: ["countscans", "uniquedevices", "meanscans", "stdscans"]
|
||||
|
@ -184,7 +186,7 @@ PHONE_CALLS:
|
|||
CONTAINER: call
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES_TYPE: EPISODES # EVENTS or EPISODES
|
||||
CALL_TYPES: [missed, incoming, outgoing]
|
||||
FEATURES:
|
||||
|
@ -227,7 +229,7 @@ PHONE_DATA_YIELD:
|
|||
PHONE_WIFI_VISIBLE]
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: [ratiovalidyieldedminutes, ratiovalidyieldedhours]
|
||||
MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1, minimum proportion of valid minutes for an hour to be considered valid.
|
||||
SRC_SCRIPT: src/features/phone_data_yield/rapids/main.R
|
||||
|
@ -255,7 +257,7 @@ PHONE_LIGHT:
|
|||
CONTAINER: light_sensor
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
|
||||
SRC_SCRIPT: src/features/phone_light/rapids/main.py
|
||||
|
||||
|
@ -269,7 +271,7 @@ PHONE_LOCATIONS:
|
|||
|
||||
PROVIDERS:
|
||||
DORYAB:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome", "homelabel"]
|
||||
DBSCAN_EPS: 100 # meters
|
||||
DBSCAN_MINSAMPLES: 5
|
||||
|
@ -284,7 +286,7 @@ PHONE_LOCATIONS:
|
|||
SRC_SCRIPT: src/features/phone_locations/doryab/main.py
|
||||
|
||||
BARNETT:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
|
||||
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
|
||||
MINUTES_DATA_USED: False # Use this for quality control purposes: how many minutes of data (location coordinates grouped by minute) were used to compute features
|
||||
|
@ -302,7 +304,7 @@ PHONE_MESSAGES:
|
|||
CONTAINER: sms
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
MESSAGES_TYPES : [received, sent]
|
||||
FEATURES:
|
||||
received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
|
||||
|
@ -314,7 +316,7 @@ PHONE_SCREEN:
|
|||
CONTAINER: screen
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
REFERENCE_HOUR_FIRST_USE: 0
|
||||
IGNORE_EPISODES_SHORTER_THAN: 0 # in minutes, set to 0 to disable
|
||||
IGNORE_EPISODES_LONGER_THAN: 360 # in minutes, set to 0 to disable
|
||||
|
@ -336,13 +338,12 @@ PHONE_WIFI_VISIBLE:
|
|||
CONTAINER: wifi
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
|
||||
SRC_SCRIPT: src/features/phone_wifi_visible/rapids/main.R
|
||||
|
||||
|
||||
|
||||
|
||||
########################################################################################################################
|
||||
# FITBIT #
|
||||
########################################################################################################################
|
||||
|
@ -484,6 +485,7 @@ FITBIT_STEPS_INTRADAY:
|
|||
INCLUDE_ZERO_STEP_ROWS: False
|
||||
SRC_SCRIPT: src/features/fitbit_steps_intraday/rapids/main.py
|
||||
|
||||
|
||||
########################################################################################################################
|
||||
# EMPATICA #
|
||||
########################################################################################################################
|
||||
|
@ -506,7 +508,7 @@ EMPATICA_ACCELEROMETER:
|
|||
FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
|
||||
SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py
|
||||
CR:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ["totalMagnitudeBand", "absoluteMeanBand", "varianceBand"] # Acc features
|
||||
WINDOWS:
|
||||
COMPUTE: True
|
||||
|
@ -534,7 +536,7 @@ EMPATICA_TEMPERATURE:
|
|||
FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
|
||||
SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py
|
||||
CR:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ["maximum", "minimum", "meanAbsChange", "longestStrikeAboveMean", "longestStrikeBelowMean",
|
||||
"stdDev", "median", "meanChange", "sumSquared", "squareSumOfComponent", "sumOfSquareComponents"]
|
||||
WINDOWS:
|
||||
|
@ -595,7 +597,7 @@ EMPATICA_INTER_BEAT_INTERVAL:
|
|||
FEATURES: ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi", "diffminmodeibi", "entropyibi"]
|
||||
SRC_SCRIPT: src/features/empatica_inter_beat_interval/dbdp/main.py
|
||||
CR:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ['meanHr', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50', 'sd', 'sd2', 'sd1/sd2', 'numRR', # Time features
|
||||
'VLF', 'LF', 'LFnorm', 'HF', 'HFnorm', 'LF/HF', 'fullIntegral'] # Freq features
|
||||
PATCH_WITH_BVP: True
|
||||
|
@ -612,7 +614,6 @@ EMPATICA_TAGS:
|
|||
PROVIDERS: # None implemented yet
|
||||
|
||||
|
||||
|
||||
########################################################################################################################
|
||||
# PLOTS #
|
||||
########################################################################################################################
|
||||
|
@ -654,17 +655,17 @@ HEATMAP_FEATURE_CORRELATION_MATRIX:
|
|||
ALL_CLEANING_INDIVIDUAL:
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
IMPUTE_SELECTED_EVENT_FEATURES:
|
||||
COMPUTE: True
|
||||
MIN_DATA_YIELDED_MINUTES_TO_IMPUTE: 0.33
|
||||
COLS_NAN_THRESHOLD: 0.3 # set to 1 to disable
|
||||
COLS_VAR_THRESHOLD: True
|
||||
ROWS_NAN_THRESHOLD: 0.3 # set to 1 to disable
|
||||
ROWS_NAN_THRESHOLD: 1 # set to 1 to disable
|
||||
DATA_YIELD_FEATURE: RATIO_VALID_YIELDED_HOURS # RATIO_VALID_YIELDED_HOURS or RATIO_VALID_YIELDED_MINUTES
|
||||
DATA_YIELD_RATIO_THRESHOLD: 0.5 # set to 0 to disable
|
||||
DATA_YIELD_RATIO_THRESHOLD: 0.3 # set to 0 to disable
|
||||
DROP_HIGHLY_CORRELATED_FEATURES:
|
||||
COMPUTE: True
|
||||
COMPUTE: False
|
||||
MIN_OVERLAP_FOR_CORR_THRESHOLD: 0.5
|
||||
CORR_THRESHOLD: 0.95
|
||||
SRC_SCRIPT: src/features/all_cleaning_individual/rapids/main.R
|
||||
|
@ -672,17 +673,17 @@ ALL_CLEANING_INDIVIDUAL:
|
|||
ALL_CLEANING_OVERALL:
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
IMPUTE_SELECTED_EVENT_FEATURES:
|
||||
COMPUTE: True
|
||||
MIN_DATA_YIELDED_MINUTES_TO_IMPUTE: 0.33
|
||||
COLS_NAN_THRESHOLD: 0.3 # set to 1 to disable
|
||||
COLS_VAR_THRESHOLD: True
|
||||
ROWS_NAN_THRESHOLD: 0.3 # set to 1 to disable
|
||||
ROWS_NAN_THRESHOLD: 1 # set to 1 to disable
|
||||
DATA_YIELD_FEATURE: RATIO_VALID_YIELDED_HOURS # RATIO_VALID_YIELDED_HOURS or RATIO_VALID_YIELDED_MINUTES
|
||||
DATA_YIELD_RATIO_THRESHOLD: 0.5 # set to 0 to disable
|
||||
DATA_YIELD_RATIO_THRESHOLD: 0.3 # set to 0 to disable
|
||||
DROP_HIGHLY_CORRELATED_FEATURES:
|
||||
COMPUTE: True
|
||||
COMPUTE: False
|
||||
MIN_OVERLAP_FOR_CORR_THRESHOLD: 0.5
|
||||
CORR_THRESHOLD: 0.95
|
||||
SRC_SCRIPT: src/features/all_cleaning_overall/rapids/main.R
|
||||
|
@ -691,12 +692,14 @@ ALL_CLEANING_OVERALL:
|
|||
########################################################################################################################
|
||||
# Z-score standardization #
|
||||
########################################################################################################################
|
||||
|
||||
STANDARDIZATION:
|
||||
PROVIDERS:
|
||||
CR:
|
||||
COMPUTE: True
|
||||
SRC_SCRIPT: src/features/standardization/main.py
|
||||
|
||||
|
||||
########################################################################################################################
|
||||
# Baseline #
|
||||
########################################################################################################################
|
||||
|
@ -716,4 +719,3 @@ PARAMS_FOR_ANALYSIS:
|
|||
TARGET:
|
||||
COMPUTE: True
|
||||
LABEL: PANAS_negative_affect_mean
|
||||
|
||||
|
|
|
@ -4,6 +4,36 @@ rule create_example_participant_files:
|
|||
shell:
|
||||
"echo 'PHONE:\n DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524]\n PLATFORMS: [android]\n LABEL: test-01\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\nFITBIT:\n DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524]\n LABEL: test-01\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\n' >> ./data/external/participant_files/example01.yaml && echo 'PHONE:\n DEVICE_IDS: [13dbc8a3-dae3-4834-823a-4bc96a7d459d]\n PLATFORMS: [ios]\n LABEL: test-02\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\nFITBIT:\n DEVICE_IDS: [13dbc8a3-dae3-4834-823a-4bc96a7d459d]\n LABEL: test-02\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\n' >> ./data/external/participant_files/example02.yaml"
|
||||
|
||||
# rule query_usernames_device_empatica_ids:
|
||||
# params:
|
||||
# baseline_folder = "/mnt/e/STRAWbaseline/"
|
||||
# output:
|
||||
# usernames_file = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"],
|
||||
# timezone_file = config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"]
|
||||
# script:
|
||||
# "../../participants/prepare_usernames_file.py"
|
||||
|
||||
rule prepare_tzcodes_file:
|
||||
input:
|
||||
timezone_file = config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"]
|
||||
output:
|
||||
tzcodes_file = config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]
|
||||
script:
|
||||
"../tools/create_multi_timezones_file.py"
|
||||
|
||||
rule prepare_participants_csv:
|
||||
input:
|
||||
username_list = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"]
|
||||
params:
|
||||
data_configuration = config["PHONE_DATA_STREAMS"][config["PHONE_DATA_STREAMS"]["USE"]],
|
||||
participants_table = "participants",
|
||||
device_id_table = "esm",
|
||||
start_end_date_table = "esm"
|
||||
output:
|
||||
participants_file = config["CREATE_PARTICIPANT_FILES"]["CSV_FILE_PATH"]
|
||||
script:
|
||||
"../src/data/translate_usernames_into_participants_data.R"
|
||||
|
||||
rule create_participants_files:
|
||||
input:
|
||||
participants_file = config["CREATE_PARTICIPANT_FILES"]["CSV_FILE_PATH"]
|
||||
|
|
|
@ -58,7 +58,7 @@ participants %>%
|
|||
lines <- append(lines, empty_fitbit)
|
||||
|
||||
if(add_empatica_section == TRUE && !is.na(row[empatica_device_id_column])){
|
||||
lines <- append(lines, c("EMPATICA:", paste0(" DEVICE_IDS: [",row[empatica_device_id_column],"]"),
|
||||
lines <- append(lines, c("EMPATICA:", paste0(" DEVICE_IDS: [",row$label,"]"),
|
||||
paste(" LABEL:",row$label), paste(" START_DATE:", start_date), paste(" END_DATE:", end_date)))
|
||||
} else
|
||||
lines <- append(lines, empty_empatica)
|
||||
|
|
Loading…
Reference in New Issue