Working version that integrates both phone and empatica feature calculations.

sociality-task
Primoz 2022-07-07 15:00:47 +00:00
parent 0425403951
commit 5a777ac79f
5 changed files with 74 additions and 37 deletions

6
.gitignore vendored
View File

@ -114,10 +114,14 @@ sn_profile_*/
settings.dcf
tests/fakedata_generation/
site/
!credentials.yaml
credentials.yaml
# Docker container and other files
.devcontainer
# Calculating features module
calculatingfeatures/
# Temp folder for rapids data/external
rapids_temp_data/

View File

@ -5,6 +5,7 @@ include: "rules/common.smk"
include: "rules/renv.smk"
include: "rules/preprocessing.smk"
include: "rules/features.smk"
include: "rules/models.smk"
include: "rules/reports.smk"
import itertools

View File

@ -3,16 +3,17 @@
########################################################################################################################
# See https://www.rapids.science/latest/setup/configuration/#participant-files
PIDS: [p031] #p01, p02, p03]
PIDS: ['p031', 'p032', 'p033', 'p034', 'p035', 'p036', 'p037', 'p038', 'p039', 'p040', 'p042', 'p043', 'p044', 'p045', 'p046', 'p049', 'p050', 'p052', 'p053', 'p054', 'p055', 'p057', 'p058', 'p059', 'p060', 'p061', 'p062', 'p064', 'p067', 'p068', 'p069', 'p070', 'p071', 'p072', 'p073', 'p074', 'p075', 'p076', 'p077', 'p078', 'p079', 'p080', 'p081', 'p082', 'p083', 'p084', 'p085', 'p086', 'p088', 'p089', 'p090', 'p091', 'p092', 'p093', 'p106', 'p107']
# See https://www.rapids.science/latest/setup/configuration/#automatic-creation-of-participant-files
CREATE_PARTICIPANT_FILES:
CSV_FILE_PATH: "data/external/example_participants.csv" # see docs for required format
USERNAMES_CSV: "data/external/main_study_usernames.csv"
CSV_FILE_PATH: "data/external/main_study_participants.csv" # see docs for required format
PHONE_SECTION:
ADD: True
IGNORED_DEVICE_IDS: []
FITBIT_SECTION:
ADD: True
ADD: False
IGNORED_DEVICE_IDS: []
EMPATICA_SECTION:
ADD: True
@ -21,16 +22,17 @@ CREATE_PARTICIPANT_FILES:
# See https://www.rapids.science/latest/setup/configuration/#time-segments
TIME_SEGMENTS: &time_segments
TYPE: PERIODIC # FREQUENCY, PERIODIC, EVENT
FILE: "data/external/timesegments_periodic.csv"
INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs
FILE: "data/external/timesegments_daily.csv"
INCLUDE_PAST_PERIODIC_SEGMENTS: TRUE # Only relevant if TYPE=PERIODIC, see docs
# See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study
TIMEZONE:
TYPE: SINGLE
TYPE: MULTIPLE
SINGLE:
TZCODE: Europe/Ljubljana
MULTIPLE:
TZCODES_FILE: data/external/multiple_timezones_example.csv
TZ_FILE: data/external/timezone.csv
TZCODES_FILE: data/external/multiple_timezones.csv
IF_MISSING_TZCODE: USE_DEFAULT
DEFAULT_TZCODE: Europe/Ljubljana
FITBIT:
@ -85,7 +87,7 @@ PHONE_ACTIVITY_RECOGNITION:
EPISODE_THRESHOLD_BETWEEN_ROWS: 5 # minutes. Max time difference for two consecutive rows to be considered within the same AR episode.
PROVIDERS:
RAPIDS:
COMPUTE: False
COMPUTE: True
FEATURES: ["count", "mostcommonactivity", "countuniqueactivities", "durationstationary", "durationmobile", "durationvehicle"]
ACTIVITY_CLASSES:
STATIONARY: ["still", "tilting"]
@ -114,7 +116,7 @@ PHONE_APPLICATIONS_FOREGROUND:
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
PROVIDERS:
RAPIDS:
COMPUTE: False
COMPUTE: True
INCLUDE_EPISODE_FEATURES: True
SINGLE_CATEGORIES: ["all", "email"]
MULTIPLE_CATEGORIES:
@ -149,7 +151,7 @@ PHONE_BATTERY:
EPISODE_THRESHOLD_BETWEEN_ROWS: 30 # minutes. Max time difference for two consecutive rows to be considered within the same battery episode.
PROVIDERS:
RAPIDS:
COMPUTE: False
COMPUTE: True
FEATURES: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"]
SRC_SCRIPT: src/features/phone_battery/rapids/main.py
@ -158,12 +160,12 @@ PHONE_BLUETOOTH:
CONTAINER: bluetooth
PROVIDERS:
RAPIDS:
COMPUTE: False
COMPUTE: True
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
SRC_SCRIPT: src/features/phone_bluetooth/rapids/main.R
DORYAB:
COMPUTE: False
COMPUTE: True
FEATURES:
ALL:
DEVICES: ["countscans", "uniquedevices", "meanscans", "stdscans"]
@ -184,7 +186,7 @@ PHONE_CALLS:
CONTAINER: call
PROVIDERS:
RAPIDS:
COMPUTE: False
COMPUTE: True
FEATURES_TYPE: EPISODES # EVENTS or EPISODES
CALL_TYPES: [missed, incoming, outgoing]
FEATURES:
@ -227,7 +229,7 @@ PHONE_DATA_YIELD:
PHONE_WIFI_VISIBLE]
PROVIDERS:
RAPIDS:
COMPUTE: False
COMPUTE: True
FEATURES: [ratiovalidyieldedminutes, ratiovalidyieldedhours]
MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1, minimum percentage of valid minutes in an hour to be considered valid.
SRC_SCRIPT: src/features/phone_data_yield/rapids/main.R
@ -255,7 +257,7 @@ PHONE_LIGHT:
CONTAINER: light_sensor
PROVIDERS:
RAPIDS:
COMPUTE: False
COMPUTE: True
FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
SRC_SCRIPT: src/features/phone_light/rapids/main.py
@ -269,7 +271,7 @@ PHONE_LOCATIONS:
PROVIDERS:
DORYAB:
COMPUTE: False
COMPUTE: True
FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome", "homelabel"]
DBSCAN_EPS: 100 # meters
DBSCAN_MINSAMPLES: 5
@ -284,7 +286,7 @@ PHONE_LOCATIONS:
SRC_SCRIPT: src/features/phone_locations/doryab/main.py
BARNETT:
COMPUTE: False
COMPUTE: True
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
@ -302,7 +304,7 @@ PHONE_MESSAGES:
CONTAINER: sms
PROVIDERS:
RAPIDS:
COMPUTE: False
COMPUTE: True
MESSAGES_TYPES : [received, sent]
FEATURES:
received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
@ -314,7 +316,7 @@ PHONE_SCREEN:
CONTAINER: screen
PROVIDERS:
RAPIDS:
COMPUTE: False
COMPUTE: True
REFERENCE_HOUR_FIRST_USE: 0
IGNORE_EPISODES_SHORTER_THAN: 0 # in minutes, set to 0 to disable
IGNORE_EPISODES_LONGER_THAN: 360 # in minutes, set to 0 to disable
@ -336,13 +338,12 @@ PHONE_WIFI_VISIBLE:
CONTAINER: wifi
PROVIDERS:
RAPIDS:
COMPUTE: False
COMPUTE: True
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
SRC_SCRIPT: src/features/phone_wifi_visible/rapids/main.R
########################################################################################################################
# FITBIT #
########################################################################################################################
@ -484,6 +485,7 @@ FITBIT_STEPS_INTRADAY:
INCLUDE_ZERO_STEP_ROWS: False
SRC_SCRIPT: src/features/fitbit_steps_intraday/rapids/main.py
########################################################################################################################
# EMPATICA #
########################################################################################################################
@ -506,7 +508,7 @@ EMPATICA_ACCELEROMETER:
FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py
CR:
COMPUTE: False
COMPUTE: True
FEATURES: ["totalMagnitudeBand", "absoluteMeanBand", "varianceBand"] # Acc features
WINDOWS:
COMPUTE: True
@ -534,7 +536,7 @@ EMPATICA_TEMPERATURE:
FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py
CR:
COMPUTE: False
COMPUTE: True
FEATURES: ["maximum", "minimum", "meanAbsChange", "longestStrikeAboveMean", "longestStrikeBelowMean",
"stdDev", "median", "meanChange", "sumSquared", "squareSumOfComponent", "sumOfSquareComponents"]
WINDOWS:
@ -595,7 +597,7 @@ EMPATICA_INTER_BEAT_INTERVAL:
FEATURES: ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi", "diffminmodeibi", "entropyibi"]
SRC_SCRIPT: src/features/empatica_inter_beat_interval/dbdp/main.py
CR:
COMPUTE: False
COMPUTE: True
FEATURES: ['meanHr', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50', 'sd', 'sd2', 'sd1/sd2', 'numRR', # Time features
'VLF', 'LF', 'LFnorm', 'HF', 'HFnorm', 'LF/HF', 'fullIntegral'] # Freq features
PATCH_WITH_BVP: True
@ -612,7 +614,6 @@ EMPATICA_TAGS:
PROVIDERS: # None implemented yet
########################################################################################################################
# PLOTS #
########################################################################################################################
@ -654,17 +655,17 @@ HEATMAP_FEATURE_CORRELATION_MATRIX:
ALL_CLEANING_INDIVIDUAL:
PROVIDERS:
RAPIDS:
COMPUTE: False
COMPUTE: True
IMPUTE_SELECTED_EVENT_FEATURES:
COMPUTE: True
MIN_DATA_YIELDED_MINUTES_TO_IMPUTE: 0.33
COLS_NAN_THRESHOLD: 0.3 # set to 1 to disable
COLS_VAR_THRESHOLD: True
ROWS_NAN_THRESHOLD: 0.3 # set to 1 to disable
ROWS_NAN_THRESHOLD: 1 # set to 1 to disable
DATA_YIELD_FEATURE: RATIO_VALID_YIELDED_HOURS # RATIO_VALID_YIELDED_HOURS or RATIO_VALID_YIELDED_MINUTES
DATA_YIELD_RATIO_THRESHOLD: 0.5 # set to 0 to disable
DATA_YIELD_RATIO_THRESHOLD: 0.3 # set to 0 to disable
DROP_HIGHLY_CORRELATED_FEATURES:
COMPUTE: True
COMPUTE: False
MIN_OVERLAP_FOR_CORR_THRESHOLD: 0.5
CORR_THRESHOLD: 0.95
SRC_SCRIPT: src/features/all_cleaning_individual/rapids/main.R
@ -672,17 +673,17 @@ ALL_CLEANING_INDIVIDUAL:
ALL_CLEANING_OVERALL:
PROVIDERS:
RAPIDS:
COMPUTE: False
COMPUTE: True
IMPUTE_SELECTED_EVENT_FEATURES:
COMPUTE: True
MIN_DATA_YIELDED_MINUTES_TO_IMPUTE: 0.33
COLS_NAN_THRESHOLD: 0.3 # set to 1 to disable
COLS_VAR_THRESHOLD: True
ROWS_NAN_THRESHOLD: 0.3 # set to 1 to disable
ROWS_NAN_THRESHOLD: 1 # set to 1 to disable
DATA_YIELD_FEATURE: RATIO_VALID_YIELDED_HOURS # RATIO_VALID_YIELDED_HOURS or RATIO_VALID_YIELDED_MINUTES
DATA_YIELD_RATIO_THRESHOLD: 0.5 # set to 0 to disable
DATA_YIELD_RATIO_THRESHOLD: 0.3 # set to 0 to disable
DROP_HIGHLY_CORRELATED_FEATURES:
COMPUTE: True
COMPUTE: False
MIN_OVERLAP_FOR_CORR_THRESHOLD: 0.5
CORR_THRESHOLD: 0.95
SRC_SCRIPT: src/features/all_cleaning_overall/rapids/main.R
@ -691,12 +692,14 @@ ALL_CLEANING_OVERALL:
########################################################################################################################
# Z-score standardization #
########################################################################################################################
STANDARDIZATION:
PROVIDERS:
CR:
COMPUTE: True
SRC_SCRIPT: src/features/standardization/main.py
########################################################################################################################
# Baseline #
########################################################################################################################
@ -716,4 +719,3 @@ PARAMS_FOR_ANALYSIS:
TARGET:
COMPUTE: True
LABEL: PANAS_negative_affect_mean

View File

@ -4,6 +4,36 @@ rule create_example_participant_files:
shell:
"echo 'PHONE:\n DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524]\n PLATFORMS: [android]\n LABEL: test-01\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\nFITBIT:\n DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524]\n LABEL: test-01\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\n' >> ./data/external/participant_files/example01.yaml && echo 'PHONE:\n DEVICE_IDS: [13dbc8a3-dae3-4834-823a-4bc96a7d459d]\n PLATFORMS: [ios]\n LABEL: test-02\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\nFITBIT:\n DEVICE_IDS: [13dbc8a3-dae3-4834-823a-4bc96a7d459d]\n LABEL: test-02\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\n' >> ./data/external/participant_files/example02.yaml"
# rule query_usernames_device_empatica_ids:
# params:
# baseline_folder = "/mnt/e/STRAWbaseline/"
# output:
# usernames_file = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"],
# timezone_file = config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"]
# script:
# "../../participants/prepare_usernames_file.py"
# Rule prepare_tzcodes_file: runs tools/create_multi_timezones_file.py to produce the
# file configured as TIMEZONE -> MULTIPLE -> TZCODES_FILE from the file configured as
# TIMEZONE -> MULTIPLE -> TZ_FILE. Both paths are read from the workflow config.
# NOTE(review): the exact column layout of either file is not visible here — confirm
# against the script before relying on a particular format.
rule prepare_tzcodes_file:
input:
# Source timezone file (config key TZ_FILE).
timezone_file = config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"]
output:
# Generated timezone-codes file (config key TZCODES_FILE).
tzcodes_file = config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]
script:
"../tools/create_multi_timezones_file.py"
# Rule prepare_participants_csv: runs src/data/translate_usernames_into_participants_data.R
# to produce the participants CSV (CREATE_PARTICIPANT_FILES -> CSV_FILE_PATH) from the
# usernames CSV (CREATE_PARTICIPANT_FILES -> USERNAMES_CSV).
# NOTE(review): how the script uses the table names below is not visible here — presumably
# they are database tables queried through the selected phone data stream; verify in the script.
rule prepare_participants_csv:
input:
# List of study usernames to translate.
username_list = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"]
params:
# Configuration of whichever phone data stream is selected by PHONE_DATA_STREAMS -> USE.
data_configuration = config["PHONE_DATA_STREAMS"][config["PHONE_DATA_STREAMS"]["USE"]],
participants_table = "participants",
# Device ids and start/end dates are both taken from the same "esm" table name.
device_id_table = "esm",
start_end_date_table = "esm"
output:
# Same config path that rule create_participants_files declares as its input.
participants_file = config["CREATE_PARTICIPANT_FILES"]["CSV_FILE_PATH"]
script:
"../src/data/translate_usernames_into_participants_data.R"
rule create_participants_files:
input:
participants_file = config["CREATE_PARTICIPANT_FILES"]["CSV_FILE_PATH"]
@ -218,4 +248,4 @@ rule empatica_readable_datetime:
output:
"data/raw/{pid}/empatica_{sensor}_with_datetime.csv"
script:
"../src/data/datetime/readable_datetime.R"
"../src/data/datetime/readable_datetime.R"

View File

@ -58,7 +58,7 @@ participants %>%
lines <- append(lines, empty_fitbit)
if(add_empatica_section == TRUE && !is.na(row[empatica_device_id_column])){
lines <- append(lines, c("EMPATICA:", paste0(" DEVICE_IDS: [",row[empatica_device_id_column],"]"),
lines <- append(lines, c("EMPATICA:", paste0(" DEVICE_IDS: [",row$label,"]"),
paste(" LABEL:",row$label), paste(" START_DATE:", start_date), paste(" END_DATE:", end_date)))
} else
lines <- append(lines, empty_empatica)