From 5f51c94ac6c466a012d1d5b9f0e3368a033c23a1 Mon Sep 17 00:00:00 2001 From: JulioV Date: Wed, 25 Nov 2020 19:42:11 -0500 Subject: [PATCH] Separate device data configuration and update docs --- Snakefile | 17 +- config.yaml | 374 ++++++++++++++------------ docs/features/feature-introduction.md | 9 +- docs/setup/configuration.md | 118 ++++---- docs/setup/execution.md | 6 +- example_profile/example_config.yaml | 44 +-- mkdocs.yml | 2 +- rules/preprocessing.smk | 62 ++--- rules/reports.smk | 2 +- src/data/download_fitbit_data.R | 5 +- 10 files changed, 327 insertions(+), 312 deletions(-) diff --git a/Snakefile b/Snakefile index f7a188d1..dfd96877 100644 --- a/Snakefile +++ b/Snakefile @@ -163,9 +163,6 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys(): files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") -if config["FITBIT_CALORIES"]["TABLE_FORMAT"] not in ["JSON", "CSV"]: - raise ValueError("config['FITBIT_CALORIES']['TABLE_FORMAT'] should be JSON or CSV but you typed" + config["FITBIT_CALORIES"]["TABLE_FORMAT"]) - for provider in config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"].keys(): if config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]: files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_summary_raw.csv", pid=config["PIDS"])) @@ -222,13 +219,13 @@ for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys(): files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") -for provider in config["FITBIT_CALORIES"]["PROVIDERS"].keys(): - if config["FITBIT_CALORIES"]["PROVIDERS"][provider]["COMPUTE"]: - files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_raw.csv", pid=config["PIDS"], 
fitbit_data_type=(["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))) - files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"])) - files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"])) - files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) - files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") +# for provider in config["FITBIT_CALORIES"]["PROVIDERS"].keys(): +# if config["FITBIT_CALORIES"]["PROVIDERS"][provider]["COMPUTE"]: +# files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))) +# files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"])) +# files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"])) +# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) +# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") # visualization for data exploration diff --git a/config.yaml b/config.yaml index 73db75c4..abbe02f3 100644 --- a/config.yaml +++ b/config.yaml @@ -1,15 +1,15 @@ -# See https://www.rapids.science/setup/configuration/#database-credentials +# See https://www.rapids.science/latest/setup/configuration/#database-credentials DATABASE_GROUP: &database_group MY_GROUP -# See https://www.rapids.science/setup/configuration/#timezone-of-your-study 
+# See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study TIMEZONE: &timezone America/New_York -# See https://www.rapids.science/setup/configuration/#participant-files -PIDS: [j01] +# See https://www.rapids.science/latest/setup/configuration/#participant-files +PIDS: [test01] -# See https://www.rapids.science/setup/configuration/#automatic-creation-of-participant-files +# See https://www.rapids.science/latest/setup/configuration/#automatic-creation-of-participant-files CREATE_PARTICIPANT_FILES: SOURCE: TYPE: AWARE_DEVICE_TABLE #AWARE_DEVICE_TABLE or CSV_FILE @@ -25,59 +25,110 @@ CREATE_PARTICIPANT_FILES: DEVICE_ID_COLUMN: device_id # column name IGNORED_DEVICE_IDS: [] -# See https://www.rapids.science/setup/configuration/#day-segments +# See https://www.rapids.science/latest/setup/configuration/#day-segments DAY_SEGMENTS: &day_segments TYPE: PERIODIC # FREQUENCY, PERIODIC, EVENT FILE: "data/external/daysegments_periodic.csv" INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs -# See https://www.rapids.science/setup/configuration/#device-data-source-configuration -DEVICE_DATA: - PHONE: - SOURCE: - TYPE: DATABASE - DATABASE_GROUP: *database_group - DEVICE_ID_COLUMN: device_id # column name - TIMEZONE: - TYPE: SINGLE # SINGLE or MULTIPLE - VALUE: *timezone # IF TYPE=SINGLE, see docs - FITBIT: - SOURCE: - TYPE: DATABASE # DATABASE or FILES (set each FITBIT_SENSOR TABLE attribute accordingly with a table name or a file path) - COLUMN_FORMAT: JSON # JSON or PLAIN_TEXT - DATABASE_GROUP: *database_group - DEVICE_ID_COLUMN: device_id # column name - TIMEZONE: - TYPE: SINGLE # Fitbit only supports SINGLE timezones - VALUE: *timezone # see docs -############## PHONE ########################################################### -################################################################################ -PHONE_DATA_YIELD: - SENSORS: [] 
+######################################################################################################################## +# PHONE # +######################################################################################################################## + +# See https://www.rapids.science/latest/setup/configuration/#device-data-source-configuration +PHONE_DATA_CONFIGURATION: + SOURCE: + TYPE: DATABASE + DATABASE_GROUP: *database_group + DEVICE_ID_COLUMN: device_id # column name + TIMEZONE: + TYPE: SINGLE # SINGLE or MULTIPLE + VALUE: *timezone # IF TYPE=SINGLE, see docs + +# Sensors ------ + +# https://www.rapids.science/latest/features/phone-accelerometer/ +PHONE_ACCELEROMETER: + TABLE: accelerometer PROVIDERS: RAPIDS: COMPUTE: False - FEATURES: [ratiovalidyieldedminutes, ratiovalidyieldedhours] - MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1 representing the number of minutes with at least - SRC_LANGUAGE: "r" - SRC_FOLDER: "rapids" # inside src/features/phone_data_yield + FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"] + SRC_FOLDER: "rapids" # inside src/features/phone_accelerometer + SRC_LANGUAGE: "python" + + PANDA: + COMPUTE: False + VALID_SENSED_MINUTES: False + FEATURES: + exertional_activity_episode: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"] + nonexertional_activity_episode: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"] + SRC_FOLDER: "panda" # inside src/features/phone_accelerometer + SRC_LANGUAGE: "python" -# Communication SMS features config, TYPES and FEATURES keys need to match -PHONE_MESSAGES: - TABLE: messages +# See https://www.rapids.science/latest/features/phone-activity-recognition/ +PHONE_ACTIVITY_RECOGNITION: + TABLE: + ANDROID: plugin_google_activity_recognition + IOS: plugin_ios_activity_recognition + EPISODE_THRESHOLD_BETWEEN_ROWS: 5 # minutes. 
Max time difference for two consecutive rows to be considered within the same activity episode. PROVIDERS: RAPIDS: COMPUTE: False - MESSAGES_TYPES : [received, sent] - FEATURES: - received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact] - sent: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact] - SRC_LANGUAGE: "r" - SRC_FOLDER: "rapids" # inside src/features/phone_messages + FEATURES: ["count", "mostcommonactivity", "countuniqueactivities", "durationstationary", "durationmobile", "durationvehicle"] + ACTIVITY_CLASSES: + STATIONARY: ["still", "tilting"] + MOBILE: ["on_foot", "walking", "running", "on_bicycle"] + VEHICLE: ["in_vehicle"] + SRC_FOLDER: "rapids" # inside src/features/phone_activity_recognition + SRC_LANGUAGE: "python" -# Communication call features config, TYPES and FEATURES keys need to match +# See https://www.rapids.science/latest/features/phone-applications-foreground/ +PHONE_APPLICATIONS_FOREGROUND: + TABLE: applications_foreground + APPLICATION_CATEGORIES: + CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store) + CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv" + UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE + SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. 
If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway + PROVIDERS: + RAPIDS: + COMPUTE: False + SINGLE_CATEGORIES: ["all", "email"] + MULTIPLE_CATEGORIES: + social: ["socialnetworks", "socialmediatools"] + entertainment: ["entertainment", "gamingknowledge", "gamingcasual", "gamingadventure", "gamingstrategy", "gamingtoolscommunity", "gamingroleplaying", "gamingaction", "gaminglogic", "gamingsports", "gamingsimulation"] + SINGLE_APPS: ["top1global", "com.facebook.moments", "com.google.android.youtube", "com.twitter.android"] # There's no entropy for single apps + EXCLUDED_CATEGORIES: [] + EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"] + FEATURES: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"] + SRC_FOLDER: "rapids" # inside src/features/phone_applications_foreground + SRC_LANGUAGE: "python" + +# See https://www.rapids.science/latest/features/phone-battery/ +PHONE_BATTERY: + TABLE: battery + EPISODE_THRESHOLD_BETWEEN_ROWS: 30 # minutes. Max time difference for two consecutive rows to be considered within the same battery episode. 
+ PROVIDERS: + RAPIDS: + COMPUTE: False + FEATURES: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"] + SRC_FOLDER: "rapids" # inside src/features/phone_battery + SRC_LANGUAGE: "python" + +# See https://www.rapids.science/latest/features/phone-bluetooth/ +PHONE_BLUETOOTH: + TABLE: bluetooth + PROVIDERS: + RAPIDS: + COMPUTE: False + FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"] + SRC_FOLDER: "rapids" # inside src/features/phone_bluetooth + SRC_LANGUAGE: "r" + +# See https://www.rapids.science/latest/features/phone-calls/ PHONE_CALLS: TABLE: calls PROVIDERS: @@ -91,6 +142,47 @@ PHONE_CALLS: SRC_LANGUAGE: "r" SRC_FOLDER: "rapids" # inside src/features/phone_calls +# See https://www.rapids.science/latest/features/phone-conversation/ +PHONE_CONVERSATION: + TABLE: + ANDROID: plugin_studentlife_audio_android + IOS: plugin_studentlife_audio + PROVIDERS: + RAPIDS: + COMPUTE: False + FEATURES: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration", + "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","noisesumenergy", + "noiseavgenergy","noisesdenergy","noiseminenergy","noisemaxenergy","voicesumenergy", + "voiceavgenergy","voicesdenergy","voiceminenergy","voicemaxenergy","silencesensedfraction","noisesensedfraction", + "voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction", + "unknownexpectedfraction","countconversation"] + RECORDING_MINUTES: 1 + PAUSED_MINUTES : 3 + SRC_FOLDER: "rapids" # inside src/features/phone_conversation + SRC_LANGUAGE: "python" + +# See https://www.rapids.science/latest/features/phone-data-yield/ +PHONE_DATA_YIELD: + SENSORS: [] + PROVIDERS: + RAPIDS: + COMPUTE: False + FEATURES: [ratiovalidyieldedminutes, ratiovalidyieldedhours] + 
MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1, minimum ratio of valid yielded minutes in an hour for that hour to be considered valid + SRC_LANGUAGE: "r" + SRC_FOLDER: "rapids" # inside src/features/phone_data_yield + +# See https://www.rapids.science/latest/features/phone-light/ +PHONE_LIGHT: + TABLE: light + PROVIDERS: + RAPIDS: + COMPUTE: False + FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"] + SRC_FOLDER: "rapids" # inside src/features/phone_light + SRC_LANGUAGE: "python" + +# See https://www.rapids.science/latest/features/phone-locations/ PHONE_LOCATIONS: TABLE: locations LOCATIONS_TO_USE: FUSED_RESAMPLED # ALL, GPS OR FUSED_RESAMPLED @@ -118,42 +210,20 @@ PHONE_LOCATIONS: SRC_FOLDER: "barnett" # inside src/features/phone_locations SRC_LANGUAGE: "r" -PHONE_BLUETOOTH: - TABLE: bluetooth +# See https://www.rapids.science/latest/features/phone-messages/ +PHONE_MESSAGES: + TABLE: messages PROVIDERS: RAPIDS: COMPUTE: False - FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"] - SRC_FOLDER: "rapids" # inside src/features/phone_bluetooth + MESSAGES_TYPES : [received, sent] + FEATURES: + received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact] + sent: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact] SRC_LANGUAGE: "r" + SRC_FOLDER: "rapids" # inside src/features/phone_messages - -PHONE_ACTIVITY_RECOGNITION: - TABLE: - ANDROID: plugin_google_activity_recognition - IOS: plugin_ios_activity_recognition - EPISODE_THRESHOLD_BETWEEN_ROWS: 5 # minutes. Max time difference for two consecutive rows to be considered within the same battery episode. 
- PROVIDERS: - RAPIDS: - COMPUTE: False - FEATURES: ["count", "mostcommonactivity", "countuniqueactivities", "durationstationary", "durationmobile", "durationvehicle"] - ACTIVITY_CLASSES: - STATIONARY: ["still", "tilting"] - MOBILE: ["on_foot", "walking", "running", "on_bicycle"] - VEHICLE: ["in_vehicle"] - SRC_FOLDER: "rapids" # inside src/features/phone_activity_recognition - SRC_LANGUAGE: "python" - -PHONE_BATTERY: - TABLE: battery - EPISODE_THRESHOLD_BETWEEN_ROWS: 30 # minutes. Max time difference for two consecutive rows to be considered within the same battery episode. - PROVIDERS: - RAPIDS: - COMPUTE: False - FEATURES: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"] - SRC_FOLDER: "rapids" # inside src/features/phone_battery - SRC_LANGUAGE: "python" - +# See https://www.rapids.science/latest/features/phone-screen/ PHONE_SCREEN: TABLE: screen PROVIDERS: @@ -167,63 +237,7 @@ PHONE_SCREEN: SRC_FOLDER: "rapids" # inside src/features/phone_screen SRC_LANGUAGE: "python" -PHONE_LIGHT: - TABLE: light - PROVIDERS: - RAPIDS: - COMPUTE: False - FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"] - SRC_FOLDER: "rapids" # inside src/features/phone_light - SRC_LANGUAGE: "python" - -PHONE_ACCELEROMETER: - TABLE: accelerometer - PROVIDERS: - RAPIDS: - COMPUTE: False - FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"] - SRC_FOLDER: "rapids" # inside src/features/phone_accelerometer - SRC_LANGUAGE: "python" - - PANDA: - COMPUTE: False - VALID_SENSED_MINUTES: False - FEATURES: - exertional_activity_episode: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"] - nonexertional_activity_episode: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"] - SRC_FOLDER: "panda" # inside src/features/phone_accelerometer - SRC_LANGUAGE: "python" - 
-PHONE_APPLICATIONS_FOREGROUND: - TABLE: applications_foreground - APPLICATION_CATEGORIES: - CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store) - CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv" - UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE - SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway - PROVIDERS: - RAPIDS: - COMPUTE: False - SINGLE_CATEGORIES: ["all", "email"] - MULTIPLE_CATEGORIES: - social: ["socialnetworks", "socialmediatools"] - entertainment: ["entertainment", "gamingknowledge", "gamingcasual", "gamingadventure", "gamingstrategy", "gamingtoolscommunity", "gamingroleplaying", "gamingaction", "gaminglogic", "gamingsports", "gamingsimulation"] - SINGLE_APPS: ["top1global", "com.facebook.moments", "com.google.android.youtube", "com.twitter.android"] # There's no entropy for single apps - EXCLUDED_CATEGORIES: [] - EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"] - FEATURES: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"] - SRC_FOLDER: "rapids" # inside src/features/phone_applications_foreground - SRC_LANGUAGE: "python" - -PHONE_WIFI_VISIBLE: - TABLE: "wifi" - PROVIDERS: - RAPIDS: - COMPUTE: False - FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"] - SRC_FOLDER: "rapids" # inside src/features/phone_wifi_visible - SRC_LANGUAGE: "r" - +# See https://www.rapids.science/latest/features/phone-wifi-connected/ PHONE_WIFI_CONNECTED: TABLE: "sensor_wifi" PROVIDERS: @@ -233,27 +247,39 @@ PHONE_WIFI_CONNECTED: SRC_FOLDER: "rapids" # inside src/features/phone_wifi_connected SRC_LANGUAGE: "r" -PHONE_CONVERSATION: - 
TABLE: - ANDROID: plugin_studentlife_audio_android - IOS: plugin_studentlife_audio +# See https://www.rapids.science/latest/features/phone-wifi-visible/ +PHONE_WIFI_VISIBLE: + TABLE: "wifi" PROVIDERS: RAPIDS: COMPUTE: False - FEATURES: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration", - "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","noisesumenergy", - "noiseavgenergy","noisesdenergy","noiseminenergy","noisemaxenergy","voicesumenergy", - "voiceavgenergy","voicesdenergy","voiceminenergy","voicemaxenergy","silencesensedfraction","noisesensedfraction", - "voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction", - "unknownexpectedfraction","countconversation"] - RECORDING_MINUTES: 1 - PAUSED_MINUTES : 3 - SRC_FOLDER: "rapids" # inside src/features/phone_conversation - SRC_LANGUAGE: "python" + FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"] + SRC_FOLDER: "rapids" # inside src/features/phone_wifi_visible + SRC_LANGUAGE: "r" -############## FITBIT ########################################################## -################################################################################ + + + + +######################################################################################################################## +# FITBIT # +######################################################################################################################## + +# See https://www.rapids.science/latest/setup/configuration/#device-data-source-configuration +FITBIT_DATA_CONFIGURATION: + SOURCE: + TYPE: DATABASE # DATABASE or FILES (set each [FITBIT_SENSOR][TABLE] attribute with a table name or a file path accordingly) + COLUMN_FORMAT: JSON # JSON or PLAIN_TEXT + DATABASE_GROUP: *database_group + DEVICE_ID_COLUMN: device_id # column name + TIMEZONE: + TYPE: 
SINGLE # Fitbit devices don't support time zones so we read this data in the timezone indicated by VALUE + VALUE: *timezone + +# Sensors ------ + +# See https://www.rapids.science/latest/features/fitbit-heartrate-summary/ FITBIT_HEARTRATE_SUMMARY: TABLE: heartrate_summary PROVIDERS: @@ -263,6 +289,7 @@ FITBIT_HEARTRATE_SUMMARY: SRC_FOLDER: "rapids" # inside src/features/fitbit_heartrate_summary SRC_LANGUAGE: "python" +# See https://www.rapids.science/latest/features/fitbit-heartrate-intraday/ FITBIT_HEARTRATE_INTRADAY: TABLE: heartrate_intraday PROVIDERS: @@ -272,6 +299,19 @@ FITBIT_HEARTRATE_INTRADAY: SRC_FOLDER: "rapids" # inside src/features/fitbit_heartrate_intraday SRC_LANGUAGE: "python" +# See https://www.rapids.science/latest/features/fitbit-sleep-summary/ +FITBIT_SLEEP_SUMMARY: + TABLE: sleep_summary + SLEEP_EPISODE_TIMESTAMP: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp. + PROVIDERS: + RAPIDS: + COMPUTE: False + FEATURES: ["countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"] + SLEEP_TYPES: ["main", "nap", "all"] + SRC_FOLDER: "rapids" # inside src/features/fitbit_sleep_summary + SRC_LANGUAGE: "python" + +# See https://www.rapids.science/latest/features/fitbit-steps-summary/ FITBIT_STEPS_SUMMARY: TABLE: steps_summary PROVIDERS: @@ -281,6 +321,7 @@ FITBIT_STEPS_SUMMARY: SRC_FOLDER: "rapids" # inside src/features/fitbit_steps_summary SRC_LANGUAGE: "python" +# See https://www.rapids.science/latest/features/fitbit-steps-intraday/ FITBIT_STEPS_INTRADAY: TABLE: steps_intraday PROVIDERS: @@ -295,31 +336,24 @@ FITBIT_STEPS_INTRADAY: SRC_FOLDER: "rapids" # inside src/features/fitbit_steps_intraday SRC_LANGUAGE: "python" -FITBIT_SLEEP_SUMMARY: - TABLE: sleep_summary - SLEEP_EPISODE_TIMESTAMP: end # 
summary sleep episodes are considered as events based on either the start timestamp or end timestamp. - PROVIDERS: - RAPIDS: - COMPUTE: False - FEATURES: ["countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"] - SLEEP_TYPES: ["main", "nap", "all"] - SRC_FOLDER: "rapids" # inside src/features/fitbit_sleep_summary - SRC_LANGUAGE: "python" +# FITBIT_CALORIES: +# TABLE_FORMAT: JSON # JSON or CSV. If your JSON or CSV data are files change [DEVICE_DATA][FITBIT][SOURCE][TYPE] to FILES +# TABLE: +# JSON: fitbit_calories +# CSV: +# SUMMARY: calories_summary +# INTRADAY: calories_intraday +# PROVIDERS: +# RAPIDS: +# COMPUTE: False +# FEATURES: [] -FITBIT_CALORIES: - TABLE_FORMAT: JSON # JSON or CSV. If your JSON or CSV data are files change [DEVICE_DATA][FITBIT][SOURCE][TYPE] to FILES - TABLE: - JSON: fitbit_calories - CSV: - SUMMARY: calories_summary - INTRADAY: calories_intraday - PROVIDERS: - RAPIDS: - COMPUTE: False - FEATURES: [] -### Visualizations ############################################################# -################################################################################ + + +######################################################################################################################## +# PLOTS # +######################################################################################################################## HEATMAP_FEATURES_CORRELATIONS: PLOT: False diff --git a/docs/features/feature-introduction.md b/docs/features/feature-introduction.md index 403ce42f..f755b34f 100644 --- a/docs/features/feature-introduction.md +++ b/docs/features/feature-introduction.md @@ -3,10 +3,10 @@ Every phone or Fitbit sensor has a corresponding config section in `config.yaml`, these sections follow a similar structure and we'll use `PHONE_ACCELEROMETER` 
as an example to explain this structure. !!! hint - We recommend reading this page if you are using RAPIDS for the first time + - We recommend reading this page if you are using RAPIDS for the first time + - All computed sensor features are stored under `/data/processed/features` on files per sensor, per participant and per study (all participants). + - Every time you change any sensor parameters, provider parameters or provider features, all the necessary files will be updated as soon as you execute RAPIDS. -!!! hint - All sensor features are stored under `/data/processed/features` on files per sensor, per participant and per study (all participants). !!! example "Config section example for `PHONE_ACCELEROMETER`" @@ -55,6 +55,3 @@ We explain every provider's parameter in a table under the `Parameters descripti Each provider offers a set of behavioral features (see `#4.2` or `#5.2` in the example). For some providers these features are grouped in an array (like those for `RAPIDS` provider in `#4.2`) but for others they are grouped in a collection of arrays depending on the meaning and purpose of those features (like those for `PANDAS` provider in `#5.2`). In either case, you can delete the features you are not interested in and they will not be included in the sensor's output feature file. We explain each behavioral feature in a table under the `Features description` heading on each provider documentation page. - -!!! hint - Every time you change any sensor parameters, provider parameters or provider features, all the necessary files will be updated as soon as you execute RAPIDS. 
diff --git a/docs/setup/configuration.md b/docs/setup/configuration.md index afbbf249..72fa9814 100644 --- a/docs/setup/configuration.md +++ b/docs/setup/configuration.md @@ -317,85 +317,67 @@ Day segments (or epochs) are the time windows on which you want to extract behav --- ## Device Data Source Configuration -You might need to modify the following config keys in your `config.yaml` depending on what devices your participants used and where you are storing your data. +You might need to modify the following config keys in your `config.yaml` depending on what devices your participants used and where you are storing your data. You can ignore `[PHONE_DATA_CONFIGURATION]` or `[FITBIT_DATA_CONFIGURATION]` if you are not working with either device. -!!! hint - You can ignore `[DEVICE_DATA][PHONE]` or `[DEVICE_DATA][FITBIT]` if you are not working with either devices. -The relevant `config.yaml` section looks as follows by default: +=== "Phone" -```yaml -DEVICE_DATA: - PHONE: - SOURCE: - TYPE: DATABASE - DATABASE_GROUP: *database_group - DEVICE_ID_COLUMN: device_id # column name - TIMEZONE: - TYPE: SINGLE - VALUE: *timezone - FITBIT: - SOURCE: - TYPE: DATABASE # DATABASE or FILES (set each FITBIT_SENSOR TABLE attribute accordingly with a table name or a file path) - COLUMN_FORMAT: JSON # JSON or PLAIN_TEXT - DATABASE_GROUP: *database_group - DEVICE_ID_COLUMN: fitbit_id # column name - TIMEZONE: - TYPE: SINGLE # Fitbit only supports SINGLE timezones - VALUE: *timezone + The relevant `config.yaml` section looks like this by default: -``` + ```yaml + PHONE_DATA_CONFIGURATION: + SOURCE: + TYPE: DATABASE + DATABASE_GROUP: *database_group + DEVICE_ID_COLUMN: device_id # column name + TIMEZONE: + TYPE: SINGLE # SINGLE (MULTIPLE support coming soon) + VALUE: *timezone -``` + ``` -**For `[DEVICE_DATA][PHONE]`** + **Parameters for `[PHONE_DATA_CONFIGURATION]`** -| Key | Description | 
-|---------------------|----------------------------------------------------------------------------------------------------------------------------| -| `[SOURCE] [TYPE]` | Only `DATABASE` is supported (phone data will be pulled from a database) | -| `[SOURCE] [DATABASE_GROUP]` | `*database_group` points to the value defined before in [Database credentials](#database-credentials) | -| `[SOURCE] [DEVICE_ID_COLUMN]` | The column that has strings that uniquely identify smartphones. For data collected with AWARE this is usually `device_id` | -| `[TIMEZONE] [TYPE]` | Only `SINGLE` is supported | -| `[TIMEZONE] [VALUE]` | `*timezone` points to the value defined before in [Timezone of your study](#timezone-of-your-study) | + | Key | Description | + |---------------------|----------------------------------------------------------------------------------------------------------------------------| + | `[SOURCE] [TYPE]` | Only `DATABASE` is supported (phone data will be pulled from a database) | + | `[SOURCE] [DATABASE_GROUP]` | `*database_group` points to the value defined before in [Database credentials](#database-credentials) | + | `[SOURCE] [DEVICE_ID_COLUMN]` | A column that contains strings that uniquely identify smartphones. For data collected with AWARE this is usually `device_id` | + | `[TIMEZONE] [TYPE]` | Only `SINGLE` is supported for now | + | `[TIMEZONE] [VALUE]` | `*timezone` points to the value defined before in [Timezone of your study](#timezone-of-your-study) | -**For `[DEVICE_DATA][FITBIT]`** +=== "Fitbit" -| Key | Description | -|------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `[SOURCE]` `[TYPE]` | `DATABASE` or `FILES` (set each `[FITBIT_SENSOR]` `[TABLE]` attribute accordingly with a table name or a file path) | -| `[SOURCE]` `[COLUMN_FORMAT]` | `JSON` or `PLAIN_TEXT`. 
Column format of the source data. | -| `[SOURCE]` `[DATABASE_GROUP]` | `*database_group` points to the value defined before in [Database credentials](#database-credentials). Only used if `[TYPE]` is `DATABASE` . | -| `[SOURCE]` `[DEVICE_ID_COLUMN]` | The column that has strings that uniquely identify Fitbit devices. | -| `[TIMEZONE]` `[TYPE]` | Only `SINGLE` is supported (Fitbit devices always store data in local time). | -| `[TIMEZONE]` `[VALUE]` | `*timezone` points to the value defined before in [Timezone of your study](#timezone-of-your-study) | + The relevant `config.yaml` section looks like this by default: + + ```yaml + FITBIT_DATA_CONFIGURATION: + SOURCE: + TYPE: DATABASE # DATABASE or FILES (set each [FITBIT_SENSOR][TABLE] attribute with a table name or a file path accordingly) + COLUMN_FORMAT: JSON # JSON or PLAIN_TEXT + DATABASE_GROUP: *database_group + DEVICE_ID_COLUMN: device_id # column name + TIMEZONE: + TYPE: SINGLE # Fitbit devices don't support time zones so we read this data in the timezone indicated by VALUE + VALUE: *timezone + + ``` + + **Parameters for `[FITBIT_DATA_CONFIGURATION]`** + + | Key | Description | + |------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------| + | `[SOURCE]` `[TYPE]` | `DATABASE` or `FILES` (set each `[FITBIT_SENSOR]` `[TABLE]` attribute accordingly with a table name or a file path) | + | `[SOURCE]` `[COLUMN_FORMAT]` | `JSON` or `PLAIN_TEXT`. Column format of the source data. If you pulled your data directly from the Fitbit API the column containing the sensor data will be in `JSON` format | + | `[SOURCE]` `[DATABASE_GROUP]` | `*database_group` points to the value defined before in [Database credentials](#database-credentials). Only used if `[TYPE]` is `DATABASE`. 
| + | `[SOURCE]` `[DEVICE_ID_COLUMN]` | A column that contains strings that uniquely identify Fitbit devices. | + | `[TIMEZONE]` `[TYPE]` | Only `SINGLE` is supported (Fitbit devices always store data in local time). | + | `[TIMEZONE]` `[VALUE]` | `*timezone` points to the value defined before in [Timezone of your study](#timezone-of-your-study) | --- ## Sensor and Features to Process -Finally, you need to modify the `config.yaml` of the sensors you want to process. All sensors follow the same naming nomenclature `DEVICE_SENSOR` and have the following basic attributes (we will use `PHONE_MESSAGES` as an example). - -!!! hint - Every time you change any sensor parameters, all the necessary files will be updated as soon as you execute RAPIDS. Some sensors will have specific attributes (like `MESSAGES_TYPES`) so refer to each sensor documentation. - -```yaml -PHONE_MESSAGES: - TABLE: messages - PROVIDERS: - RAPIDS: - COMPUTE: True - MESSAGES_TYPES : [received, sent] - FEATURES: - received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact] - sent: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact] - SRC_LANGUAGE: "r" - SRC_FOLDER: "rapids" # inside src/features/phone_messages -``` - -| Key                      | Description | -|-------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `[TABLE]` | The name of the table in your database that stores this sensor data. | -| `[PROVIDERS]` | A collection of `providers` . A provider is an author or group of authors that created specific features for the sensor at hand. 
The provider for all the features implemented by our team is called `RAPIDS` but we have also included contributions from other researchers (for example `DORYAB` for location features). | -| `[PROVIDER]` `[COMPUTE]` | Set this to `TRUE` if you want to process features for this `provider` . | -| `[PROVIDER]` `[FEATURES]` | A list of all the features available for the `provider` . Delete those that you don't want to compute. | -| `[PROVIDER]` `[SRC_LANGUAGE]` | The programming language ( `r` or `python` ) in which the features of this `provider` are implemented. | -| `[PROVIDER]` `[SRC_FOLDER]` | The folder where the script(s) to compute the features of this `provider` are stored. This folder is always inside `src/features/[DEVICE_SENSOR]/` | +Finally, you need to modify the `config.yaml` section of the sensors you want to extract behavioral features from. All sensors follow the same naming nomenclature (`DEVICE_SENSOR`) and parameter structure which we explain in the [Behavioral Features Introduction](../../features/feature-introduction/). +!!! done + Head over to [Execution](../execution/) to learn how to execute RAPIDS. \ No newline at end of file diff --git a/docs/setup/execution.md b/docs/setup/execution.md index 31adf3df..626fbf5a 100644 --- a/docs/setup/execution.md +++ b/docs/setup/execution.md @@ -6,6 +6,9 @@ After you have [installed](../installation) and [configured](../configuration) R ./rapids -j1 ``` +!!! done "Ready to extract behavioral features" + If you are ready to extract features head over to the [Behavioral Features Introduction](../../features/feature-introduction/) + !!! info The script `#!bash ./rapids` is a wrapper around Snakemake so you can pass any parameters that Snakemake accepts (e.g. `-j1`). @@ -31,6 +34,3 @@ After you have [installed](../installation) and [configured](../configuration) R ```bash ./rapids -j1 -R clean ``` - -!!! 
done "Ready to extract behavioral features" - If you are ready to extract features head over to the [Behavioral Features Introduction](../../features/feature-introduction/) \ No newline at end of file diff --git a/example_profile/example_config.yaml b/example_profile/example_config.yaml index 47197e4d..70296f31 100644 --- a/example_profile/example_config.yaml +++ b/example_profile/example_config.yaml @@ -31,29 +31,21 @@ DAY_SEGMENTS: &day_segments FILE: "example_profile/exampleworkflow_daysegments.csv" INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs -# See https://www.rapids.science/setup/configuration/#device-data-source-configuration -DEVICE_DATA: - PHONE: - SOURCE: - TYPE: DATABASE - DATABASE_GROUP: *database_group - DEVICE_ID_COLUMN: device_id # column name - TIMEZONE: - TYPE: SINGLE # SINGLE or MULTIPLE - VALUE: *timezone # IF TYPE=SINGLE, see docs - FITBIT: - SOURCE: - TYPE: DATABASE # DATABASE or FILES (set each FITBIT_SENSOR TABLE attribute accordingly with a table name or a file path) - COLUMN_FORMAT: JSON # JSON or PLAIN_TEXT - DATABASE_GROUP: *database_group - DEVICE_ID_COLUMN: device_id # column name - TIMEZONE: - TYPE: SINGLE # Fitbit only supports SINGLE timezones - VALUE: *timezone # see docs - ############## PHONE ########################################################### ################################################################################ +# See https://www.rapids.science/latest/setup/configuration/#device-data-source-configuration +PHONE_DATA_CONFIGURATION: + SOURCE: + TYPE: DATABASE + DATABASE_GROUP: *database_group + DEVICE_ID_COLUMN: device_id # column name + TIMEZONE: + TYPE: SINGLE # SINGLE or MULTIPLE + VALUE: *timezone # IF TYPE=SINGLE, see docs + +# Sensors ------ + PHONE_DATA_YIELD: SENSORS: [PHONE_ACCELEROMETER, PHONE_ACTIVITY_RECOGNITION, PHONE_APPLICATIONS_FOREGROUND, PHONE_BATTERY, PHONE_BLUETOOTH, PHONE_CALLS, PHONE_CONVERSATION, PHONE_LIGHT, PHONE_LOCATIONS, PHONE_MESSAGES, PHONE_SCREEN, 
PHONE_WIFI_CONNECTED, PHONE_WIFI_VISIBLE] PROVIDERS: @@ -254,6 +246,18 @@ PHONE_CONVERSATION: ############## FITBIT ########################################################## ################################################################################ +FITBIT_DATA_CONFIGURATION: + SOURCE: + TYPE: DATABASE # DATABASE or FILES (set each [FITBIT_SENSOR][TABLE] attribute with a table name or a file path accordingly) + COLUMN_FORMAT: JSON # JSON or PLAIN_TEXT + DATABASE_GROUP: *database_group + DEVICE_ID_COLUMN: device_id # column name + TIMEZONE: + TYPE: SINGLE # Fitbit only supports SINGLE timezones + VALUE: *timezone # see docs + HIDDEN: + SINGLE_FITBIT_TABLE: TRUE + FITBIT_HEARTRATE_SUMMARY: TABLE: fitbit_data PROVIDERS: diff --git a/mkdocs.yml b/mkdocs.yml index bc4fa651..0a6ea7c9 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -79,7 +79,6 @@ nav: - Behavioral Features: - Introduction: features/feature-introduction.md - Phone: - - Phone Data Yield: features/phone-data-yield.md - Phone Accelerometer: features/phone-accelerometer.md - Phone Activity Recognition: features/phone-activity-recognition.md - Phone Applications Foreground: features/phone-applications-foreground.md @@ -87,6 +86,7 @@ nav: - Phone Bluetooth: features/phone-bluetooth.md - Phone Calls: features/phone-calls.md - Phone Conversation: features/phone-conversation.md + - Phone Data Yield: features/phone-data-yield.md - Phone Light: features/phone-light.md - Phone Locations: features/phone-locations.md - Phone Messages: features/phone-messages.md diff --git a/rules/preprocessing.smk b/rules/preprocessing.smk index eb539ca7..2ed42809 100644 --- a/rules/preprocessing.smk +++ b/rules/preprocessing.smk @@ -27,10 +27,10 @@ rule download_phone_data: input: "data/external/participant_files/{pid}.yaml" params: - source = config["DEVICE_DATA"]["PHONE"]["SOURCE"], + source = config["PHONE_DATA_CONFIGURATION"]["SOURCE"], sensor = "phone_" + "{sensor}", table = lambda wildcards: config["PHONE_" + 
str(wildcards.sensor).upper()]["TABLE"], - timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], + timezone = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"], aware_multiplatform_tables = config["PHONE_ACTIVITY_RECOGNITION"]["TABLE"]["ANDROID"] + "," + config["PHONE_ACTIVITY_RECOGNITION"]["TABLE"]["IOS"] + "," + config["PHONE_CONVERSATION"]["TABLE"]["ANDROID"] + "," + config["PHONE_CONVERSATION"]["TABLE"]["IOS"], output: "data/raw/{pid}/phone_{sensor}_raw.csv" @@ -40,9 +40,9 @@ rule download_phone_data: rule download_fitbit_data: input: participant_file = "data/external/participant_files/{pid}.yaml", - input_file = [] if config["DEVICE_DATA"]["FITBIT"]["SOURCE"]["TYPE"] == "DATABASE" else lambda wildcards: config["FITBIT_" + str(wildcards.sensor).upper()]["TABLE"] + input_file = [] if config["FITBIT_DATA_CONFIGURATION"]["SOURCE"]["TYPE"] == "DATABASE" else lambda wildcards: config["FITBIT_" + str(wildcards.sensor).upper()]["TABLE"] params: - source = config["DEVICE_DATA"]["FITBIT"]["SOURCE"], + data_configuration = config["FITBIT_DATA_CONFIGURATION"], sensor = "fitbit_" + "{sensor}", table = lambda wildcards: config["FITBIT_" + str(wildcards.sensor).upper()]["TABLE"], output: @@ -68,8 +68,8 @@ rule phone_readable_datetime: sensor_input = "data/raw/{pid}/phone_{sensor}_raw.csv", day_segments = "data/interim/day_segments/{pid}_day_segments.csv" params: - timezones = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["TYPE"], - fixed_timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], + timezones = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["TYPE"], + fixed_timezone = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"], day_segments_type = config["DAY_SEGMENTS"]["TYPE"], include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"] output: @@ -92,8 +92,8 @@ rule phone_yielded_timestamps_with_datetime: sensor_input = "data/interim/{pid}/phone_yielded_timestamps.csv", day_segments = 
"data/interim/day_segments/{pid}_day_segments.csv" params: - timezones = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["TYPE"], - fixed_timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], + timezones = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["TYPE"], + fixed_timezone = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"], day_segments_type = config["DAY_SEGMENTS"]["TYPE"], include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"] output: @@ -130,8 +130,8 @@ rule phone_locations_processed_with_datetime: sensor_input = "data/interim/{pid}/phone_locations_processed.csv", day_segments = "data/interim/day_segments/{pid}_day_segments.csv" params: - timezones = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["TYPE"], - fixed_timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], + timezones = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["TYPE"], + fixed_timezone = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"], day_segments_type = config["DAY_SEGMENTS"]["TYPE"], include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"] output: @@ -152,8 +152,8 @@ rule resample_episodes_with_datetime: sensor_input = "data/interim/{pid}/{sensor}_episodes_resampled.csv", day_segments = "data/interim/day_segments/{pid}_day_segments.csv" params: - timezones = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["TYPE"], - fixed_timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], + timezones = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["TYPE"], + fixed_timezone = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"], day_segments_type = config["DAY_SEGMENTS"]["TYPE"], include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"] output: @@ -178,9 +178,9 @@ rule fitbit_parse_heartrate: input: "data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_raw.csv" params: - timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], + timezone = 
config["FITBIT_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"], table = lambda wildcards: config["FITBIT_HEARTRATE_"+str(wildcards.fitbit_data_type).upper()]["TABLE"], - column_format = config["DEVICE_DATA"]["FITBIT"]["SOURCE"]["COLUMN_FORMAT"], + column_format = config["FITBIT_DATA_CONFIGURATION"]["SOURCE"]["COLUMN_FORMAT"], fitbit_data_type = "{fitbit_data_type}" output: "data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_parsed.csv" @@ -191,9 +191,9 @@ rule fitbit_parse_steps: input: "data/raw/{pid}/fitbit_steps_{fitbit_data_type}_raw.csv" params: - timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], + timezone = config["FITBIT_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"], table = lambda wildcards: config["FITBIT_STEPS_"+str(wildcards.fitbit_data_type).upper()]["TABLE"], - column_format = config["DEVICE_DATA"]["FITBIT"]["SOURCE"]["COLUMN_FORMAT"], + column_format = config["FITBIT_DATA_CONFIGURATION"]["SOURCE"]["COLUMN_FORMAT"], fitbit_data_type = "{fitbit_data_type}" output: "data/raw/{pid}/fitbit_steps_{fitbit_data_type}_parsed.csv" @@ -204,9 +204,9 @@ rule fitbit_parse_sleep: input: "data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_raw.csv" params: - timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], + timezone = config["FITBIT_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"], table = lambda wildcards: config["FITBIT_SLEEP_"+str(wildcards.fitbit_data_type).upper()]["TABLE"], - column_format = config["DEVICE_DATA"]["FITBIT"]["SOURCE"]["COLUMN_FORMAT"], + column_format = config["FITBIT_DATA_CONFIGURATION"]["SOURCE"]["COLUMN_FORMAT"], fitbit_data_type = "{fitbit_data_type}", sleep_episode_timestamp = config["FITBIT_SLEEP_SUMMARY"]["SLEEP_EPISODE_TIMESTAMP"] output: @@ -214,25 +214,25 @@ rule fitbit_parse_sleep: script: "../src/data/fitbit_parse_sleep.py" -rule fitbit_parse_calories: - input: - data = expand("data/raw/{{pid}}/fitbit_calories_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else 
["summary", "intraday"])) - params: - timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], - table = config["FITBIT_CALORIES"]["TABLE"], - table_format = config["FITBIT_CALORIES"]["TABLE_FORMAT"] - output: - summary_data = "data/raw/{pid}/fitbit_calories_summary_parsed.csv", - intraday_data = "data/raw/{pid}/fitbit_calories_intraday_parsed.csv" - script: - "../src/data/fitbit_parse_calories.py" +# rule fitbit_parse_calories: +# input: +# data = expand("data/raw/{{pid}}/fitbit_calories_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])) +# params: +# timezone = config["FITBIT_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"], +# table = config["FITBIT_CALORIES"]["TABLE"], +# table_format = config["FITBIT_CALORIES"]["TABLE_FORMAT"] +# output: +# summary_data = "data/raw/{pid}/fitbit_calories_summary_parsed.csv", +# intraday_data = "data/raw/{pid}/fitbit_calories_intraday_parsed.csv" +# script: +# "../src/data/fitbit_parse_calories.py" rule fitbit_readable_datetime: input: sensor_input = "data/raw/{pid}/fitbit_{sensor}_{fitbit_data_type}_parsed.csv", day_segments = "data/interim/day_segments/{pid}_day_segments.csv" params: - fixed_timezone = config["DEVICE_DATA"]["FITBIT"]["TIMEZONE"]["VALUE"], + fixed_timezone = config["FITBIT_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"], day_segments_type = config["DAY_SEGMENTS"]["TYPE"], include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"] output: diff --git a/rules/reports.smk b/rules/reports.smk index a385022a..ef44c249 100644 --- a/rules/reports.smk +++ b/rules/reports.smk @@ -66,7 +66,7 @@ rule overall_compliance_heatmap: pid_files = expand("data/external/{pid}", pid=config["PIDS"]) params: only_show_valid_days = config["OVERALL_COMPLIANCE_HEATMAP"]["ONLY_SHOW_VALID_DAYS"], - local_timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], + local_timezone = 
config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"], expected_num_of_days = config["OVERALL_COMPLIANCE_HEATMAP"]["EXPECTED_NUM_OF_DAYS"], bin_size = config["OVERALL_COMPLIANCE_HEATMAP"]["BIN_SIZE"], min_bins_per_hour = "{min_valid_bins_per_hour}" diff --git a/src/data/download_fitbit_data.R b/src/data/download_fitbit_data.R index adc4b312..e42b8b55 100644 --- a/src/data/download_fitbit_data.R +++ b/src/data/download_fitbit_data.R @@ -8,7 +8,8 @@ library(yaml) participant_file <- snakemake@input[["participant_file"]] input_file <- snakemake@input[["input_file"]] -source <- snakemake@params[["source"]] +data_configuration <- snakemake@params[["data_configuration"]] +source <- data_configuration$SOURCE sensor <- snakemake@params[["sensor"]] table <- snakemake@params[["table"]] sensor_file <- snakemake@output[[1]] @@ -36,7 +37,7 @@ sensor_data <- sensor_data %>% rename(device_id = source$DEVICE_ID_COLUMN) %>% mutate(device_id = unified_device_id) # Unify device_id -if(FALSE) # For MoSHI use, we didn't split fitbit sensors into different tables +if("HIDDEN" %in% names(data_configuration) && data_configuration$HIDDEN$SINGLE_FITBIT_TABLE == TRUE) # For MoSHI use, we didn't split fitbit sensors into different tables sensor_data <- sensor_data %>% filter(fitbit_data_type == str_split(sensor, "_", simplify = TRUE)[[2]]) # Droping duplicates on all columns except for _id or id