Separate device data configuration and update docs

pull/103/head
JulioV 2020-11-25 19:42:11 -05:00
parent c734c8b415
commit 5f51c94ac6
10 changed files with 327 additions and 312 deletions

View File

@ -163,9 +163,6 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
if config["FITBIT_CALORIES"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
raise ValueError("config['FITBIT_CALORIES']['TABLE_FORMAT'] should be JSON or CSV but you typed" + config["FITBIT_CALORIES"]["TABLE_FORMAT"])
for provider in config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"].keys(): for provider in config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"].keys():
if config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]: if config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_summary_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_summary_raw.csv", pid=config["PIDS"]))
@ -222,13 +219,13 @@ for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys():
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
for provider in config["FITBIT_CALORIES"]["PROVIDERS"].keys(): # for provider in config["FITBIT_CALORIES"]["PROVIDERS"].keys():
if config["FITBIT_CALORIES"]["PROVIDERS"][provider]["COMPUTE"]: # if config["FITBIT_CALORIES"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))) # files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"])) # files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"])) # files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) # files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") # files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
# visualization for data exploration # visualization for data exploration

View File

@ -1,15 +1,15 @@
# See https://www.rapids.science/setup/configuration/#database-credentials # See https://www.rapids.science/latest/setup/configuration/#database-credentials
DATABASE_GROUP: &database_group DATABASE_GROUP: &database_group
MY_GROUP MY_GROUP
# See https://www.rapids.science/setup/configuration/#timezone-of-your-study # See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study
TIMEZONE: &timezone TIMEZONE: &timezone
America/New_York America/New_York
# See https://www.rapids.science/setup/configuration/#participant-files # See https://www.rapids.science/latest/setup/configuration/#participant-files
PIDS: [j01] PIDS: [test01]
# See https://www.rapids.science/setup/configuration/#automatic-creation-of-participant-files # See https://www.rapids.science/latest/setup/configuration/#automatic-creation-of-participant-files
CREATE_PARTICIPANT_FILES: CREATE_PARTICIPANT_FILES:
SOURCE: SOURCE:
TYPE: AWARE_DEVICE_TABLE #AWARE_DEVICE_TABLE or CSV_FILE TYPE: AWARE_DEVICE_TABLE #AWARE_DEVICE_TABLE or CSV_FILE
@ -25,59 +25,110 @@ CREATE_PARTICIPANT_FILES:
DEVICE_ID_COLUMN: device_id # column name DEVICE_ID_COLUMN: device_id # column name
IGNORED_DEVICE_IDS: [] IGNORED_DEVICE_IDS: []
# See https://www.rapids.science/setup/configuration/#day-segments # See https://www.rapids.science/latest/setup/configuration/#day-segments
DAY_SEGMENTS: &day_segments DAY_SEGMENTS: &day_segments
TYPE: PERIODIC # FREQUENCY, PERIODIC, EVENT TYPE: PERIODIC # FREQUENCY, PERIODIC, EVENT
FILE: "data/external/daysegments_periodic.csv" FILE: "data/external/daysegments_periodic.csv"
INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs
# See https://www.rapids.science/setup/configuration/#device-data-source-configuration
DEVICE_DATA:
PHONE:
SOURCE:
TYPE: DATABASE
DATABASE_GROUP: *database_group
DEVICE_ID_COLUMN: device_id # column name
TIMEZONE:
TYPE: SINGLE # SINGLE or MULTIPLE
VALUE: *timezone # IF TYPE=SINGLE, see docs
FITBIT:
SOURCE:
TYPE: DATABASE # DATABASE or FILES (set each FITBIT_SENSOR TABLE attribute accordingly with a table name or a file path)
COLUMN_FORMAT: JSON # JSON or PLAIN_TEXT
DATABASE_GROUP: *database_group
DEVICE_ID_COLUMN: device_id # column name
TIMEZONE:
TYPE: SINGLE # Fitbit only supports SINGLE timezones
VALUE: *timezone # see docs
############## PHONE ###########################################################
################################################################################
PHONE_DATA_YIELD: ########################################################################################################################
SENSORS: [] # PHONE #
########################################################################################################################
# See https://www.rapids.science/latest/setup/configuration/#device-data-source-configuration
PHONE_DATA_CONFIGURATION:
SOURCE:
TYPE: DATABASE
DATABASE_GROUP: *database_group
DEVICE_ID_COLUMN: device_id # column name
TIMEZONE:
TYPE: SINGLE # SINGLE or MULTIPLE
VALUE: *timezone # IF TYPE=SINGLE, see docs
# Sensors ------
# https://www.rapids.science/latest/features/phone-accelerometer/
PHONE_ACCELEROMETER:
TABLE: accelerometer
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
FEATURES: [ratiovalidyieldedminutes, ratiovalidyieldedhours] FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1 representing the number of minutes with at least SRC_FOLDER: "rapids" # inside src/features/phone_accelerometer
SRC_LANGUAGE: "r" SRC_LANGUAGE: "python"
SRC_FOLDER: "rapids" # inside src/features/phone_data_yield
PANDA:
COMPUTE: False
VALID_SENSED_MINUTES: False
FEATURES:
exertional_activity_episode: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"]
nonexertional_activity_episode: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"]
SRC_FOLDER: "panda" # inside src/features/phone_accelerometer
SRC_LANGUAGE: "python"
# Communication SMS features config, TYPES and FEATURES keys need to match # See https://www.rapids.science/latest/features/phone-activity-recognition/
PHONE_MESSAGES: PHONE_ACTIVITY_RECOGNITION:
TABLE: messages TABLE:
ANDROID: plugin_google_activity_recognition
IOS: plugin_ios_activity_recognition
EPISODE_THRESHOLD_BETWEEN_ROWS: 5 # minutes. Max time difference for two consecutive rows to be considered within the same activity episode.
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
MESSAGES_TYPES : [received, sent] FEATURES: ["count", "mostcommonactivity", "countuniqueactivities", "durationstationary", "durationmobile", "durationvehicle"]
FEATURES: ACTIVITY_CLASSES:
received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact] STATIONARY: ["still", "tilting"]
sent: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact] MOBILE: ["on_foot", "walking", "running", "on_bicycle"]
SRC_LANGUAGE: "r" VEHICLE: ["in_vehicle"]
SRC_FOLDER: "rapids" # inside src/features/phone_messages SRC_FOLDER: "rapids" # inside src/features/phone_activity_recognition
SRC_LANGUAGE: "python"
# Communication call features config, TYPES and FEATURES keys need to match # See https://www.rapids.science/latest/features/phone-applications-foreground/
PHONE_APPLICATIONS_FOREGROUND:
TABLE: applications_foreground
APPLICATION_CATEGORIES:
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
PROVIDERS:
RAPIDS:
COMPUTE: False
SINGLE_CATEGORIES: ["all", "email"]
MULTIPLE_CATEGORIES:
social: ["socialnetworks", "socialmediatools"]
entertainment: ["entertainment", "gamingknowledge", "gamingcasual", "gamingadventure", "gamingstrategy", "gamingtoolscommunity", "gamingroleplaying", "gamingaction", "gaminglogic", "gamingsports", "gamingsimulation"]
SINGLE_APPS: ["top1global", "com.facebook.moments", "com.google.android.youtube", "com.twitter.android"] # There's no entropy for single apps
EXCLUDED_CATEGORIES: []
EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"]
FEATURES: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]
SRC_FOLDER: "rapids" # inside src/features/phone_applications_foreground
SRC_LANGUAGE: "python"
# See https://www.rapids.science/latest/features/phone-battery/
PHONE_BATTERY:
TABLE: battery
EPISODE_THRESHOLD_BETWEEN_ROWS: 30 # minutes. Max time difference for two consecutive rows to be considered within the same battery episode.
PROVIDERS:
RAPIDS:
COMPUTE: False
FEATURES: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"]
SRC_FOLDER: "rapids" # inside src/features/phone_battery
SRC_LANGUAGE: "python"
# See https://www.rapids.science/latest/features/phone-bluetooth/
PHONE_BLUETOOTH:
TABLE: bluetooth
PROVIDERS:
RAPIDS:
COMPUTE: False
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
SRC_FOLDER: "rapids" # inside src/features/phone_bluetooth
SRC_LANGUAGE: "r"
# See https://www.rapids.science/latest/features/phone-calls/
PHONE_CALLS: PHONE_CALLS:
TABLE: calls TABLE: calls
PROVIDERS: PROVIDERS:
@ -91,6 +142,47 @@ PHONE_CALLS:
SRC_LANGUAGE: "r" SRC_LANGUAGE: "r"
SRC_FOLDER: "rapids" # inside src/features/phone_calls SRC_FOLDER: "rapids" # inside src/features/phone_calls
# See https://www.rapids.science/latest/features/phone-conversation/
PHONE_CONVERSATION:
TABLE:
ANDROID: plugin_studentlife_audio_android
IOS: plugin_studentlife_audio
PROVIDERS:
RAPIDS:
COMPUTE: False
FEATURES: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
"sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","noisesumenergy",
"noiseavgenergy","noisesdenergy","noiseminenergy","noisemaxenergy","voicesumenergy",
"voiceavgenergy","voicesdenergy","voiceminenergy","voicemaxenergy","silencesensedfraction","noisesensedfraction",
"voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction",
"unknownexpectedfraction","countconversation"]
RECORDING_MINUTES: 1
PAUSED_MINUTES : 3
SRC_FOLDER: "rapids" # inside src/features/phone_conversation
SRC_LANGUAGE: "python"
# See https://www.rapids.science/latest/features/phone-data-yield/
PHONE_DATA_YIELD:
SENSORS: []
PROVIDERS:
RAPIDS:
COMPUTE: False
FEATURES: [ratiovalidyieldedminutes, ratiovalidyieldedhours]
MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1; minimum ratio of valid yielded minutes an hour needs to be counted as a valid yielded hour
SRC_LANGUAGE: "r"
SRC_FOLDER: "rapids" # inside src/features/phone_data_yield
# See https://www.rapids.science/latest/features/phone-light/
PHONE_LIGHT:
TABLE: light
PROVIDERS:
RAPIDS:
COMPUTE: False
FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
SRC_FOLDER: "rapids" # inside src/features/phone_light
SRC_LANGUAGE: "python"
# See https://www.rapids.science/latest/features/phone-locations/
PHONE_LOCATIONS: PHONE_LOCATIONS:
TABLE: locations TABLE: locations
LOCATIONS_TO_USE: FUSED_RESAMPLED # ALL, GPS OR FUSED_RESAMPLED LOCATIONS_TO_USE: FUSED_RESAMPLED # ALL, GPS OR FUSED_RESAMPLED
@ -118,42 +210,20 @@ PHONE_LOCATIONS:
SRC_FOLDER: "barnett" # inside src/features/phone_locations SRC_FOLDER: "barnett" # inside src/features/phone_locations
SRC_LANGUAGE: "r" SRC_LANGUAGE: "r"
PHONE_BLUETOOTH: # See https://www.rapids.science/latest/features/phone-messages/
TABLE: bluetooth PHONE_MESSAGES:
TABLE: messages
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"] MESSAGES_TYPES : [received, sent]
SRC_FOLDER: "rapids" # inside src/features/phone_bluetooth FEATURES:
received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
sent: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
SRC_LANGUAGE: "r" SRC_LANGUAGE: "r"
SRC_FOLDER: "rapids" # inside src/features/phone_messages
# See https://www.rapids.science/latest/features/phone-screen/
PHONE_ACTIVITY_RECOGNITION:
TABLE:
ANDROID: plugin_google_activity_recognition
IOS: plugin_ios_activity_recognition
EPISODE_THRESHOLD_BETWEEN_ROWS: 5 # minutes. Max time difference for two consecutive rows to be considered within the same battery episode.
PROVIDERS:
RAPIDS:
COMPUTE: False
FEATURES: ["count", "mostcommonactivity", "countuniqueactivities", "durationstationary", "durationmobile", "durationvehicle"]
ACTIVITY_CLASSES:
STATIONARY: ["still", "tilting"]
MOBILE: ["on_foot", "walking", "running", "on_bicycle"]
VEHICLE: ["in_vehicle"]
SRC_FOLDER: "rapids" # inside src/features/phone_activity_recognition
SRC_LANGUAGE: "python"
PHONE_BATTERY:
TABLE: battery
EPISODE_THRESHOLD_BETWEEN_ROWS: 30 # minutes. Max time difference for two consecutive rows to be considered within the same battery episode.
PROVIDERS:
RAPIDS:
COMPUTE: False
FEATURES: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"]
SRC_FOLDER: "rapids" # inside src/features/phone_battery
SRC_LANGUAGE: "python"
PHONE_SCREEN: PHONE_SCREEN:
TABLE: screen TABLE: screen
PROVIDERS: PROVIDERS:
@ -167,63 +237,7 @@ PHONE_SCREEN:
SRC_FOLDER: "rapids" # inside src/features/phone_screen SRC_FOLDER: "rapids" # inside src/features/phone_screen
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"
PHONE_LIGHT: # See https://www.rapids.science/latest/features/phone-wifi-connected/
TABLE: light
PROVIDERS:
RAPIDS:
COMPUTE: False
FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
SRC_FOLDER: "rapids" # inside src/features/phone_light
SRC_LANGUAGE: "python"
PHONE_ACCELEROMETER:
TABLE: accelerometer
PROVIDERS:
RAPIDS:
COMPUTE: False
FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
SRC_FOLDER: "rapids" # inside src/features/phone_accelerometer
SRC_LANGUAGE: "python"
PANDA:
COMPUTE: False
VALID_SENSED_MINUTES: False
FEATURES:
exertional_activity_episode: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"]
nonexertional_activity_episode: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"]
SRC_FOLDER: "panda" # inside src/features/phone_accelerometer
SRC_LANGUAGE: "python"
PHONE_APPLICATIONS_FOREGROUND:
TABLE: applications_foreground
APPLICATION_CATEGORIES:
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store)
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
PROVIDERS:
RAPIDS:
COMPUTE: False
SINGLE_CATEGORIES: ["all", "email"]
MULTIPLE_CATEGORIES:
social: ["socialnetworks", "socialmediatools"]
entertainment: ["entertainment", "gamingknowledge", "gamingcasual", "gamingadventure", "gamingstrategy", "gamingtoolscommunity", "gamingroleplaying", "gamingaction", "gaminglogic", "gamingsports", "gamingsimulation"]
SINGLE_APPS: ["top1global", "com.facebook.moments", "com.google.android.youtube", "com.twitter.android"] # There's no entropy for single apps
EXCLUDED_CATEGORIES: []
EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"]
FEATURES: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]
SRC_FOLDER: "rapids" # inside src/features/phone_applications_foreground
SRC_LANGUAGE: "python"
PHONE_WIFI_VISIBLE:
TABLE: "wifi"
PROVIDERS:
RAPIDS:
COMPUTE: False
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
SRC_FOLDER: "rapids" # inside src/features/phone_wifi_visible
SRC_LANGUAGE: "r"
PHONE_WIFI_CONNECTED: PHONE_WIFI_CONNECTED:
TABLE: "sensor_wifi" TABLE: "sensor_wifi"
PROVIDERS: PROVIDERS:
@ -233,27 +247,39 @@ PHONE_WIFI_CONNECTED:
SRC_FOLDER: "rapids" # inside src/features/phone_wifi_connected SRC_FOLDER: "rapids" # inside src/features/phone_wifi_connected
SRC_LANGUAGE: "r" SRC_LANGUAGE: "r"
PHONE_CONVERSATION: # See https://www.rapids.science/latest/features/phone-wifi-visible/
TABLE: PHONE_WIFI_VISIBLE:
ANDROID: plugin_studentlife_audio_android TABLE: "wifi"
IOS: plugin_studentlife_audio
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
FEATURES: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration", FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
"sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","noisesumenergy", SRC_FOLDER: "rapids" # inside src/features/phone_wifi_visible
"noiseavgenergy","noisesdenergy","noiseminenergy","noisemaxenergy","voicesumenergy", SRC_LANGUAGE: "r"
"voiceavgenergy","voicesdenergy","voiceminenergy","voicemaxenergy","silencesensedfraction","noisesensedfraction",
"voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction",
"unknownexpectedfraction","countconversation"]
RECORDING_MINUTES: 1
PAUSED_MINUTES : 3
SRC_FOLDER: "rapids" # inside src/features/phone_conversation
SRC_LANGUAGE: "python"
############## FITBIT ##########################################################
################################################################################
########################################################################################################################
# FITBIT #
########################################################################################################################
# See https://www.rapids.science/latest/setup/configuration/#device-data-source-configuration
FITBIT_DATA_CONFIGURATION:
SOURCE:
TYPE: DATABASE # DATABASE or FILES (set each [FITBIT_SENSOR][TABLE] attribute with a table name or a file path accordingly)
COLUMN_FORMAT: JSON # JSON or PLAIN_TEXT
DATABASE_GROUP: *database_group
DEVICE_ID_COLUMN: device_id # column name
TIMEZONE:
TYPE: SINGLE # Fitbit devices don't support time zones so we read this data in the timezone indicated by VALUE
VALUE: *timezone
# Sensors ------
# See https://www.rapids.science/latest/features/fitbit-heartrate-summary/
FITBIT_HEARTRATE_SUMMARY: FITBIT_HEARTRATE_SUMMARY:
TABLE: heartrate_summary TABLE: heartrate_summary
PROVIDERS: PROVIDERS:
@ -263,6 +289,7 @@ FITBIT_HEARTRATE_SUMMARY:
SRC_FOLDER: "rapids" # inside src/features/fitbit_heartrate_summary SRC_FOLDER: "rapids" # inside src/features/fitbit_heartrate_summary
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"
# See https://www.rapids.science/latest/features/fitbit-heartrate-intraday/
FITBIT_HEARTRATE_INTRADAY: FITBIT_HEARTRATE_INTRADAY:
TABLE: heartrate_intraday TABLE: heartrate_intraday
PROVIDERS: PROVIDERS:
@ -272,6 +299,19 @@ FITBIT_HEARTRATE_INTRADAY:
SRC_FOLDER: "rapids" # inside src/features/fitbit_heartrate_intraday SRC_FOLDER: "rapids" # inside src/features/fitbit_heartrate_intraday
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"
# See https://www.rapids.science/latest/features/fitbit-sleep-summary/
FITBIT_SLEEP_SUMMARY:
TABLE: sleep_summary
SLEEP_EPISODE_TIMESTAMP: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
PROVIDERS:
RAPIDS:
COMPUTE: False
FEATURES: ["countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"]
SLEEP_TYPES: ["main", "nap", "all"]
SRC_FOLDER: "rapids" # inside src/features/fitbit_sleep_summary
SRC_LANGUAGE: "python"
# See https://www.rapids.science/latest/features/fitbit-steps-summary/
FITBIT_STEPS_SUMMARY: FITBIT_STEPS_SUMMARY:
TABLE: steps_summary TABLE: steps_summary
PROVIDERS: PROVIDERS:
@ -281,6 +321,7 @@ FITBIT_STEPS_SUMMARY:
SRC_FOLDER: "rapids" # inside src/features/fitbit_steps_summary SRC_FOLDER: "rapids" # inside src/features/fitbit_steps_summary
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"
# See https://www.rapids.science/latest/features/fitbit-steps-intraday/
FITBIT_STEPS_INTRADAY: FITBIT_STEPS_INTRADAY:
TABLE: steps_intraday TABLE: steps_intraday
PROVIDERS: PROVIDERS:
@ -295,31 +336,24 @@ FITBIT_STEPS_INTRADAY:
SRC_FOLDER: "rapids" # inside src/features/fitbit_steps_intraday SRC_FOLDER: "rapids" # inside src/features/fitbit_steps_intraday
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"
FITBIT_SLEEP_SUMMARY: # FITBIT_CALORIES:
TABLE: sleep_summary # TABLE_FORMAT: JSON # JSON or CSV. If your JSON or CSV data are files change [FITBIT_DATA_CONFIGURATION][SOURCE][TYPE] to FILES
SLEEP_EPISODE_TIMESTAMP: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp. # TABLE:
PROVIDERS: # JSON: fitbit_calories
RAPIDS: # CSV:
COMPUTE: False # SUMMARY: calories_summary
FEATURES: ["countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"] # INTRADAY: calories_intraday
SLEEP_TYPES: ["main", "nap", "all"] # PROVIDERS:
SRC_FOLDER: "rapids" # inside src/features/fitbit_sleep_summary # RAPIDS:
SRC_LANGUAGE: "python" # COMPUTE: False
# FEATURES: []
FITBIT_CALORIES:
TABLE_FORMAT: JSON # JSON or CSV. If your JSON or CSV data are files change [DEVICE_DATA][FITBIT][SOURCE][TYPE] to FILES
TABLE:
JSON: fitbit_calories
CSV:
SUMMARY: calories_summary
INTRADAY: calories_intraday
PROVIDERS:
RAPIDS:
COMPUTE: False
FEATURES: []
### Visualizations #############################################################
################################################################################
########################################################################################################################
# PLOTS #
########################################################################################################################
HEATMAP_FEATURES_CORRELATIONS: HEATMAP_FEATURES_CORRELATIONS:
PLOT: False PLOT: False

View File

@ -3,10 +3,10 @@
Every phone or Fitbit sensor has a corresponding config section in `config.yaml`; these sections follow a similar structure and we'll use `PHONE_ACCELEROMETER` as an example to explain this structure. Every phone or Fitbit sensor has a corresponding config section in `config.yaml`; these sections follow a similar structure and we'll use `PHONE_ACCELEROMETER` as an example to explain this structure.
!!! hint !!! hint
We recommend reading this page if you are using RAPIDS for the first time - We recommend reading this page if you are using RAPIDS for the first time
- All computed sensor features are stored under `/data/processed/features` on files per sensor, per participant and per study (all participants).
- Every time you change any sensor parameters, provider parameters or provider features, all the necessary files will be updated as soon as you execute RAPIDS.
!!! hint
All sensor features are stored under `/data/processed/features` on files per sensor, per participant and per study (all participants).
!!! example "Config section example for `PHONE_ACCELEROMETER`" !!! example "Config section example for `PHONE_ACCELEROMETER`"
@ -55,6 +55,3 @@ We explain every provider's parameter in a table under the `Parameters descripti
Each provider offers a set of behavioral features (see `#4.2` or `#5.2` in the example). For some providers these features are grouped in an array (like those for `RAPIDS` provider in `#4.2`) but for others they are grouped in a collection of arrays depending on the meaning and purpose of those features (like those for `PANDAS` provider in `#5.2`). In either case, you can delete the features you are not interested in and they will not be included in the sensor's output feature file. Each provider offers a set of behavioral features (see `#4.2` or `#5.2` in the example). For some providers these features are grouped in an array (like those for `RAPIDS` provider in `#4.2`) but for others they are grouped in a collection of arrays depending on the meaning and purpose of those features (like those for `PANDAS` provider in `#5.2`). In either case, you can delete the features you are not interested in and they will not be included in the sensor's output feature file.
We explain each behavioral feature in a table under the `Features description` heading on each provider documentation page. We explain each behavioral feature in a table under the `Features description` heading on each provider documentation page.
!!! hint
Every time you change any sensor parameters, provider parameters or provider features, all the necessary files will be updated as soon as you execute RAPIDS.

View File

@ -317,85 +317,67 @@ Day segments (or epochs) are the time windows on which you want to extract behav
--- ---
## Device Data Source Configuration ## Device Data Source Configuration
You might need to modify the following config keys in your `config.yaml` depending on what devices your participants used and where you are storing your data. You might need to modify the following config keys in your `config.yaml` depending on what devices your participants used and where you are storing your data. You can ignore `[PHONE_DATA_CONFIGURATION]` or `[FITBIT_DATA_CONFIGURATION]` if you are not working with either device.
!!! hint === "Phone"
You can ignore `[DEVICE_DATA][PHONE]` or `[DEVICE_DATA][FITBIT]` if you are not working with either devices.
The relevant `config.yaml` section looks as follows by default: The relevant `config.yaml` section looks like this by default:
```yaml ```yaml
DEVICE_DATA: PHONE_DATA_CONFIGURATION:
PHONE: SOURCE:
SOURCE: TYPE: DATABASE
TYPE: DATABASE DATABASE_GROUP: *database_group
DATABASE_GROUP: *database_group DEVICE_ID_COLUMN: device_id # column name
DEVICE_ID_COLUMN: device_id # column name TIMEZONE:
TIMEZONE: TYPE: SINGLE # SINGLE (MULTIPLE support coming soon)
TYPE: SINGLE VALUE: *timezone
VALUE: *timezone
FITBIT:
SOURCE:
TYPE: DATABASE # DATABASE or FILES (set each FITBIT_SENSOR TABLE attribute accordingly with a table name or a file path)
COLUMN_FORMAT: JSON # JSON or PLAIN_TEXT
DATABASE_GROUP: *database_group
DEVICE_ID_COLUMN: fitbit_id # column name
TIMEZONE:
TYPE: SINGLE # Fitbit only supports SINGLE timezones
VALUE: *timezone
``` ```
**For `[DEVICE_DATA][PHONE]`** **Parameters for `[PHONE_DATA_CONFIGURATION]`**
| Key | Description | | Key | Description |
|---------------------|----------------------------------------------------------------------------------------------------------------------------| |---------------------|----------------------------------------------------------------------------------------------------------------------------|
| `[SOURCE] [TYPE]` | Only `DATABASE` is supported (phone data will be pulled from a database) | | `[SOURCE] [TYPE]` | Only `DATABASE` is supported (phone data will be pulled from a database) |
| `[SOURCE] [DATABASE_GROUP]` | `*database_group` points to the value defined before in [Database credentials](#database-credentials) | | `[SOURCE] [DATABASE_GROUP]` | `*database_group` points to the value defined before in [Database credentials](#database-credentials) |
| `[SOURCE] [DEVICE_ID_COLUMN]` | The column that has strings that uniquely identify smartphones. For data collected with AWARE this is usually `device_id` | | `[SOURCE] [DEVICE_ID_COLUMN]` | A column that contains strings that uniquely identify smartphones. For data collected with AWARE this is usually `device_id` |
| `[TIMEZONE] [TYPE]` | Only `SINGLE` is supported | | `[TIMEZONE] [TYPE]` | Only `SINGLE` is supported for now |
| `[TIMEZONE] [VALUE]` | `*timezone` points to the value defined before in [Timezone of your study](#timezone-of-your-study) | | `[TIMEZONE] [VALUE]` | `*timezone` points to the value defined before in [Timezone of your study](#timezone-of-your-study) |
**For `[DEVICE_DATA][FITBIT]`** === "Fitbit"
| Key | Description | The relevant `config.yaml` section looks like this by default:
|------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `[SOURCE]` `[TYPE]` | `DATABASE` or `FILES` (set each `[FITBIT_SENSOR]` `[TABLE]` attribute accordingly with a table name or a file path) | ```yaml
| `[SOURCE]` `[COLUMN_FORMAT]` | `JSON` or `PLAIN_TEXT`. Column format of the source data. | FITBIT_DATA_CONFIGURATION:
| `[SOURCE]` `[DATABASE_GROUP]` | `*database_group` points to the value defined before in [Database credentials](#database-credentials). Only used if `[TYPE]` is `DATABASE` . | SOURCE:
| `[SOURCE]` `[DEVICE_ID_COLUMN]` | The column that has strings that uniquely identify Fitbit devices. | TYPE: DATABASE # DATABASE or FILES (set each [FITBIT_SENSOR][TABLE] attribute with a table name or a file path accordingly)
| `[TIMEZONE]` `[TYPE]` | Only `SINGLE` is supported (Fitbit devices always store data in local time). | COLUMN_FORMAT: JSON # JSON or PLAIN_TEXT
| `[TIMEZONE]` `[VALUE]` | `*timezone` points to the value defined before in [Timezone of your study](#timezone-of-your-study) | DATABASE_GROUP: *database_group
DEVICE_ID_COLUMN: device_id # column name
TIMEZONE:
TYPE: SINGLE # Fitbit devices don't support time zones so we read this data in the timezone indicated by VALUE
VALUE: *timezone
```
**Parameters for `[FITBIT_DATA_CONFIGURATION]`**
| Key | Description |
|------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `[SOURCE]` `[TYPE]` | `DATABASE` or `FILES` (set each `[FITBIT_SENSOR]` `[TABLE]` attribute accordingly with a table name or a file path) |
| `[SOURCE]` `[COLUMN_FORMAT]` | `JSON` or `PLAIN_TEXT`. Column format of the source data. If you pulled your data directly from the Fitbit API the column containing the sensor data will be in `JSON` format |
| `[SOURCE]` `[DATABASE_GROUP]` | `*database_group` points to the value defined before in [Database credentials](#database-credentials). Only used if `[TYPE]` is `DATABASE` . |
| `[SOURCE]` `[DEVICE_ID_COLUMN]` | A column that contains strings that uniquely identify Fitbit devices. |
| `[TIMEZONE]` `[TYPE]` | Only `SINGLE` is supported (Fitbit devices always store data in local time). |
| `[TIMEZONE]` `[VALUE]` | `*timezone` points to the value defined before in [Timezone of your study](#timezone-of-your-study) |
--- ---
## Sensor and Features to Process ## Sensor and Features to Process
Finally, you need to modify the `config.yaml` of the sensors you want to process. All sensors follow the same naming convention `DEVICE_SENSOR` and have the following basic attributes (we will use `PHONE_MESSAGES` as an example). Finally, you need to modify the `config.yaml` section of the sensors you want to extract behavioral features from. All sensors follow the same naming convention (`DEVICE_SENSOR`) and parameter structure, which we explain in the [Behavioral Features Introduction](../../features/feature-introduction/).
!!! hint
Every time you change any sensor parameters, all the necessary files will be updated as soon as you execute RAPIDS. Some sensors will have specific attributes (like `MESSAGES_TYPES`) so refer to each sensor documentation.
```yaml
PHONE_MESSAGES:
TABLE: messages
PROVIDERS:
RAPIDS:
COMPUTE: True
MESSAGES_TYPES : [received, sent]
FEATURES:
received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
sent: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
SRC_LANGUAGE: "r"
SRC_FOLDER: "rapids" # inside src/features/phone_messages
```
| Key                      | Description |
|-------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `[TABLE]` | The name of the table in your database that stores this sensor data. |
| `[PROVIDERS]` | A collection of `providers` . A provider is an author or group of authors that created specific features for the sensor at hand. The provider for all the features implemented by our team is called `RAPIDS` but we have also included contributions from other researchers (for example `DORYAB` for location features). |
| `[PROVIDER]` `[COMPUTE]` | Set this to `TRUE` if you want to process features for this `provider` . |
| `[PROVIDER]` `[FEATURES]` | A list of all the features available for the `provider` . Delete those that you don't want to compute. |
| `[PROVIDER]` `[SRC_LANGUAGE]` | The programming language ( `r` or `python` ) in which the features of this `provider` are implemented. |
| `[PROVIDER]` `[SRC_FOLDER]` | The folder where the script(s) to compute the features of this `provider` are stored. This folder is always inside `src/features/[DEVICE_SENSOR]/` |
!!! done
Head over to [Execution](../execution/) to learn how to execute RAPIDS.

View File

@ -6,6 +6,9 @@ After you have [installed](../installation) and [configured](../configuration) R
./rapids -j1 ./rapids -j1
``` ```
!!! done "Ready to extract behavioral features"
If you are ready to extract features head over to the [Behavioral Features Introduction](../../features/feature-introduction/)
!!! info !!! info
The script `#!bash ./rapids` is a wrapper around Snakemake so you can pass any parameters that Snakemake accepts (e.g. `-j1`). The script `#!bash ./rapids` is a wrapper around Snakemake so you can pass any parameters that Snakemake accepts (e.g. `-j1`).
@ -31,6 +34,3 @@ After you have [installed](../installation) and [configured](../configuration) R
```bash ```bash
./rapids -j1 -R clean ./rapids -j1 -R clean
``` ```
!!! done "Ready to extract behavioral features"
If you are ready to extract features head over to the [Behavioral Features Introduction](../../features/feature-introduction/)

View File

@ -31,29 +31,21 @@ DAY_SEGMENTS: &day_segments
FILE: "example_profile/exampleworkflow_daysegments.csv" FILE: "example_profile/exampleworkflow_daysegments.csv"
INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs
# See https://www.rapids.science/setup/configuration/#device-data-source-configuration
DEVICE_DATA:
PHONE:
SOURCE:
TYPE: DATABASE
DATABASE_GROUP: *database_group
DEVICE_ID_COLUMN: device_id # column name
TIMEZONE:
TYPE: SINGLE # SINGLE or MULTIPLE
VALUE: *timezone # IF TYPE=SINGLE, see docs
FITBIT:
SOURCE:
TYPE: DATABASE # DATABASE or FILES (set each FITBIT_SENSOR TABLE attribute accordingly with a table name or a file path)
COLUMN_FORMAT: JSON # JSON or PLAIN_TEXT
DATABASE_GROUP: *database_group
DEVICE_ID_COLUMN: device_id # column name
TIMEZONE:
TYPE: SINGLE # Fitbit only supports SINGLE timezones
VALUE: *timezone # see docs
############## PHONE ########################################################### ############## PHONE ###########################################################
################################################################################ ################################################################################
# See https://www.rapids.science/setup/configuration/#device-data-source-configuration
PHONE_DATA_CONFIGURATION:
SOURCE:
TYPE: DATABASE
DATABASE_GROUP: *database_group
DEVICE_ID_COLUMN: device_id # column name
TIMEZONE:
TYPE: SINGLE # SINGLE (MULTIPLE support coming soon)
VALUE: *timezone # IF TYPE=SINGLE, see docs
# Sensors ------
PHONE_DATA_YIELD: PHONE_DATA_YIELD:
SENSORS: [PHONE_ACCELEROMETER, PHONE_ACTIVITY_RECOGNITION, PHONE_APPLICATIONS_FOREGROUND, PHONE_BATTERY, PHONE_BLUETOOTH, PHONE_CALLS, PHONE_CONVERSATION, PHONE_LIGHT, PHONE_LOCATIONS, PHONE_MESSAGES, PHONE_SCREEN, PHONE_WIFI_CONNECTED, PHONE_WIFI_VISIBLE] SENSORS: [PHONE_ACCELEROMETER, PHONE_ACTIVITY_RECOGNITION, PHONE_APPLICATIONS_FOREGROUND, PHONE_BATTERY, PHONE_BLUETOOTH, PHONE_CALLS, PHONE_CONVERSATION, PHONE_LIGHT, PHONE_LOCATIONS, PHONE_MESSAGES, PHONE_SCREEN, PHONE_WIFI_CONNECTED, PHONE_WIFI_VISIBLE]
PROVIDERS: PROVIDERS:
@ -254,6 +246,18 @@ PHONE_CONVERSATION:
############## FITBIT ########################################################## ############## FITBIT ##########################################################
################################################################################ ################################################################################
FITBIT_DATA_CONFIGURATION:
SOURCE:
TYPE: DATABASE # DATABASE or FILES (set each [FITBIT_SENSOR][TABLE] attribute with a table name or a file path accordingly)
COLUMN_FORMAT: JSON # JSON or PLAIN_TEXT
DATABASE_GROUP: *database_group
DEVICE_ID_COLUMN: device_id # column name
TIMEZONE:
TYPE: SINGLE # Fitbit only supports SINGLE timezones
VALUE: *timezone # see docs
HIDDEN:
SINGLE_FITBIT_TABLE: TRUE
FITBIT_HEARTRATE_SUMMARY: FITBIT_HEARTRATE_SUMMARY:
TABLE: fitbit_data TABLE: fitbit_data
PROVIDERS: PROVIDERS:

View File

@ -79,7 +79,6 @@ nav:
- Behavioral Features: - Behavioral Features:
- Introduction: features/feature-introduction.md - Introduction: features/feature-introduction.md
- Phone: - Phone:
- Phone Data Yield: features/phone-data-yield.md
- Phone Accelerometer: features/phone-accelerometer.md - Phone Accelerometer: features/phone-accelerometer.md
- Phone Activity Recognition: features/phone-activity-recognition.md - Phone Activity Recognition: features/phone-activity-recognition.md
- Phone Applications Foreground: features/phone-applications-foreground.md - Phone Applications Foreground: features/phone-applications-foreground.md
@ -87,6 +86,7 @@ nav:
- Phone Bluetooth: features/phone-bluetooth.md - Phone Bluetooth: features/phone-bluetooth.md
- Phone Calls: features/phone-calls.md - Phone Calls: features/phone-calls.md
- Phone Conversation: features/phone-conversation.md - Phone Conversation: features/phone-conversation.md
- Phone Data Yield: features/phone-data-yield.md
- Phone Light: features/phone-light.md - Phone Light: features/phone-light.md
- Phone Locations: features/phone-locations.md - Phone Locations: features/phone-locations.md
- Phone Messages: features/phone-messages.md - Phone Messages: features/phone-messages.md

View File

@ -27,10 +27,10 @@ rule download_phone_data:
input: input:
"data/external/participant_files/{pid}.yaml" "data/external/participant_files/{pid}.yaml"
params: params:
source = config["DEVICE_DATA"]["PHONE"]["SOURCE"], source = config["PHONE_DATA_CONFIGURATION"]["SOURCE"],
sensor = "phone_" + "{sensor}", sensor = "phone_" + "{sensor}",
table = lambda wildcards: config["PHONE_" + str(wildcards.sensor).upper()]["TABLE"], table = lambda wildcards: config["PHONE_" + str(wildcards.sensor).upper()]["TABLE"],
timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], timezone = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"],
aware_multiplatform_tables = config["PHONE_ACTIVITY_RECOGNITION"]["TABLE"]["ANDROID"] + "," + config["PHONE_ACTIVITY_RECOGNITION"]["TABLE"]["IOS"] + "," + config["PHONE_CONVERSATION"]["TABLE"]["ANDROID"] + "," + config["PHONE_CONVERSATION"]["TABLE"]["IOS"], aware_multiplatform_tables = config["PHONE_ACTIVITY_RECOGNITION"]["TABLE"]["ANDROID"] + "," + config["PHONE_ACTIVITY_RECOGNITION"]["TABLE"]["IOS"] + "," + config["PHONE_CONVERSATION"]["TABLE"]["ANDROID"] + "," + config["PHONE_CONVERSATION"]["TABLE"]["IOS"],
output: output:
"data/raw/{pid}/phone_{sensor}_raw.csv" "data/raw/{pid}/phone_{sensor}_raw.csv"
@ -40,9 +40,9 @@ rule download_phone_data:
rule download_fitbit_data: rule download_fitbit_data:
input: input:
participant_file = "data/external/participant_files/{pid}.yaml", participant_file = "data/external/participant_files/{pid}.yaml",
input_file = [] if config["DEVICE_DATA"]["FITBIT"]["SOURCE"]["TYPE"] == "DATABASE" else lambda wildcards: config["FITBIT_" + str(wildcards.sensor).upper()]["TABLE"] input_file = [] if config["FITBIT_DATA_CONFIGURATION"]["SOURCE"]["TYPE"] == "DATABASE" else lambda wildcards: config["FITBIT_" + str(wildcards.sensor).upper()]["TABLE"]
params: params:
source = config["DEVICE_DATA"]["FITBIT"]["SOURCE"], data_configuration = config["FITBIT_DATA_CONFIGURATION"],
sensor = "fitbit_" + "{sensor}", sensor = "fitbit_" + "{sensor}",
table = lambda wildcards: config["FITBIT_" + str(wildcards.sensor).upper()]["TABLE"], table = lambda wildcards: config["FITBIT_" + str(wildcards.sensor).upper()]["TABLE"],
output: output:
@ -68,8 +68,8 @@ rule phone_readable_datetime:
sensor_input = "data/raw/{pid}/phone_{sensor}_raw.csv", sensor_input = "data/raw/{pid}/phone_{sensor}_raw.csv",
day_segments = "data/interim/day_segments/{pid}_day_segments.csv" day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
params: params:
timezones = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["TYPE"], timezones = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["TYPE"],
fixed_timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], fixed_timezone = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"],
day_segments_type = config["DAY_SEGMENTS"]["TYPE"], day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"] include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
output: output:
@ -92,8 +92,8 @@ rule phone_yielded_timestamps_with_datetime:
sensor_input = "data/interim/{pid}/phone_yielded_timestamps.csv", sensor_input = "data/interim/{pid}/phone_yielded_timestamps.csv",
day_segments = "data/interim/day_segments/{pid}_day_segments.csv" day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
params: params:
timezones = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["TYPE"], timezones = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["TYPE"],
fixed_timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], fixed_timezone = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"],
day_segments_type = config["DAY_SEGMENTS"]["TYPE"], day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"] include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
output: output:
@ -130,8 +130,8 @@ rule phone_locations_processed_with_datetime:
sensor_input = "data/interim/{pid}/phone_locations_processed.csv", sensor_input = "data/interim/{pid}/phone_locations_processed.csv",
day_segments = "data/interim/day_segments/{pid}_day_segments.csv" day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
params: params:
timezones = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["TYPE"], timezones = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["TYPE"],
fixed_timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], fixed_timezone = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"],
day_segments_type = config["DAY_SEGMENTS"]["TYPE"], day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"] include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
output: output:
@ -152,8 +152,8 @@ rule resample_episodes_with_datetime:
sensor_input = "data/interim/{pid}/{sensor}_episodes_resampled.csv", sensor_input = "data/interim/{pid}/{sensor}_episodes_resampled.csv",
day_segments = "data/interim/day_segments/{pid}_day_segments.csv" day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
params: params:
timezones = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["TYPE"], timezones = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["TYPE"],
fixed_timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], fixed_timezone = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"],
day_segments_type = config["DAY_SEGMENTS"]["TYPE"], day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"] include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
output: output:
@ -178,9 +178,9 @@ rule fitbit_parse_heartrate:
input: input:
"data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_raw.csv" "data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_raw.csv"
params: params:
timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], timezone = config["FITBIT_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"],
table = lambda wildcards: config["FITBIT_HEARTRATE_"+str(wildcards.fitbit_data_type).upper()]["TABLE"], table = lambda wildcards: config["FITBIT_HEARTRATE_"+str(wildcards.fitbit_data_type).upper()]["TABLE"],
column_format = config["DEVICE_DATA"]["FITBIT"]["SOURCE"]["COLUMN_FORMAT"], column_format = config["FITBIT_DATA_CONFIGURATION"]["SOURCE"]["COLUMN_FORMAT"],
fitbit_data_type = "{fitbit_data_type}" fitbit_data_type = "{fitbit_data_type}"
output: output:
"data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_parsed.csv" "data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_parsed.csv"
@ -191,9 +191,9 @@ rule fitbit_parse_steps:
input: input:
"data/raw/{pid}/fitbit_steps_{fitbit_data_type}_raw.csv" "data/raw/{pid}/fitbit_steps_{fitbit_data_type}_raw.csv"
params: params:
timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], timezone = config["FITBIT_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"],
table = lambda wildcards: config["FITBIT_STEPS_"+str(wildcards.fitbit_data_type).upper()]["TABLE"], table = lambda wildcards: config["FITBIT_STEPS_"+str(wildcards.fitbit_data_type).upper()]["TABLE"],
column_format = config["DEVICE_DATA"]["FITBIT"]["SOURCE"]["COLUMN_FORMAT"], column_format = config["FITBIT_DATA_CONFIGURATION"]["SOURCE"]["COLUMN_FORMAT"],
fitbit_data_type = "{fitbit_data_type}" fitbit_data_type = "{fitbit_data_type}"
output: output:
"data/raw/{pid}/fitbit_steps_{fitbit_data_type}_parsed.csv" "data/raw/{pid}/fitbit_steps_{fitbit_data_type}_parsed.csv"
@ -204,9 +204,9 @@ rule fitbit_parse_sleep:
input: input:
"data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_raw.csv" "data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_raw.csv"
params: params:
timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], timezone = config["FITBIT_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"],
table = lambda wildcards: config["FITBIT_SLEEP_"+str(wildcards.fitbit_data_type).upper()]["TABLE"], table = lambda wildcards: config["FITBIT_SLEEP_"+str(wildcards.fitbit_data_type).upper()]["TABLE"],
column_format = config["DEVICE_DATA"]["FITBIT"]["SOURCE"]["COLUMN_FORMAT"], column_format = config["FITBIT_DATA_CONFIGURATION"]["SOURCE"]["COLUMN_FORMAT"],
fitbit_data_type = "{fitbit_data_type}", fitbit_data_type = "{fitbit_data_type}",
sleep_episode_timestamp = config["FITBIT_SLEEP_SUMMARY"]["SLEEP_EPISODE_TIMESTAMP"] sleep_episode_timestamp = config["FITBIT_SLEEP_SUMMARY"]["SLEEP_EPISODE_TIMESTAMP"]
output: output:
@ -214,25 +214,25 @@ rule fitbit_parse_sleep:
script: script:
"../src/data/fitbit_parse_sleep.py" "../src/data/fitbit_parse_sleep.py"
rule fitbit_parse_calories: # rule fitbit_parse_calories:
input: # input:
data = expand("data/raw/{{pid}}/fitbit_calories_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])) # data = expand("data/raw/{{pid}}/fitbit_calories_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
params: # params:
timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], # timezone = config["FITBIT_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"],
table = config["FITBIT_CALORIES"]["TABLE"], # table = config["FITBIT_CALORIES"]["TABLE"],
table_format = config["FITBIT_CALORIES"]["TABLE_FORMAT"] # table_format = config["FITBIT_CALORIES"]["TABLE_FORMAT"]
output: # output:
summary_data = "data/raw/{pid}/fitbit_calories_summary_parsed.csv", # summary_data = "data/raw/{pid}/fitbit_calories_summary_parsed.csv",
intraday_data = "data/raw/{pid}/fitbit_calories_intraday_parsed.csv" # intraday_data = "data/raw/{pid}/fitbit_calories_intraday_parsed.csv"
script: # script:
"../src/data/fitbit_parse_calories.py" # "../src/data/fitbit_parse_calories.py"
rule fitbit_readable_datetime: rule fitbit_readable_datetime:
input: input:
sensor_input = "data/raw/{pid}/fitbit_{sensor}_{fitbit_data_type}_parsed.csv", sensor_input = "data/raw/{pid}/fitbit_{sensor}_{fitbit_data_type}_parsed.csv",
day_segments = "data/interim/day_segments/{pid}_day_segments.csv" day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
params: params:
fixed_timezone = config["DEVICE_DATA"]["FITBIT"]["TIMEZONE"]["VALUE"], fixed_timezone = config["FITBIT_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"],
day_segments_type = config["DAY_SEGMENTS"]["TYPE"], day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"] include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
output: output:

View File

@ -66,7 +66,7 @@ rule overall_compliance_heatmap:
pid_files = expand("data/external/{pid}", pid=config["PIDS"]) pid_files = expand("data/external/{pid}", pid=config["PIDS"])
params: params:
only_show_valid_days = config["OVERALL_COMPLIANCE_HEATMAP"]["ONLY_SHOW_VALID_DAYS"], only_show_valid_days = config["OVERALL_COMPLIANCE_HEATMAP"]["ONLY_SHOW_VALID_DAYS"],
local_timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"], local_timezone = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"],
expected_num_of_days = config["OVERALL_COMPLIANCE_HEATMAP"]["EXPECTED_NUM_OF_DAYS"], expected_num_of_days = config["OVERALL_COMPLIANCE_HEATMAP"]["EXPECTED_NUM_OF_DAYS"],
bin_size = config["OVERALL_COMPLIANCE_HEATMAP"]["BIN_SIZE"], bin_size = config["OVERALL_COMPLIANCE_HEATMAP"]["BIN_SIZE"],
min_bins_per_hour = "{min_valid_bins_per_hour}" min_bins_per_hour = "{min_valid_bins_per_hour}"

View File

@ -8,7 +8,8 @@ library(yaml)
participant_file <- snakemake@input[["participant_file"]] participant_file <- snakemake@input[["participant_file"]]
input_file <- snakemake@input[["input_file"]] input_file <- snakemake@input[["input_file"]]
source <- snakemake@params[["source"]] data_configuration <- snakemake@params[["data_configuration"]]
source <- data_configuration$SOURCE
sensor <- snakemake@params[["sensor"]] sensor <- snakemake@params[["sensor"]]
table <- snakemake@params[["table"]] table <- snakemake@params[["table"]]
sensor_file <- snakemake@output[[1]] sensor_file <- snakemake@output[[1]]
@ -36,7 +37,7 @@ sensor_data <- sensor_data %>%
rename(device_id = source$DEVICE_ID_COLUMN) %>% rename(device_id = source$DEVICE_ID_COLUMN) %>%
mutate(device_id = unified_device_id) # Unify device_id mutate(device_id = unified_device_id) # Unify device_id
if(FALSE) # For MoSHI use, we didn't split fitbit sensors into different tables if("HIDDEN" %in% names(data_configuration) && data_configuration$HIDDEN$SINGLE_FITBIT_TABLE == TRUE) # For MoSHI use, we didn't split fitbit sensors into different tables
sensor_data <- sensor_data %>% filter(fitbit_data_type == str_split(sensor, "_", simplify = TRUE)[[2]]) sensor_data <- sensor_data %>% filter(fitbit_data_type == str_split(sensor, "_", simplify = TRUE)[[2]])
# Dropping duplicates on all columns except for _id or id # Dropping duplicates on all columns except for _id or id