Updated Snakemake, run_tests.sh and testing_config.yaml in tests directory

pull/103/head
abhineethreddyk 2020-10-26 18:18:11 -04:00
parent b986599226
commit 6f7f88aac3
4 changed files with 133 additions and 48 deletions

View File

@ -144,29 +144,42 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_features/phone_locations_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) files_to_compute.extend(expand("data/interim/{pid}/phone_locations_features/phone_locations_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/phone_locations.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/phone_locations.csv", pid=config["PIDS"]))
if config["FITBIT_HEARTRATE"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
raise ValueError("config['FITBIT_HEARTRATE']['TABLE_FORMAT'] should be JSON or CSV but you typed" + config["FITBIT_HEARTRATE"]["TABLE_FORMAT"])
if config["FITBIT_STEPS"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
raise ValueError("config['FITBIT_STEPS']['TABLE_FORMAT'] should be JSON or CSV but you typed" + config["FITBIT_STEPS"]["TABLE_FORMAT"])
if config["FITBIT_CALORIES"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
raise ValueError("config['FITBIT_CALORIES']['TABLE_FORMAT'] should be JSON or CSV but you typed" + config["FITBIT_CALORIES"]["TABLE_FORMAT"])
if config["FITBIT_SLEEP"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
raise ValueError("config['FITBIT_SLEEP']['TABLE_FORMAT'] should be JSON or CSV but you typed" + config["FITBIT_SLEEP"]["TABLE_FORMAT"])
for provider in config["FITBIT_HEARTRATE"]["PROVIDERS"].keys(): for provider in config["FITBIT_HEARTRATE"]["PROVIDERS"].keys():
if config["FITBIT_HEARTRATE"]["PROVIDERS"][provider]["COMPUTE"]: if config["FITBIT_HEARTRATE"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_HEARTRATE"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
# files_to_compute.extend(expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv", pid = config["PIDS"], day_segment = config["HEARTRATE"]["DAY_SEGMENTS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
for provider in config["FITBIT_STEPS"]["PROVIDERS"].keys(): for provider in config["FITBIT_STEPS"]["PROVIDERS"].keys():
if config["FITBIT_STEPS"]["PROVIDERS"][provider]["COMPUTE"]: if config["FITBIT_STEPS"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_STEPS"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
# if config["STEP"]["COMPUTE"]: files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
# if config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"] == True and config["STEP"]["EXCLUDE_SLEEP"]["TYPE"] == "FITBIT_BASED": files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary"]))
# files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["STEP"]["TABLE"])) for provider in config["FITBIT_CALORIES"]["PROVIDERS"].keys():
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_step_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday"])) if config["FITBIT_CALORIES"]["PROVIDERS"][provider]["COMPUTE"]:
# files_to_compute.extend(expand("data/processed/{pid}/fitbit_step_{day_segment}.csv", pid = config["PIDS"], day_segment = config["STEP"]["DAY_SEGMENTS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
for provider in config["FITBIT_SLEEP"]["PROVIDERS"].keys(): for provider in config["FITBIT_SLEEP"]["PROVIDERS"].keys():
if config["FITBIT_SLEEP"]["PROVIDERS"][provider]["COMPUTE"]: if config["FITBIT_SLEEP"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_SLEEP"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday", "summary"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed_episodes.csv", pid=config["PIDS"], fitbit_data_type=["summary"]))
# files_to_compute.extend(expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SLEEP"]["DAY_SEGMENTS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["intraday"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday"]))
# visualization for data exploration # visualization for data exploration
if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]: if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:

View File

@ -64,7 +64,7 @@ cp -r tests/data/raw/* data/raw
cp -r tests/data/external/* data/external cp -r tests/data/external/* data/external
echo Disabling downloading of dataset... echo Disabling downloading of dataset...
sed -e '27,51 s/^/#/' -e 's/rules.download_dataset.output/"data\/raw\/\{pid\}\/\{sensor\}_raw\.csv"/' rules/preprocessing.smk > tmp sed -e '27,53 s/^/#/' -e 's/rules.download_dataset.output/"data\/raw\/\{pid\}\/\{sensor\}_raw\.csv"/' rules/preprocessing.smk > tmp
mv tmp rules/preprocessing.smk mv tmp rules/preprocessing.smk
if [ $# -eq 1 ] if [ $# -eq 1 ]

View File

@ -15,14 +15,15 @@ TIMEZONE: &timezone
DATABASE_GROUP: &database_group DATABASE_GROUP: &database_group
MY_GROUP MY_GROUP
# config section for the script that creates participant files automatically
PARTICIPANT_FILES: # run snakemake -j1 -R parse_participant_files PARTICIPANT_FILES: # run snakemake -j1 -R parse_participant_files
PHONE_SECTION: PHONE_SECTION:
INCLUDE: FALSE ADD: FALSE
PARSED_FROM: AWARE_DEVICE_TABLE #AWARE_DEVICE_TABLE or CSV_FILE PARSED_FROM: AWARE_DEVICE_TABLE #AWARE_DEVICE_TABLE or CSV_FILE
PARSED_SOURCE: *database_group # DB credentials group or CSV file path. If CSV file, it should have: device_id, pid (optional), label (optional), start_date (optional), end_date (optional) PARSED_SOURCE: *database_group # DB credentials group or CSV file path. If CSV file, it should have: device_id, pid (optional), label (optional), start_date (optional), end_date (optional)
IGNORED_DEVICE_IDS: [] IGNORED_DEVICE_IDS: []
FITBIT_SECTION: FITBIT_SECTION:
INCLUDE: FALSE ADD: FALSE
SAME_AS_PHONE: FALSE # If TRUE, all config below is ignored SAME_AS_PHONE: FALSE # If TRUE, all config below is ignored
PARSED_FROM: CSV_FILE PARSED_FROM: CSV_FILE
PARSED_SOURCE: "external/my_fitbit_participants.csv" # CSV file should have: device_id, pid (optional), label (optional), start_date (optional), end_date (optional) PARSED_SOURCE: "external/my_fitbit_participants.csv" # CSV file should have: device_id, pid (optional), label (optional), start_date (optional), end_date (optional)
@ -30,7 +31,7 @@ PARTICIPANT_FILES: # run snakemake -j1 -R parse_participant_files
SENSOR_DATA: SENSOR_DATA:
PHONE: PHONE:
SOURCE: SOURCE:
TYPE: DATABASE TYPE: DATABASE # Phone only supports DATABASE for now
DATABASE_GROUP: *database_group DATABASE_GROUP: *database_group
DEVICE_ID_COLUMN: device_id # column name DEVICE_ID_COLUMN: device_id # column name
TIMEZONE: TIMEZONE:
@ -38,9 +39,12 @@ SENSOR_DATA:
VALUE: *timezone # IF TYPE=SINGLE, timezone code (e.g. America/New_York, see attribute TIMEZONE above). If TYPE=MULTIPLE, a table in your database with two columns (timestamp, timezone) where timestamp is a unix timestamp and timezone is one of https://en.wikipedia.org/wiki/List_of_tz_database_time_zones VALUE: *timezone # IF TYPE=SINGLE, timezone code (e.g. America/New_York, see attribute TIMEZONE above). If TYPE=MULTIPLE, a table in your database with two columns (timestamp, timezone) where timestamp is a unix timestamp and timezone is one of https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
FITBIT: FITBIT:
SOURCE: SOURCE:
TYPE: DATABASE # DATABASE or CSV_FILES (set each FITBIT_SENSOR TABLE attribute accordingly) TYPE: DATABASE # DATABASE or FILES (set each FITBIT_SENSOR TABLE attribute accordingly with a table name or a file path)
DATABASE_GROUP: *database_group DATABASE_GROUP: *database_group
DEVICE_ID_COLUMN: device_id # column name DEVICE_ID_COLUMN: device_id # column name
TIMEZONE:
TYPE: SINGLE # Fitbit only supports SINGLE timezones
VALUE: *timezone # timezone code (e.g. America/New_York, see attribute TIMEZONE above and https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)
PHONE_VALID_SENSED_BINS: PHONE_VALID_SENSED_BINS:
COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features
@ -62,7 +66,7 @@ PHONE_MESSAGES:
TABLE: messages TABLE: messages
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: True COMPUTE: TRUE
MESSAGES_TYPES : [received, sent] MESSAGES_TYPES : [received, sent]
FEATURES: FEATURES:
received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact] received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
@ -75,7 +79,7 @@ PHONE_CALLS:
TABLE: calls TABLE: calls
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: True COMPUTE: TRUE
CALL_TYPES: [missed, incoming, outgoing] CALL_TYPES: [missed, incoming, outgoing]
FEATURES: FEATURES:
missed: [count, distinctcontacts, timefirstcall, timelastcall, countmostfrequentcontact] missed: [count, distinctcontacts, timefirstcall, timelastcall, countmostfrequentcontact]
@ -115,7 +119,7 @@ PHONE_BLUETOOTH:
TABLE: bluetooth TABLE: bluetooth
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: True COMPUTE: TRUE
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"] FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
SRC_FOLDER: "rapids" # inside src/features/phone_bluetooth SRC_FOLDER: "rapids" # inside src/features/phone_bluetooth
SRC_LANGUAGE: "r" SRC_LANGUAGE: "r"
@ -125,6 +129,7 @@ PHONE_ACTIVITY_RECOGNITION:
TABLE: TABLE:
ANDROID: plugin_google_activity_recognition ANDROID: plugin_google_activity_recognition
IOS: plugin_ios_activity_recognition IOS: plugin_ios_activity_recognition
EPISODE_THRESHOLD_BETWEEN_ROWS: 5 # minutes. Max time difference for two consecutive rows to be considered within the same activity recognition episode.
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
@ -138,6 +143,7 @@ PHONE_ACTIVITY_RECOGNITION:
PHONE_BATTERY: PHONE_BATTERY:
TABLE: battery TABLE: battery
EPISODE_THRESHOLD_BETWEEN_ROWS: 30 # minutes. Max time difference for two consecutive rows to be considered within the same battery episode.
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
@ -162,7 +168,7 @@ PHONE_LIGHT:
TABLE: light TABLE: light
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: True COMPUTE: TRUE
FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"] FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
SRC_FOLDER: "rapids" # inside src/features/phone_light SRC_FOLDER: "rapids" # inside src/features/phone_light
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"
@ -194,7 +200,7 @@ PHONE_APPLICATIONS_FOREGROUND:
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: True COMPUTE: TRUE
SINGLE_CATEGORIES: ["all", "email"] SINGLE_CATEGORIES: ["all", "email"]
MULTIPLE_CATEGORIES: MULTIPLE_CATEGORIES:
social: ["socialnetworks", "socialmediatools"] social: ["socialnetworks", "socialmediatools"]
@ -203,14 +209,14 @@ PHONE_APPLICATIONS_FOREGROUND:
EXCLUDED_CATEGORIES: ["systemapp", "tvvideoapps"] EXCLUDED_CATEGORIES: ["systemapp", "tvvideoapps"]
EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"] EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"]
FEATURES: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"] FEATURES: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]
SRC_FOLDER: "rapids" # inside src/features/applications_foreground SRC_FOLDER: "rapids" # inside src/features/phone_applications_foreground
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"
PHONE_WIFI_VISIBLE: PHONE_WIFI_VISIBLE:
TABLE: "wifi" TABLE: "wifi"
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: True COMPUTE: TRUE
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"] FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
SRC_FOLDER: "rapids" # inside src/features/phone_wifi_visible SRC_FOLDER: "rapids" # inside src/features/phone_wifi_visible
SRC_LANGUAGE: "r" SRC_LANGUAGE: "r"
@ -219,7 +225,7 @@ PHONE_WIFI_CONNECTED:
TABLE: "sensor_wifi" TABLE: "sensor_wifi"
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: True COMPUTE: TRUE
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"] FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
SRC_FOLDER: "rapids" # inside src/features/phone_wifi_connected SRC_FOLDER: "rapids" # inside src/features/phone_wifi_connected
SRC_LANGUAGE: "r" SRC_LANGUAGE: "r"
@ -241,25 +247,37 @@ PHONE_CONVERSATION:
SRC_FOLDER: "rapids" # inside src/features/phone_conversation SRC_FOLDER: "rapids" # inside src/features/phone_conversation
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"
############## FITBIT ##########################################################
################################################################################
FITBIT_HEARTRATE: FITBIT_HEARTRATE:
TABLE: "fitbit_data" TABLE_FORMAT: JSON # JSON or CSV
PARSE_JSON: False TABLE:
JSON: fitbit_heartrate
CSV:
SUMMARY: heartrate_summary.csv
INTRADAY: heartrate_intraday.csv
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
SUMMARY_FEATURES: ["restinghr"] # calories features' accuracy depends on the accuracy of the participant's Fitbit profile (e.g. height, weight); use these with care: ["caloriesoutofrange", "caloriesfatburn", "caloriescardio", "caloriespeak"] SUMMARY_FEATURES: ["restinghr"] # calories features' accuracy depends on the accuracy of the participant's Fitbit profile (e.g. height, weight); use these with care: ["caloriesoutofrange", "caloriesfatburn", "caloriescardio", "caloriespeak"]
INTRADAY_FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"] INTRADAY_FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"]
FITBIT_STEPS: FITBIT_STEPS:
TABLE: fitbit_data TABLE_FORMAT: JSON # JSON or CSV
PARSE_JSON: False TABLE:
EXCLUDE_SLEEP: JSON: fitbit_steps
CSV:
SUMMARY: steps_summary.csv
INTRADAY: steps_intraday.csv
EXCLUDE_SLEEP: # you can exclude sleep periods from the step features computation
EXCLUDE: False EXCLUDE: False
TYPE: FIXED # FIXED OR FITBIT_BASED (configure FITBIT_SLEEP section) TYPE: FIXED # FIXED OR FITBIT_BASED (configure FITBIT_SLEEP section)
FIXED: FIXED:
START: "23:00" START: "23:00"
END: "07:00" END: "07:00"
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
@ -271,15 +289,33 @@ FITBIT_STEPS:
INCLUDE_ZERO_STEP_ROWS: False INCLUDE_ZERO_STEP_ROWS: False
FITBIT_SLEEP: FITBIT_SLEEP:
TABLE: fitbit_data TABLE_FORMAT: JSON # JSON or CSV
PARSE_JSON: False TABLE:
JSON: fitbit_sleep
CSV:
SUMMARY: sleep_summary.csv
INTRADAY: sleep_intraday.csv
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
SLEEP_TYPES: ["main", "nap", "all"] SLEEP_TYPES: ["main", "nap", "all"]
SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"] SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
### Visualizations ################################################################ FITBIT_CALORIES:
TABLE_FORMAT: JSON # JSON or CSV
TABLE:
JSON: fitbit_calories
CSV:
SUMMARY: calories_summary.csv
INTRADAY: calories_intraday.csv
PROVIDERS:
RAPIDS:
COMPUTE: False
FEATURES: []
### Visualizations #############################################################
################################################################################
HEATMAP_FEATURES_CORRELATIONS: HEATMAP_FEATURES_CORRELATIONS:
PLOT: False PLOT: False
MIN_ROWS_RATIO: 0.5 MIN_ROWS_RATIO: 0.5

View File

@ -15,14 +15,15 @@ TIMEZONE: &timezone
DATABASE_GROUP: &database_group DATABASE_GROUP: &database_group
MY_GROUP MY_GROUP
# config section for the script that creates participant files automatically
PARTICIPANT_FILES: # run snakemake -j1 -R parse_participant_files PARTICIPANT_FILES: # run snakemake -j1 -R parse_participant_files
PHONE_SECTION: PHONE_SECTION:
INCLUDE: FALSE ADD: FALSE
PARSED_FROM: AWARE_DEVICE_TABLE #AWARE_DEVICE_TABLE or CSV_FILE PARSED_FROM: AWARE_DEVICE_TABLE #AWARE_DEVICE_TABLE or CSV_FILE
PARSED_SOURCE: *database_group # DB credentials group or CSV file path. If CSV file, it should have: device_id, pid (optional), label (optional), start_date (optional), end_date (optional) PARSED_SOURCE: *database_group # DB credentials group or CSV file path. If CSV file, it should have: device_id, pid (optional), label (optional), start_date (optional), end_date (optional)
IGNORED_DEVICE_IDS: [] IGNORED_DEVICE_IDS: []
FITBIT_SECTION: FITBIT_SECTION:
INCLUDE: FALSE ADD: FALSE
SAME_AS_PHONE: FALSE # If TRUE, all config below is ignored SAME_AS_PHONE: FALSE # If TRUE, all config below is ignored
PARSED_FROM: CSV_FILE PARSED_FROM: CSV_FILE
PARSED_SOURCE: "external/my_fitbit_participants.csv" # CSV file should have: device_id, pid (optional), label (optional), start_date (optional), end_date (optional) PARSED_SOURCE: "external/my_fitbit_participants.csv" # CSV file should have: device_id, pid (optional), label (optional), start_date (optional), end_date (optional)
@ -30,7 +31,7 @@ PARTICIPANT_FILES: # run snakemake -j1 -R parse_participant_files
SENSOR_DATA: SENSOR_DATA:
PHONE: PHONE:
SOURCE: SOURCE:
TYPE: DATABASE TYPE: DATABASE # Phone only supports DATABASE for now
DATABASE_GROUP: *database_group DATABASE_GROUP: *database_group
DEVICE_ID_COLUMN: device_id # column name DEVICE_ID_COLUMN: device_id # column name
TIMEZONE: TIMEZONE:
@ -38,9 +39,12 @@ SENSOR_DATA:
VALUE: *timezone # IF TYPE=SINGLE, timezone code (e.g. America/New_York, see attribute TIMEZONE above). If TYPE=MULTIPLE, a table in your database with two columns (timestamp, timezone) where timestamp is a unix timestamp and timezone is one of https://en.wikipedia.org/wiki/List_of_tz_database_time_zones VALUE: *timezone # IF TYPE=SINGLE, timezone code (e.g. America/New_York, see attribute TIMEZONE above). If TYPE=MULTIPLE, a table in your database with two columns (timestamp, timezone) where timestamp is a unix timestamp and timezone is one of https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
FITBIT: FITBIT:
SOURCE: SOURCE:
TYPE: DATABASE # DATABASE or CSV_FILES (set each FITBIT_SENSOR TABLE attribute accordingly) TYPE: DATABASE # DATABASE or FILES (set each FITBIT_SENSOR TABLE attribute accordingly with a table name or a file path)
DATABASE_GROUP: *database_group DATABASE_GROUP: *database_group
DEVICE_ID_COLUMN: device_id # column name DEVICE_ID_COLUMN: device_id # column name
TIMEZONE:
TYPE: SINGLE # Fitbit only supports SINGLE timezones
VALUE: *timezone # timezone code (e.g. America/New_York, see attribute TIMEZONE above and https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)
PHONE_VALID_SENSED_BINS: PHONE_VALID_SENSED_BINS:
COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features
@ -125,6 +129,7 @@ PHONE_ACTIVITY_RECOGNITION:
TABLE: TABLE:
ANDROID: plugin_google_activity_recognition ANDROID: plugin_google_activity_recognition
IOS: plugin_ios_activity_recognition IOS: plugin_ios_activity_recognition
EPISODE_THRESHOLD_BETWEEN_ROWS: 5 # minutes. Max time difference for two consecutive rows to be considered within the same activity recognition episode.
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
@ -138,6 +143,7 @@ PHONE_ACTIVITY_RECOGNITION:
PHONE_BATTERY: PHONE_BATTERY:
TABLE: battery TABLE: battery
EPISODE_THRESHOLD_BETWEEN_ROWS: 30 # minutes. Max time difference for two consecutive rows to be considered within the same battery episode.
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
@ -203,7 +209,7 @@ PHONE_APPLICATIONS_FOREGROUND:
EXCLUDED_CATEGORIES: ["systemapp", "tvvideoapps"] EXCLUDED_CATEGORIES: ["systemapp", "tvvideoapps"]
EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"] EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"]
FEATURES: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"] FEATURES: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]
SRC_FOLDER: "rapids" # inside src/features/applications_foreground SRC_FOLDER: "rapids" # inside src/features/phone_applications_foreground
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"
PHONE_WIFI_VISIBLE: PHONE_WIFI_VISIBLE:
@ -241,25 +247,37 @@ PHONE_CONVERSATION:
SRC_FOLDER: "rapids" # inside src/features/phone_conversation SRC_FOLDER: "rapids" # inside src/features/phone_conversation
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"
############## FITBIT ##########################################################
################################################################################
FITBIT_HEARTRATE: FITBIT_HEARTRATE:
TABLE: "fitbit_data" TABLE_FORMAT: JSON # JSON or CSV
PARSE_JSON: False TABLE:
JSON: fitbit_heartrate
CSV:
SUMMARY: heartrate_summary.csv
INTRADAY: heartrate_intraday.csv
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
SUMMARY_FEATURES: ["restinghr"] # calories features' accuracy depends on the accuracy of the participant's Fitbit profile (e.g. height, weight); use these with care: ["caloriesoutofrange", "caloriesfatburn", "caloriescardio", "caloriespeak"] SUMMARY_FEATURES: ["restinghr"] # calories features' accuracy depends on the accuracy of the participant's Fitbit profile (e.g. height, weight); use these with care: ["caloriesoutofrange", "caloriesfatburn", "caloriescardio", "caloriespeak"]
INTRADAY_FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"] INTRADAY_FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"]
FITBIT_STEPS: FITBIT_STEPS:
TABLE: fitbit_data TABLE_FORMAT: JSON # JSON or CSV
PARSE_JSON: False TABLE:
EXCLUDE_SLEEP: JSON: fitbit_steps
CSV:
SUMMARY: steps_summary.csv
INTRADAY: steps_intraday.csv
EXCLUDE_SLEEP: # you can exclude sleep periods from the step features computation
EXCLUDE: False EXCLUDE: False
TYPE: FIXED # FIXED OR FITBIT_BASED (configure FITBIT_SLEEP section) TYPE: FIXED # FIXED OR FITBIT_BASED (configure FITBIT_SLEEP section)
FIXED: FIXED:
START: "23:00" START: "23:00"
END: "07:00" END: "07:00"
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
@ -271,15 +289,33 @@ FITBIT_STEPS:
INCLUDE_ZERO_STEP_ROWS: False INCLUDE_ZERO_STEP_ROWS: False
FITBIT_SLEEP: FITBIT_SLEEP:
TABLE: fitbit_data TABLE_FORMAT: JSON # JSON or CSV
PARSE_JSON: False TABLE:
JSON: fitbit_sleep
CSV:
SUMMARY: sleep_summary.csv
INTRADAY: sleep_intraday.csv
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
SLEEP_TYPES: ["main", "nap", "all"] SLEEP_TYPES: ["main", "nap", "all"]
SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"] SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
### Visualizations ################################################################ FITBIT_CALORIES:
TABLE_FORMAT: JSON # JSON or CSV
TABLE:
JSON: fitbit_calories
CSV:
SUMMARY: calories_summary.csv
INTRADAY: calories_intraday.csv
PROVIDERS:
RAPIDS:
COMPUTE: False
FEATURES: []
### Visualizations #############################################################
################################################################################
HEATMAP_FEATURES_CORRELATIONS: HEATMAP_FEATURES_CORRELATIONS:
PLOT: False PLOT: False
MIN_ROWS_RATIO: 0.5 MIN_ROWS_RATIO: 0.5