# rapids/config.yaml

# Valid database table names, one entry per sensor table.
SENSORS:
  - applications_crashes
  - applications_foreground
  - applications_notifications
  - battery
  - bluetooth
  - calls
  - locations
  - messages
  - plugin_ambient_noise
  - plugin_device_usage
  - plugin_google_activity_recognition
  - plugin_ios_activity_recognition
  - screen

# Fitbit table name(s); kept as a list even though only one is configured.
FITBIT_TABLE:
  - fitbit_data
# Fitbit sensor names.
FITBIT_SENSORS:
  - heartrate
  - steps
  - sleep
  - calories

# Participants to include in the analysis.
# Each participant needs its own file, named pXXX, that contains
# their device_id.
PIDS:
  - p01
  - p02

# Common day segments, shared with the sections below through the
# &day_segments anchor (referenced as *day_segments).
DAY_SEGMENTS: &day_segments
  - daily
  - morning
  - afternoon
  - evening
  - night

# Global timezone, shared through the &timezone anchor (referenced as *timezone).
# Use a code from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
# and double check it; for example, EST is not US Eastern Time.
TIMEZONE: &timezone America/New_York

# Database group name, shared through the &database_group anchor
# (referenced as *database_group by the download sections).
DATABASE_GROUP: &database_group MY_GROUP

# Participant download config.
DOWNLOAD_PARTICIPANTS:
  GROUP: *database_group
  # Device ids to ignore, for example "5a1dd68c-6cd1-48fe-ae1e-14344ac5215f"
  IGNORED_DEVICE_IDS: []

# Download data config.
# GROUP reuses the DATABASE_GROUP value through the *database_group alias.
DOWNLOAD_DATASET:
  GROUP: *database_group

# Readable datetime config.
# FIXED_TIMEZONE reuses the global TIMEZONE value through the *timezone alias.
READABLE_DATETIME:
  FIXED_TIMEZONE: *timezone

# Communication SMS features config.
# The entries of TYPES and the keys of FEATURES need to match.
SMS:
  TYPES:
    - received
    - sent
  FEATURES:
    received:
      - count
      - distinctcontacts
      - timefirstsms
      - timelastsms
      - countmostfrequentcontact
    sent:
      - count
      - distinctcontacts
      - timefirstsms
      - timelastsms
      - countmostfrequentcontact
  DAY_SEGMENTS: *day_segments

# Communication call features config.
# The entries of TYPES and the keys of FEATURES need to match.
CALLS:
  TYPES:
    - missed
    - incoming
    - outgoing
  FEATURES:
    # Missed calls carry no duration features.
    missed:
      - count
      - distinctcontacts
      - timefirstcall
      - timelastcall
      - countmostfrequentcontact
    incoming:
      - count
      - distinctcontacts
      - meanduration
      - sumduration
      - minduration
      - maxduration
      - stdduration
      - modeduration
      - hubermduration
      - varqnduration
      - entropyduration
      - timefirstcall
      - timelastcall
      - countmostfrequentcontact
    outgoing:
      - count
      - distinctcontacts
      - meanduration
      - sumduration
      - minduration
      - maxduration
      - stdduration
      - modeduration
      - hubermduration
      - varqnduration
      - entropyduration
      - timefirstcall
      - timelastcall
      - countmostfrequentcontact
  DAY_SEGMENTS: *day_segments

# Application genre (category) catalogue config.
APPLICATION_GENRES:
  # FILE (genres are read from CATALOGUE_FILE) or
  # GOOGLE (genres are scraped from the Play Store)
  CATALOGUE_SOURCE: FILE
  CATALOGUE_FILE: 'data/external/stachl_application_genre_catalogue.csv'
  # If CATALOGUE_SOURCE is equal to FILE, whether or not to update
  # CATALOGUE_FILE; if CATALOGUE_SOURCE is equal to GOOGLE, all scraped
  # genres will be saved to CATALOGUE_FILE
  UPDATE_CATALOGUE_FILE: false
  # Whether or not to scrape missing genres, only effective if
  # CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to
  # GOOGLE, all genres are scraped anyway
  SCRAPE_MISSING_GENRES: false

# Thresholds used to decide which phone days count as valid sensed days.
PHONE_VALID_SENSED_DAYS:
  # Bin size, in minutes
  BIN_SIZE: 5
  # Minimum number of valid hours (out of 24)
  MIN_VALID_HOURS: 20
  # Minimum number of bins per hour (out of 60min/BIN_SIZE bins)
  MIN_BINS_PER_HOUR: 8

# Resampling config for fused location samples.
RESAMPLE_FUSED_LOCATION:
  # Minutes. Only replicate location samples to the next sensed bin if the
  # phone did not stop collecting data for more than this threshold
  CONSECUTIVE_THRESHOLD: 30
  # Hours. Only replicate location samples to consecutive sensed bins if they
  # were logged within this threshold after a valid location row
  TIME_SINCE_VALID_LOCATION: 12
  TIMEZONE: *timezone

# Barnett location features config.
BARNETT_LOCATION:
  # These features are only available on a daily basis
  DAY_SEGMENTS:
    - daily
  FEATURES:
    - hometime
    - disttravelled
    - rog
    - maxdiam
    - maxhomedist
    - siglocsvisited
    - avgflightlen
    - stdflightlen
    - avgflightdur
    - stdflightdur
    - probpause
    - siglocentropy
    - circdnrtn
    - wkenddayrtn
  # ALL, ALL_EXCEPT_FUSED or RESAMPLE_FUSED
  LOCATIONS_TO_USE: ALL
  # Meters. Drops location coordinates with an accuracy higher than this.
  # This number means there's a 68% probability the true location is within
  # this radius
  ACCURACY_LIMIT: 51
  TIMEZONE: *timezone

# Bluetooth features config: day segments and the features to extract.
BLUETOOTH:
  DAY_SEGMENTS: *day_segments
  FEATURES:
    - countscans
    - uniquedevices
    - countscansmostuniquedevice

# Activity recognition features config: day segments and the features to
# extract.
ACTIVITY_RECOGNITION:
  DAY_SEGMENTS: *day_segments
  FEATURES:
    - count
    - mostcommonactivity
    - countuniqueactivities
    - activitychangecount
    - sumstationary
    - summobile
    - sumvehicle

# Battery features config: day segments and the features to extract.
BATTERY:
  DAY_SEGMENTS: *day_segments
  FEATURES:
    - countdischarge
    - sumdurationdischarge
    - countcharge
    - sumdurationcharge
    - avgconsumptionrate
    - maxconsumptionrate

# Screen features config.
SCREEN:
  DAY_SEGMENTS: *day_segments
  # NOTE(review): presumably the reference hour for the "firstuseafter"
  # feature listed below - confirm against the screen feature script.
  REFERENCE_HOUR_FIRST_USE: 0
  FEATURES_DELTAS:
    - countepisode
    - episodepersensedminutes
    - sumduration
    - maxduration
    - minduration
    - avgduration
    - stdduration
    - firstuseafter
  EPISODE_TYPES:
    - unlock

# Light features config: day segments and the features to extract.
LIGHT:
  DAY_SEGMENTS: *day_segments
  FEATURES:
    - count
    - maxlux
    - minlux
    - avglux
    - medianlux
    - stdlux

# Accelerometer features config: day segments and the features to extract.
ACCELEROMETER:
  DAY_SEGMENTS: *day_segments
  FEATURES:
    - maxmagnitude
    - minmagnitude
    - avgmagnitude
    - medianmagnitude
    - stdmagnitude
    - ratioexertionalactivityepisodes
    - sumexertionalactivityepisodes
    - longestexertionalactivityepisode
    - longestnonexertionalactivityepisode
    - countexertionalactivityepisodes
    - countnonexertionalactivityepisodes

# Foreground applications features config.
APPLICATIONS_FOREGROUND:
  DAY_SEGMENTS: *day_segments
  SINGLE_CATEGORIES:
    - all
    - video
  # Each key names a group made up of the categories listed under it.
  MULTIPLE_CATEGORIES:
    social:
      - socialnetworks
      - socialmediatools
    entertainment:
      - entertainment
      - gamingknowledge
      - gamingcasual
      - gamingadventure
      - gamingstrategy
      - gamingtoolscommunity
      - gamingroleplaying
      - gamingaction
      - gaminglogic
      - gamingsports
      - gamingsimulation
  # There's no entropy for single apps
  SINGLE_APPS:
    - top1global
    - com.facebook.moments
    - com.google.android.youtube
    - com.twitter.android
  EXCLUDED_CATEGORIES:
    - system_apps
    - video
  EXCLUDED_APPS:
    - com.fitbit.FitbitMobile
    - com.aware.plugin.upmc.cancer
  FEATURES:
    - count
    - timeoffirstuse
    - timeoflastuse
    - frequencyentropy

# Heart rate features config: day segments and the features to extract.
HEARTRATE:
  DAY_SEGMENTS: *day_segments
  FEATURES:
    - maxhr
    - minhr
    - avghr
    - medianhr
    - modehr
    - stdhr
    - diffmaxmodehr
    - diffminmodehr
    - entropyhr
    - lengthoutofrange
    - lengthfatburn
    - lengthcardio
    - lengthpeak

# Step features config. FEATURES is grouped into all-steps, sedentary-bout
# and active-bout features.
STEP:
  DAY_SEGMENTS: *day_segments
  FEATURES:
    ALL_STEPS:
      - sumallsteps
      - maxallsteps
      - minallsteps
      - avgallsteps
      - stdallsteps
    SEDENTARY_BOUT:
      - countsedentarybout
      - maxdurationsedentarybout
      - mindurationsedentarybout
      - avgdurationsedentarybout
      - stddurationsedentarybout
      - sumdurationsedentarybout
    ACTIVE_BOUT:
      - countactivebout
      - maxdurationactivebout
      - mindurationactivebout
      - avgdurationactivebout
      - stddurationactivebout
  # Threshold for an active bout, in steps
  THRESHOLD_ACTIVE_BOUT: 10
  INCLUDE_ZERO_STEP_ROWS: true

# WiFi features config: day segments and the features to extract.
WIFI:
  DAY_SEGMENTS: *day_segments
  FEATURES:
    - countscans
    - uniquedevices
    - countscansmostuniquedevice

# Parameters for the analysis and modelling steps: feature sources, day
# selection, data cleaning, model grid and target definition.
PARAMS_FOR_ANALYSIS:
  GROUNDTRUTH_TABLE: participant_info
  SOURCES: &sources ["phone_features", "fitbit_features", "phone_fitbit_features"]
  DAY_SEGMENTS: *day_segments
  PHONE_FEATURES: [accelerometer, applications_foreground, battery, call_incoming, call_missed, call_outgoing, activity_recognition, light, location_barnett, screen, sms_received, sms_sent]
  FITBIT_FEATURES: [fitbit_heartrate, fitbit_step]
  # This array is merged in the input_merge_features_of_single_participant
  # function in models.snakefile
  PHONE_FITBIT_FEATURES: ""
  DEMOGRAPHIC_FEATURES: [age, gender, inpatientdays]
  CATEGORICAL_DEMOGRAPHIC_FEATURES: ["gender"]

  # Whether or not to include only days with enough valid sensed hours
  # logic can be found in rule phone_valid_sensed_days of rules/preprocessing.snakefile
  DROP_VALID_SENSED_DAYS:
    ENABLED: true

  # Whether or not to include certain days in the analysis, logic can be found in rule days_to_analyse of rules/mystudy.snakefile
  # If you want to include all days downloaded for each participant, set ENABLED to false
  DAYS_TO_ANALYSE:
    ENABLED: true
    DAYS_BEFORE_SURGERY: 15
    # "T" or "F". This is a string flag, not a YAML boolean; quoted so it
    # cannot be mistaken for one.
    DAYS_IN_HOSPITAL: "F"
    DAYS_AFTER_DISCHARGE: 7

  # Cleaning Parameters
  COLS_NAN_THRESHOLD: 0.5
  COLS_VAR_THRESHOLD: true
  ROWS_NAN_THRESHOLD: 0.5
  PARTICIPANT_DAYS_BEFORE_THRESHOLD: 7
  PARTICIPANT_DAYS_AFTER_THRESHOLD: 4

  # Extract summarised features from daily features with any of the following substrings
  NUMERICAL_OPERATORS: ["count", "sum", "length", "avg"]
  CATEGORICAL_OPERATORS: ["mostcommon"]

  MODEL_NAMES: ["LogReg", "kNN", "SVM", "DT", "RF", "GB", "XGBoost", "LightGBM"]
  CV_METHODS: ["LeaveOneOut"]
  # "summarised" or "notsummarised"
  SUMMARISED: ["summarised"]
  SCALER: ["notnormalized", "minmaxscaler", "standardscaler", "robustscaler"]
  RESULT_COMPONENTS: ["fold_predictions", "fold_metrics", "overall_results", "fold_feature_importances"]

  # Hyperparameter grid per model: each key under a model maps a parameter
  # name to the list of candidate values. The model keys match MODEL_NAMES.
  MODEL_HYPERPARAMS:
    LogReg:
      clf__C: [0.01, 0.1, 1, 10, 100]
      clf__solver: ["newton-cg", "lbfgs", "liblinear", "saga"]
      clf__penalty: ["l2"]
    kNN:
      # Odd values from 1 to 19, written out explicitly. YAML has no range()
      # expression: inside a flow mapping the commas of `range(1, 21, 2)`
      # split it into separate entries instead of producing a list.
      clf__n_neighbors: [1, 3, 5, 7, 9, 11, 13, 15, 17, 19]
      clf__weights: ["uniform", "distance"]
      clf__metric: ["euclidean", "manhattan", "minkowski"]
    SVM:
      clf__C: [0.01, 0.1, 1, 10, 100]
      clf__gamma: ["scale", "auto"]
      clf__kernel: ["rbf", "poly", "sigmoid"]
    DT:
      clf__criterion: ["gini", "entropy"]
      # null (not None): YAML loads a bare None as the string "None",
      # whereas null is loaded as a real null value.
      clf__max_depth: [null, 3, 5, 7, 9]
      clf__max_features: [null, "auto", "sqrt", "log2"]
    RF:
      clf__n_estimators: [2, 5, 10, 100]
      # null (not None), for the same reason as in DT above.
      clf__max_depth: [null, 3, 5, 7, 9]
    GB:
      clf__learning_rate: [0.01, 0.1, 1]
      clf__n_estimators: [5, 10, 100, 200]
      clf__subsample: [0.5, 0.7, 1.0]
      clf__max_depth: [3, 5, 7, 9]
    XGBoost:
      clf__learning_rate: [0.01, 0.1, 1]
      clf__n_estimators: [5, 10, 100, 200]
      # NOTE(review): same parameter name as LightGBM below - confirm that the
      # XGBoost estimator actually uses num_leaves.
      clf__num_leaves: [5, 16, 31, 62]
    LightGBM:
      clf__learning_rate: [0.01, 0.1, 1]
      clf__n_estimators: [5, 10, 100, 200]
      clf__num_leaves: [5, 16, 31, 62]


  # Target Settings:
  # 1 => TARGETS_RATIO_THRESHOLD (ceiling) or more of available CESD scores were TARGETS_VALUE_THRESHOLD or higher; 0 => otherwise
  TARGETS_RATIO_THRESHOLD: 0.5
  TARGETS_VALUE_THRESHOLD: 16
Added dataset download rule 2019-10-24 18:11:24 +02:00			`# Valid database table names`
Implement AR features for iOS Co-authored-by: JulioV <juliovhz@gmail.com> 2020-04-21 00:05:54 +02:00			`SENSORS: [applications_crashes, applications_foreground, applications_notifications, battery, bluetooth, calls, locations, messages, plugin_ambient_noise, plugin_device_usage, plugin_google_activity_recognition, plugin_ios_activity_recognition, screen]`
Added dataset download rule 2019-10-24 18:11:24 +02:00
Add fitbit raw data and datetime 2020-01-15 23:18:10 +01:00			`FITBIT_TABLE: [fitbit_data]`
Add calories module for fitbit_readable_datetime.py 2020-04-15 22:01:02 +02:00			`FITBIT_SENSORS: [heartrate, steps, sleep, calories]`
Add fitbit raw data and datetime 2020-01-15 23:18:10 +01:00
Added dataset download rule 2019-10-24 18:11:24 +02:00			`# Participants to include in the analysis`
			`# You must create a file for each participant`
			`# named pXXX containing their device_id`
Add communication sms metrics 2019-10-24 22:27:43 +02:00			`PIDS: [p01, p02]`

			`# Global var with common day segments`
			`DAY_SEGMENTS: &day_segments`
			`[daily, morning, afternoon, evening, night]`

Add Barnett's location features 2019-11-05 21:17:20 +01:00			`# Global timezone`
Fix timezone for US Eastern Time 2019-11-06 23:12:06 +01:00			`# Use codes from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones`
			`# Double check your code, for example EST is not US Eastern Time.`
Add Barnett's location features 2019-11-05 21:17:20 +01:00			`TIMEZONE: &timezone`
Fix timezone for US Eastern Time 2019-11-06 23:12:06 +01:00			`America/New_York`
Add Barnett's location features 2019-11-05 21:17:20 +01:00
Add download_participants functionality 2020-02-10 22:45:34 +01:00			`DATABASE_GROUP: &database_group`
Change default database group name 2020-03-09 17:55:43 +01:00			`MY_GROUP`
Add download_participants functionality 2020-02-10 22:45:34 +01:00
			`DOWNLOAD_PARTICIPANTS:`
			`IGNORED_DEVICE_IDS: [] # for example "5a1dd68c-6cd1-48fe-ae1e-14344ac5215f"`
			`GROUP: *database_group`

Modularise config for download and readable datetime rules 2019-10-24 23:27:00 +02:00			`# Download data config`
			`DOWNLOAD_DATASET:`
Add download_participants functionality 2020-02-10 22:45:34 +01:00			`GROUP: *database_group`
Modularise config for download and readable datetime rules 2019-10-24 23:27:00 +02:00
			`# Readable datetime config`
			`READABLE_DATETIME:`
Add Barnett's location features 2019-11-05 21:17:20 +01:00			`FIXED_TIMEZONE: *timezone`
Modularise config for download and readable datetime rules 2019-10-24 23:27:00 +02:00
Refactor sms feature: replace "metrics" with "features" Co-authored-by: Meng Li <AnnieLM1996@gmail.com> 2020-04-08 22:02:58 +02:00			`# Communication SMS features config, TYPES and FEATURES keys need to match`
Refactor sms metrics to produce a single file 2019-11-06 21:38:08 +01:00			`SMS:`
			`TYPES : [received, sent]`
Refactor sms feature: replace "metrics" with "features" Co-authored-by: Meng Li <AnnieLM1996@gmail.com> 2020-04-08 22:02:58 +02:00			`FEATURES:`
Add three more features to sms 2019-11-12 21:53:59 +01:00			`received: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact]`
			`sent: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact]`
Refactor sms metrics to produce a single file 2019-11-06 21:38:08 +01:00			`DAY_SEGMENTS: *day_segments`

Refactor call features: replace "metrics" with "features" Co-authored-by: Meng Li <AnnieLM1996@gmail.com> 2020-04-08 17:51:18 +02:00			`# Communication call features config, TYPES and FEATURES keys need to match`
Refactor call features to produce a single file 2019-11-06 20:47:33 +01:00			`CALLS:`
			`TYPES: [missed, incoming, outgoing]`
Refactor call features: replace "metrics" with "features" Co-authored-by: Meng Li <AnnieLM1996@gmail.com> 2020-04-08 17:51:18 +02:00			`FEATURES:`
Add three more features to missing calls 2019-11-12 21:44:26 +01:00			`missed: [count, distinctcontacts, timefirstcall, timelastcall, countmostfrequentcontact]`
Add three more features to calls 2019-11-12 21:40:48 +01:00			`incoming: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, hubermduration, varqnduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact]`
			`outgoing: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, hubermduration, varqnduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact]`
Add call features 2019-10-25 16:21:09 +02:00			`DAY_SEGMENTS: *day_segments`
Add valid sensed days 2019-11-05 18:34:22 +01:00
Add category (genre) to foreground apps 2020-01-16 00:28:56 +01:00			`APPLICATION_GENRES:`
			`CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store)`
			`CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"`
			`UPDATE_CATALOGUE_FILE: false # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE`
			`SCRAPE_MISSING_GENRES: false # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway`

Add valid sensed days 2019-11-05 18:34:22 +01:00			`PHONE_VALID_SENSED_DAYS:`
			`BIN_SIZE: 5 # (in minutes)`
			`MIN_VALID_HOURS: 20 # (out of 24)`
Add Barnett's location features 2019-11-05 21:17:20 +01:00			`MIN_BINS_PER_HOUR: 8 # (out of 60min/BIN_SIZE bins)`

Add resampling for fused location 2019-12-10 00:23:00 +01:00			`RESAMPLE_FUSED_LOCATION:`
			`CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold`
			`TIME_SINCE_VALID_LOCATION: 12 # hours, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row`
			`TIMEZONE: *timezone`

Add Barnett's location features 2019-11-05 21:17:20 +01:00			`BARNETT_LOCATION:`
Refactor battery feature: replace "metrics" with "features" Co-authored-by: Meng Li <AnnieLM1996@gmail.com> 2020-04-09 22:06:25 +02:00			`DAY_SEGMENTS: [daily] # These features are only available on a daily basis`
Refactor location_barnett features: replace "metrics" with "features" Co-authored-by: Meng Li <AnnieLM1996@gmail.com> 2020-04-09 19:20:39 +02:00			`FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]`
Add metric filter to barnett location features 2020-02-21 16:58:35 +01:00			`LOCATIONS_TO_USE: ALL # ALL, ALL_EXCEPT_FUSED OR RESAMPLE_FUSED`
Add switch to barnet_locations.R to use all, all except fused or resample fused locations 2019-12-10 01:15:10 +01:00			`ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius`
Add bluetooth features 2019-11-06 18:19:30 +01:00			`TIMEZONE: *timezone`

			`BLUETOOTH:`
			`DAY_SEGMENTS: *day_segments`
Refactor fitbit_step features: replace "metrics" with "features" Co-authored-by: Meng Li <AnnieLM1996@gmail.com> Co-authored-by: JulioV <juliovhz@gmail.com> 2020-04-03 23:03:45 +02:00			`FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]`
Updated G_A_R features with epochs 2019-11-18 20:22:08 +01:00
Implement AR features for iOS Co-authored-by: JulioV <juliovhz@gmail.com> 2020-04-21 00:05:54 +02:00			`ACTIVITY_RECOGNITION:`
Updated G_A_R features with epochs 2019-11-18 20:22:08 +01:00			`DAY_SEGMENTS: *day_segments`
Refactor google_activity_recognition feature: replace "metrics" with "features" Co-authored-by: Meng Li <AnnieLM1996@gmail.com> 2020-04-08 20:36:36 +02:00			`FEATURES: ['count','mostcommonactivity','countuniqueactivities','activitychangecount','sumstationary','summobile','sumvehicle']`
Add day epochs to battery metrics and fix some of them 2019-11-25 18:53:32 +01:00
			`BATTERY:`
			`DAY_SEGMENTS: *day_segments`
Refactor battery feature: replace "metrics" with "features" Co-authored-by: Meng Li <AnnieLM1996@gmail.com> 2020-04-09 22:06:25 +02:00			`FEATURES: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"]`
Extract screen features 2019-11-27 20:25:17 +01:00
			`SCREEN:`
			`DAY_SEGMENTS: *day_segments`
Add firstuseafterTIME feature for screen sensor 2020-03-04 18:21:36 +01:00			`REFERENCE_HOUR_FIRST_USE: 0`
Refactor screen features: replace "metrics" with "features" Co-authored-by: Meng Li <AnnieLM1996@gmail.com> 2020-04-08 17:05:16 +02:00			`FEATURES_DELTAS: ["countepisode", "episodepersensedminutes", "sumduration", "maxduration", "minduration", "avgduration", "stdduration", "firstuseafter"]`
Delete screen event features and add countepisode and episodepersensedminutes 2020-03-03 23:31:15 +01:00			`EPISODE_TYPES: ["unlock"]`
Add light features 2020-01-14 15:51:39 +01:00
			`LIGHT:`
			`DAY_SEGMENTS: *day_segments`
Refactor light features: replace "metrics" with "features" Co-authored-by: Meng Li <AnnieLM1996@gmail.com> 2020-04-08 20:02:53 +02:00			`FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]`
Add accelerometer features 2020-01-15 20:15:24 +01:00
			`ACCELEROMETER:`
			`DAY_SEGMENTS: *day_segments`
Modify the Accelerometer ‘Metrics’ to Accelerometer ‘Features’ Co-authored-by: Meng Li <AnnieLM1996@gmail.com> Co-authored-by: JulioV <juliovhz@gmail.com> 2020-04-02 23:36:28 +02:00			`FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude", "ratioexertionalactivityepisodes", "sumexertionalactivityepisodes", "longestexertionalactivityepisode", "longestnonexertionalactivityepisode", "countexertionalactivityepisodes", "countnonexertionalactivityepisodes"]`
Add Fitbit steps feature extraction 2020-01-29 22:22:53 +01:00
Add applications_foreground features 2020-02-07 17:52:55 +01:00			`APPLICATIONS_FOREGROUND:`
			`DAY_SEGMENTS: *day_segments`
			`SINGLE_CATEGORIES: ["all", "video"]`
			`MULTIPLE_CATEGORIES:`
			`social: ["socialnetworks", "socialmediatools"]`
			`entertainment: ["entertainment", "gamingknowledge", "gamingcasual", "gamingadventure", "gamingstrategy", "gamingtoolscommunity", "gamingroleplaying", "gamingaction", "gaminglogic", "gamingsports", "gamingsimulation"]`
			`SINGLE_APPS: ["top1global", "com.facebook.moments", "com.google.android.youtube", "com.twitter.android"] # There's no entropy for single apps`
			`EXCLUDED_CATEGORIES: ["system_apps", "video"]`
			`EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"]`
Refactor applications_foreground features: replace "metrics" with "features" Co-authored-by: Meng Li <AnnieLM1996@gmail.com> 2020-04-08 19:31:43 +02:00			`FEATURES: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]`
Add applications_foreground features 2020-02-07 17:52:55 +01:00
Add heartrate features 2020-02-07 17:35:15 +01:00			`HEARTRATE:`
			`DAY_SEGMENTS: *day_segments`
Refactor fitbit_heartrate feature: replace "metrics" with "features" Co-authored-by: Meng Li <AnnieLM1996@gmail.com> 2020-04-08 21:31:43 +02:00			`FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak"]`
Add heartrate features 2020-02-07 17:35:15 +01:00
Add Fitbit steps feature extraction 2020-01-29 22:22:53 +01:00			`STEP:`
			`DAY_SEGMENTS: *day_segments`
Refactor fitbit_step features: replace "metrics" with "features" Co-authored-by: Meng Li <AnnieLM1996@gmail.com> Co-authored-by: JulioV <juliovhz@gmail.com> 2020-04-03 23:03:45 +02:00			`FEATURES:`
Add Fitbit steps feature extraction 2020-01-29 22:22:53 +01:00			`ALL_STEPS: ["sumallsteps", "maxallsteps", "minallsteps", "avgallsteps", "stdallsteps"]`
Add sumdurationsedentarybout of the fitbit_step sensor Co-authored-by: Meng Li <AnnieLM1996@gmail.com> 2020-04-02 20:36:54 +02:00			`SEDENTARY_BOUT: ["countsedentarybout", "maxdurationsedentarybout", "mindurationsedentarybout", "avgdurationsedentarybout", "stddurationsedentarybout", "sumdurationsedentarybout"]`
Add Fitbit steps feature extraction 2020-01-29 22:22:53 +01:00			`ACTIVE_BOUT: ["countactivebout", "maxdurationactivebout", "mindurationactivebout", "avgdurationactivebout", "stddurationactivebout"]`
Add download_participants functionality 2020-02-10 22:45:34 +01:00			`THRESHOLD_ACTIVE_BOUT: 10 # steps`
Add INCLUDE_ZERO_STEP_ROWS flag for step sensor 2020-03-09 20:59:51 +01:00			`INCLUDE_ZERO_STEP_ROWS: True`
Add merge metrics module for analysis rules 2020-03-09 18:32:14 +01:00
Add wifi features 2020-04-13 19:24:52 +02:00			`WIFI:`
			`DAY_SEGMENTS: *day_segments`
			`FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]`

Add demographic_features and targets module; refactor analysis code Co-authored-by: JulioV <juliovhz@gmail.com> 2020-04-16 18:38:28 +02:00			`PARAMS_FOR_ANALYSIS:`
Refactor select_days_to_analyse, fix merge bugs, add clean metrics for model 2020-03-18 02:15:53 +01:00			`GROUNDTRUTH_TABLE: participant_info`
Add demographic_features and targets module; refactor analysis code Co-authored-by: JulioV <juliovhz@gmail.com> 2020-04-16 18:38:28 +02:00			`SOURCES: &sources ["phone_features", "fitbit_features", "phone_fitbit_features"]`
Add merge metrics module for analysis rules 2020-03-09 18:32:14 +01:00			`DAY_SEGMENTS: *day_segments`
Implement AR features for iOS Co-authored-by: JulioV <juliovhz@gmail.com> 2020-04-21 00:05:54 +02:00			`PHONE_FEATURES: [accelerometer, applications_foreground, battery, call_incoming, call_missed, call_outgoing, activity_recognition, light, location_barnett, screen, sms_received, sms_sent]`
Add demographic_features and targets module; refactor analysis code Co-authored-by: JulioV <juliovhz@gmail.com> 2020-04-16 18:38:28 +02:00			`FITBIT_FEATURES: [fitbit_heartrate, fitbit_step]`
			`PHONE_FITBIT_FEATURES: "" # This array is merged in the input_merge_features_of_single_participant function in models.snakefile`
			`DEMOGRAPHIC_FEATURES: [age, gender, inpatientdays]`
Add modeling module 2020-04-30 00:53:54 +02:00			`CATEGORICAL_DEMOGRAPHIC_FEATURES: ["gender"]`
Refactor select_days_to_analyse, fix merge bugs, add clean metrics for model 2020-03-18 02:15:53 +01:00
			`# Whether or not to include only days with enough valid sensed hours`
			`# logic can be found in rule phone_valid_sensed_days of rules/preprocessing.snakefile`
			`DROP_VALID_SENSED_DAYS:`
			`ENABLED: True`

			`# Whether or not to include certain days in the analysis, logic can be found in rule days_to_analyse of rules/mystudy.snakefile`
			`# If you want to include all days downloaded for each participant, set ENABLED to False`
			`DAYS_TO_ANALYSE:`
			`ENABLED: True`
			`DAYS_BEFORE_SURGERY: 15`
			`DAYS_IN_HOSPITAL: F # T or F`
			`DAYS_AFTER_DISCHARGE: 7`

			`# Cleaning Parameters`
			`COLS_NAN_THRESHOLD: 0.5`
			`COLS_VAR_THRESHOLD: True`
			`ROWS_NAN_THRESHOLD: 0.5`
Split days threshold of data cleaning into days_before_surgery and days_after_discharge Co-authored-by: JulioV <juliovhz@gmail.com> 2020-04-29 20:37:40 +02:00			`PARTICIPANT_DAYS_BEFORE_THRESHOLD: 7`
			`PARTICIPANT_DAYS_AFTER_THRESHOLD: 4`
Add demographic_features and targets module; refactor analysis code Co-authored-by: JulioV <juliovhz@gmail.com> 2020-04-16 18:38:28 +02:00
Add modeling module 2020-04-30 00:53:54 +02:00			`# Extract summarised features from daily features with any of the following substrings`
			`NUMERICAL_OPERATORS: ["count", "sum", "length", "avg"]`
			`CATEGORICAL_OPERATORS: ["mostcommon"]`

			`MODEL_NAMES: ["LogReg", "kNN", "SVM", "DT", "RF", "GB", "XGBoost", "LightGBM"]`
			`CV_METHODS: ["LeaveOneOut"]`
Add merge module for demographic features and target 2020-04-16 20:20:16 +02:00			`SUMMARISED: ["summarised"] # "summarised" or "notsummarised"`
Add modeling module 2020-04-30 00:53:54 +02:00			`SCALER: ["notnormalized", "minmaxscaler", "standardscaler", "robustscaler"]`
			`RESULT_COMPONENTS: ["fold_predictions", "fold_metrics", "overall_results", "fold_feature_importances"]`

			`MODEL_HYPERPARAMS:`
			`LogReg:`
			`{"clf__C": [0.01, 0.1, 1, 10, 100], "clf__solver": ["newton-cg", "lbfgs", "liblinear", "saga"], "clf__penalty": ["l2"]}`
			`kNN:`
			`{"clf__n_neighbors": range(1, 21, 2), "clf__weights": ["uniform", "distance"], "clf__metric": ["euclidean", "manhattan", "minkowski"]}`
			`SVM:`
			`{"clf__C": [0.01, 0.1, 1, 10, 100], "clf__gamma": ["scale", "auto"], "clf__kernel": ["rbf", "poly", "sigmoid"]}`
			`DT:`
			`{"clf__criterion": ["gini", "entropy"], "clf__max_depth": [None, 3, 5, 7, 9], "clf__max_features": [None, "auto", "sqrt", "log2"]}`
			`RF:`
			`{"clf__n_estimators": [2, 5, 10, 100],"clf__max_depth": [None, 3, 5, 7, 9]}`
			`GB:`
			`{"clf__learning_rate": [0.01, 0.1, 1], "clf__n_estimators": [5, 10, 100, 200], "clf__subsample": [0.5, 0.7, 1.0], "clf__max_depth": [3, 5, 7, 9]}`
			`XGBoost:`
			`{"clf__learning_rate": [0.01, 0.1, 1], "clf__n_estimators": [5, 10, 100, 200], "clf__num_leaves": [5, 16, 31, 62]}`
			`LightGBM:`
			`{"clf__learning_rate": [0.01, 0.1, 1], "clf__n_estimators": [5, 10, 100, 200], "clf__num_leaves": [5, 16, 31, 62]}`

Add merge module for demographic features and target 2020-04-16 20:20:16 +02:00
Add demographic_features and targets module; refactor analysis code Co-authored-by: JulioV <juliovhz@gmail.com> 2020-04-16 18:38:28 +02:00			`# Target Settings:`
			`# 1 => TARGETS_RATIO_THRESHOLD (ceiling) or more of available CESD scores were TARGETS_VALUE_THRESHOLD or higher; 0 => otherwise`
			`TARGETS_RATIO_THRESHOLD: 0.5`
			`TARGETS_VALUE_THRESHOLD: 16`