From e1cfcd46e4bdc53444fbffcf5c25327900860887 Mon Sep 17 00:00:00 2001 From: Meng Li <34143965+Meng6@users.noreply.github.com> Date: Thu, 1 Jul 2021 18:08:33 -0400 Subject: [PATCH] Update example workflow for app episode features --- config.yaml | 10 +++++----- docs/workflow-examples/analysis.md | 4 ++-- example_profile/example_config.yaml | 7 ++++++- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/config.yaml b/config.yaml index 7ee17e54..9a72becc 100644 --- a/config.yaml +++ b/config.yaml @@ -113,18 +113,18 @@ PHONE_APPLICATIONS_FOREGROUND: COMPUTE: False INCLUDE_EPISODE_FEATURES: False SINGLE_CATEGORIES: ["all", "email"] - CUSTOM_CATEGORIES: - social_media: ['com.google.android.youtube','com.snapchat.android','com.instagram.android','com.zhiliaoapp.musically','com.facebook.katana'] - dating: ['com.tinder','com.relance.happycouple','com.kiwi.joyride'] MULTIPLE_CATEGORIES: social: ["socialnetworks", "socialmediatools"] entertainment: ["entertainment", "gamingknowledge", "gamingcasual", "gamingadventure", "gamingstrategy", "gamingtoolscommunity", "gamingroleplaying", "gamingaction", "gaminglogic", "gamingsports", "gamingsimulation"] + CUSTOM_CATEGORIES: + social_media: ["com.google.android.youtube", "com.snapchat.android", "com.instagram.android", "com.zhiliaoapp.musically", "com.facebook.katana"] + dating: ["com.tinder", "com.relance.happycouple", "com.kiwi.joyride"] SINGLE_APPS: ["top1global", "com.facebook.moments", "com.google.android.youtube", "com.twitter.android"] # There's no entropy for single apps EXCLUDED_CATEGORIES: [] EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"] FEATURES: - APP_EVENTS: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"] - APP_EPISODES: ["minduration", "maxduration", "meanduration", "sumduration"] + APP_EVENTS: ["countevent", "timeoffirstuse", "timeoflastuse", "frequencyentropy"] + APP_EPISODES: ["countepisode", "minduration", "maxduration", "meanduration", "sumduration"] 
IGNORE_EPISODES_SHORTER_THAN: 0 # in minutes, set to 0 to disable IGNORE_EPISODES_LONGER_THAN: 300 # in minutes, set to 0 to disable SRC_SCRIPT: src/features/phone_applications_foreground/rapids/main.py diff --git a/docs/workflow-examples/analysis.md b/docs/workflow-examples/analysis.md index c5288e97..053c1825 100644 --- a/docs/workflow-examples/analysis.md +++ b/docs/workflow-examples/analysis.md @@ -52,7 +52,7 @@ Note you will see a lot of warning messages, you can ignore them since they happ ## Modules of our analysis workflow example ??? info "1. Feature extraction" - We extract daily behavioral features for data yield, received and sent messages, missed, incoming and outgoing calls, resample fused location data using Doryab provider, activity recognition, battery, Bluetooth, screen, light, applications foreground, conversations, Wi-Fi connected, Wi-Fi visible, Fitbit heart rate summary and intraday data, Fitbit sleep summary data, and Fitbit step summary and intraday data without excluding sleep periods with an active bout threshold of 10 steps. In total, we obtained 237 daily sensor features over 12 days per participant. + We extract daily behavioral features for data yield, received and sent messages, missed, incoming and outgoing calls, resample fused location data using Doryab provider, activity recognition, battery, Bluetooth, screen, light, applications foreground, conversations, Wi-Fi connected, Wi-Fi visible, Fitbit heart rate summary and intraday data, Fitbit sleep summary data, and Fitbit step summary and intraday data without excluding sleep periods with an active bout threshold of 10 steps. In total, we obtained 245 daily sensor features over 12 days per participant. ??? info "2. Extract demographic data." It is common to have demographic data in addition to mobile and target (ground truth) data. In this example we include participants’ age, gender and the number of days they spent in hospital after their surgery as features in our model. 
We extract these three columns from the `data/external/example_workflow/participant_info.csv` file. As these three features remain the same within participants, they are used only on the population model. Refer to the `demographic_features` rule in `rules/models.smk`. @@ -69,7 +69,7 @@ Note you will see a lot of warning messages, you can ignore them since they happ ??? info "6. Feature cleaning." In this stage we perform four steps to clean our sensor feature file. First, we discard days with a data yield hour ratio less than or equal to 0.75, i.e. we include days with at least 18 hours of data. Second, we drop columns (features) with more than 30% of missing rows. Third, we drop columns with zero variance. Fourth, we drop rows (days) with more than 30% of missing columns (features). In this cleaning stage several parameters are created and exposed in `example_profile/example_config.yaml`. - After this step, we kept 161 features over 11 days for the individual model of p01, 101 features over 12 days for the individual model of p02 and 109 features over 20 days for the population model. Note that the difference in the number of features between p01 and p02 is mostly due to iOS restrictions that stops researchers from collecting the same number of sensors than in Android phones. + After this step, we kept 163 features over 11 days for the individual model of p01, 101 features over 12 days for the individual model of p02 and 109 features over 20 days for the population model. Note that the difference in the number of features between p01 and p02 is mostly due to iOS restrictions that stop researchers from collecting the same number of sensors as on Android phones. Feature cleaning for the individual models is done in the `clean_sensor_features_for_individual_participants` rule and for the population model in the `clean_sensor_features_for_all_participants` rule in `rules/models.smk`. 
diff --git a/example_profile/example_config.yaml b/example_profile/example_config.yaml index c2f269c7..6bf936c1 100644 --- a/example_profile/example_config.yaml +++ b/example_profile/example_config.yaml @@ -98,6 +98,7 @@ PHONE_APPLICATIONS_FOREGROUND: PROVIDERS: RAPIDS: COMPUTE: True + INCLUDE_EPISODE_FEATURES: False SINGLE_CATEGORIES: ["all", "email"] MULTIPLE_CATEGORIES: social: ["socialnetworks", "socialmediatools"] @@ -105,7 +106,11 @@ PHONE_APPLICATIONS_FOREGROUND: SINGLE_APPS: ["top1global", "com.facebook.moments", "com.google.android.youtube", "com.twitter.android"] # There's no entropy for single apps EXCLUDED_CATEGORIES: ["system_apps"] EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"] - FEATURES: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"] + FEATURES: + APP_EVENTS: ["countevent", "timeoffirstuse", "timeoflastuse", "frequencyentropy"] + APP_EPISODES: ["countepisode", "minduration", "maxduration", "meanduration", "sumduration"] + IGNORE_EPISODES_SHORTER_THAN: 0 # in minutes, set to 0 to disable + IGNORE_EPISODES_LONGER_THAN: 300 # in minutes, set to 0 to disable SRC_SCRIPT: src/features/phone_applications_foreground/rapids/main.py # See https://www.rapids.science/latest/features/phone-applications-notifications/