From 585bf7bc5da2345927f2e8cff98a72523a0d3b3b Mon Sep 17 00:00:00 2001 From: JulioV Date: Wed, 13 Jan 2021 19:05:36 -0500 Subject: [PATCH] Add code so new feature providers can be added for the new four sensors --- Snakefile | 43 ++++++++ config.yaml | 10 ++ docs/change-log.md | 3 + docs/features/phone-applications-crashes.md | 14 +++ .../phone-applications-notifications.md | 14 +++ docs/features/phone-aware-log.md | 10 ++ docs/features/phone-keyboard.md | 10 ++ mkdocs.yml | 4 + rules/features.smk | 104 ++++++++++++++++++ rules/preprocessing.smk | 12 +- 10 files changed, 218 insertions(+), 6 deletions(-) create mode 100644 docs/features/phone-applications-crashes.md create mode 100644 docs/features/phone-applications-notifications.md create mode 100644 docs/features/phone-aware-log.md create mode 100644 docs/features/phone-keyboard.md diff --git a/Snakefile b/Snakefile index 04882a79..98f99230 100644 --- a/Snakefile +++ b/Snakefile @@ -147,6 +147,49 @@ for provider in config["PHONE_CONVERSATION"]["PROVIDERS"].keys(): files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") +# We can delete these if's as soon as we add feature PROVIDERS to any of these sensors +if isinstance(config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"], dict): + for provider in config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"].keys(): + if config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"][provider]["COMPUTE"]: + files_to_compute.extend(expand("data/raw/{pid}/phone_applications_crashes_raw.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/phone_applications_crashes_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/phone_applications_crashes_with_datetime_with_categories.csv", pid=config["PIDS"])) + 
files_to_compute.extend(expand("data/interim/{pid}/phone_applications_crashes_features/phone_applications_crashes_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) + files_to_compute.extend(expand("data/processed/features/{pid}/phone_applications_crashes.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + +if isinstance(config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"], dict): + for provider in config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"].keys(): + if config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"][provider]["COMPUTE"]: + files_to_compute.extend(expand("data/raw/{pid}/phone_applications_notifications_raw.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/phone_applications_notifications_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/phone_applications_notifications_with_datetime_with_categories.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/phone_applications_notifications_features/phone_applications_notifications_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) + files_to_compute.extend(expand("data/processed/features/{pid}/phone_applications_notifications.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + +if isinstance(config["PHONE_KEYBOARD"]["PROVIDERS"], dict): + for provider in 
config["PHONE_KEYBOARD"]["PROVIDERS"].keys(): + if config["PHONE_KEYBOARD"]["PROVIDERS"][provider]["COMPUTE"]: + files_to_compute.extend(expand("data/raw/{pid}/phone_keyboard_raw.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/phone_keyboard_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/phone_keyboard_features/phone_keyboard_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_KEYBOARD"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) + files_to_compute.extend(expand("data/processed/features/{pid}/phone_keyboard.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + +if isinstance(config["PHONE_AWARE_LOG"]["PROVIDERS"], dict): + for provider in config["PHONE_AWARE_LOG"]["PROVIDERS"].keys(): + if config["PHONE_AWARE_LOG"]["PROVIDERS"][provider]["COMPUTE"]: + files_to_compute.extend(expand("data/raw/{pid}/phone_aware_log_raw.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/phone_aware_log_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/phone_aware_log_features/phone_aware_log_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_AWARE_LOG"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) + files_to_compute.extend(expand("data/processed/features/{pid}/phone_aware_log.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys(): if config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]: if 
config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] == "FUSED_RESAMPLED": diff --git a/config.yaml b/config.yaml index 75c504ba..32449943 100644 --- a/config.yaml +++ b/config.yaml @@ -88,6 +88,11 @@ PHONE_ACTIVITY_RECOGNITION: # See https://www.rapids.science/latest/features/phone-applications-crashes/ PHONE_APPLICATIONS_CRASHES: TABLE: applications_crashes + APPLICATION_CATEGORIES: + CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store) + CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv" + UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE + SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD # See https://www.rapids.science/latest/features/phone-applications-foreground/ @@ -115,6 +120,11 @@ PHONE_APPLICATIONS_FOREGROUND: # See https://www.rapids.science/latest/features/phone-applications-notifications/ PHONE_APPLICATIONS_NOTIFICATIONS: TABLE: applications_notifications + APPLICATION_CATEGORIES: + CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store) + CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv" + UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE + SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. 
If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD # See https://www.rapids.science/latest/features/phone-aware-log/ diff --git a/docs/change-log.md b/docs/change-log.md index 7b05e06d..077c261b 100644 --- a/docs/change-log.md +++ b/docs/change-log.md @@ -1,5 +1,8 @@ # Change Log +## next version v0.4.0 +- Add four new phone sensors that can be used for PHONE_DATA_YIELD +- Add code so new feature providers can be added for the four new sensors ## v0.3.2 - Update docker and linux instructions to use RSPM binary repo for for faster installation - Update CI to create a release on a tagged push that passes the tests diff --git a/docs/features/phone-applications-crashes.md b/docs/features/phone-applications-crashes.md new file mode 100644 index 00000000..dcf4cd38 --- /dev/null +++ b/docs/features/phone-applications-crashes.md @@ -0,0 +1,14 @@ +# Phone Applications Crashes + +Sensor parameters description for `[PHONE_APPLICATIONS_CRASHES]`: + +|Key                              | Description | +|----------------|----------------------------------------------------------------------------------------------------------------------------------- +|`[TABLE]`| Database table where the applications crashes data is stored +|`[APPLICATION_CATEGORIES][CATALOGUE_SOURCE]` | `FILE` or `GOOGLE`. If `FILE`, app categories (genres) are read from `[CATALOGUE_FILE]`. If `GOOGLE`, app categories (genres) are scraped from the Play Store +|`[APPLICATION_CATEGORIES][CATALOGUE_FILE]` | CSV file with a `package_name` and `genre` column. 
By default we provide the catalogue created by [Stachl et al](../../citation#stachl-applications-crashes) in `data/external/stachl_application_genre_catalogue.csv` +|`[APPLICATION_CATEGORIES][UPDATE_CATALOGUE_FILE]` | If `[CATALOGUE_SOURCE]` is equal to `FILE`, this flag signals whether or not to update `[CATALOGUE_FILE]`; if `[CATALOGUE_SOURCE]` is equal to `GOOGLE`, all scraped genres will be saved to `[CATALOGUE_FILE]` +|`[APPLICATION_CATEGORIES][SCRAPE_MISSING_CATEGORIES]` | This flag signals whether or not to scrape categories (genres) missing from the `[CATALOGUE_FILE]`. If `[CATALOGUE_SOURCE]` is equal to `GOOGLE`, all genres are scraped anyway (this flag is ignored) + +!!! note + No feature providers have been implemented for this sensor yet, however you can use its key (`PHONE_APPLICATIONS_CRASHES`) to improve [`PHONE_DATA_YIELD`](../phone-data-yield) or you can [implement your own features](../add-new-features). \ No newline at end of file diff --git a/docs/features/phone-applications-notifications.md b/docs/features/phone-applications-notifications.md new file mode 100644 index 00000000..b32afde7 --- /dev/null +++ b/docs/features/phone-applications-notifications.md @@ -0,0 +1,14 @@ +# Phone Applications Notifications + +Sensor parameters description for `[PHONE_APPLICATIONS_NOTIFICATIONS]`: + +|Key                              | Description | +|----------------|----------------------------------------------------------------------------------------------------------------------------------- +|`[TABLE]`| Database table where the applications notifications data is stored +|`[APPLICATION_CATEGORIES][CATALOGUE_SOURCE]` | `FILE` or `GOOGLE`. If `FILE`, app categories (genres) are read from `[CATALOGUE_FILE]`. If `GOOGLE`, app categories (genres) are scraped from the Play Store +|`[APPLICATION_CATEGORIES][CATALOGUE_FILE]` | CSV file with a `package_name` and `genre` column. 
By default we provide the catalogue created by [Stachl et al](../../citation#stachl-applications-notifications) in `data/external/stachl_application_genre_catalogue.csv` +|`[APPLICATION_CATEGORIES][UPDATE_CATALOGUE_FILE]` | If `[CATALOGUE_SOURCE]` is equal to `FILE`, this flag signals whether or not to update `[CATALOGUE_FILE]`; if `[CATALOGUE_SOURCE]` is equal to `GOOGLE`, all scraped genres will be saved to `[CATALOGUE_FILE]` +|`[APPLICATION_CATEGORIES][SCRAPE_MISSING_CATEGORIES]` | This flag signals whether or not to scrape categories (genres) missing from the `[CATALOGUE_FILE]`. If `[CATALOGUE_SOURCE]` is equal to `GOOGLE`, all genres are scraped anyway (this flag is ignored) + +!!! note + No feature providers have been implemented for this sensor yet, however you can use its key (`PHONE_APPLICATIONS_NOTIFICATIONS`) to improve [`PHONE_DATA_YIELD`](../phone-data-yield) or you can [implement your own features](../add-new-features). \ No newline at end of file diff --git a/docs/features/phone-aware-log.md b/docs/features/phone-aware-log.md new file mode 100644 index 00000000..cbeb9d37 --- /dev/null +++ b/docs/features/phone-aware-log.md @@ -0,0 +1,10 @@ +# Phone Aware Log + +Sensor parameters description for `[PHONE_AWARE_LOG]`: + +|Key                              | Description | +|----------------|----------------------------------------------------------------------------------------------------------------------------------- +|`[TABLE]`| Database table where the aware log data is stored + +!!! note + No feature providers have been implemented for this sensor yet, however you can use its key (`PHONE_AWARE_LOG`) to improve [`PHONE_DATA_YIELD`](../phone-data-yield) or you can [implement your own features](../add-new-features). 
\ No newline at end of file diff --git a/docs/features/phone-keyboard.md b/docs/features/phone-keyboard.md new file mode 100644 index 00000000..4aba2c62 --- /dev/null +++ b/docs/features/phone-keyboard.md @@ -0,0 +1,10 @@ +# Phone Keyboard + +Sensor parameters description for `[PHONE_KEYBOARD]`: + +|Key                              | Description | +|----------------|----------------------------------------------------------------------------------------------------------------------------------- +|`[TABLE]`| Database table where the keyboard data is stored + +!!! note + No feature providers have been implemented for this sensor yet, however you can use its key (`PHONE_KEYBOARD`) to improve [`PHONE_DATA_YIELD`](../phone-data-yield) or you can [implement your own features](../add-new-features). \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index a11cd595..c6f03c63 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -82,12 +82,16 @@ nav: - Phone: - Phone Accelerometer: features/phone-accelerometer.md - Phone Activity Recognition: features/phone-activity-recognition.md + - Phone Applications Crashes: features/phone-applications-crashes.md - Phone Applications Foreground: features/phone-applications-foreground.md + - Phone Applications Notifications: features/phone-applications-notifications.md + - Phone Aware Log: features/phone-aware-log.md - Phone Battery: features/phone-battery.md - Phone Bluetooth: features/phone-bluetooth.md - Phone Calls: features/phone-calls.md - Phone Conversation: features/phone-conversation.md - Phone Data Yield: features/phone-data-yield.md + - Phone Keyboard: features/phone-keyboard.md - Phone Light: features/phone-light.md - Phone Locations: features/phone-locations.md - Phone Messages: features/phone-messages.md diff --git a/rules/features.smk b/rules/features.smk index c106912f..d66fbf19 100644 --- a/rules/features.smk +++ b/rules/features.smk @@ -96,6 +96,32 @@ rule phone_activity_recognition_r_features: script: 
"../src/features/entry.R" +rule phone_applications_crashes_python_features: + input: + sensor_data = "data/raw/{pid}/phone_applications_crashes_with_datetime_with_categories.csv", + time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" + params: + provider = lambda wildcards: config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"][wildcards.provider_key.upper()], + provider_key = "{provider_key}", + sensor_key = "phone_applications_crashes" + output: + "data/interim/{pid}/phone_applications_crashes_features/phone_applications_crashes_python_{provider_key}.csv" + script: + "../src/features/entry.py" + +rule phone_applications_crashes_r_features: + input: + sensor_data = "data/raw/{pid}/phone_applications_crashes_with_datetime_with_categories.csv", + time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" + params: + provider = lambda wildcards: config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"][wildcards.provider_key.upper()], + provider_key = "{provider_key}", + sensor_key = "phone_applications_crashes" + output: + "data/interim/{pid}/phone_applications_crashes_features/phone_applications_crashes_r_{provider_key}.csv" + script: + "../src/features/entry.R" + rule phone_applications_foreground_python_features: input: sensor_data = "data/raw/{pid}/phone_applications_foreground_with_datetime_with_categories.csv", @@ -122,6 +148,58 @@ rule phone_applications_foreground_r_features: script: "../src/features/entry.R" +rule phone_applications_notifications_python_features: + input: + sensor_data = "data/raw/{pid}/phone_applications_notifications_with_datetime_with_categories.csv", + time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" + params: + provider = lambda wildcards: config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"][wildcards.provider_key.upper()], + provider_key = "{provider_key}", + sensor_key = "phone_applications_notifications" + output: + 
"data/interim/{pid}/phone_applications_notifications_features/phone_applications_notifications_python_{provider_key}.csv" + script: + "../src/features/entry.py" + +rule phone_applications_notifications_r_features: + input: + sensor_data = "data/raw/{pid}/phone_applications_notifications_with_datetime_with_categories.csv", + time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" + params: + provider = lambda wildcards: config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"][wildcards.provider_key.upper()], + provider_key = "{provider_key}", + sensor_key = "phone_applications_notifications" + output: + "data/interim/{pid}/phone_applications_notifications_features/phone_applications_notifications_r_{provider_key}.csv" + script: + "../src/features/entry.R" + +rule phone_aware_log_python_features: + input: + sensor_data = "data/raw/{pid}/phone_aware_log_with_datetime.csv", + time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" + params: + provider = lambda wildcards: config["PHONE_AWARE_LOG"]["PROVIDERS"][wildcards.provider_key.upper()], + provider_key = "{provider_key}", + sensor_key = "phone_aware_log" + output: + "data/interim/{pid}/phone_aware_log_features/phone_aware_log_python_{provider_key}.csv" + script: + "../src/features/entry.py" + +rule phone_aware_log_r_features: + input: + sensor_data = "data/raw/{pid}/phone_aware_log_with_datetime.csv", + time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" + params: + provider = lambda wildcards: config["PHONE_AWARE_LOG"]["PROVIDERS"][wildcards.provider_key.upper()], + provider_key = "{provider_key}", + sensor_key = "phone_aware_log" + output: + "data/interim/{pid}/phone_aware_log_features/phone_aware_log_r_{provider_key}.csv" + script: + "../src/features/entry.R" + rule battery_episodes: input: "data/raw/{pid}/phone_battery_raw.csv" @@ -236,6 +314,32 @@ rule conversation_r_features: script: "../src/features/entry.R" +rule 
phone_keyboard_python_features: + input: + sensor_data = "data/raw/{pid}/phone_keyboard_with_datetime.csv", + time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" + params: + provider = lambda wildcards: config["PHONE_KEYBOARD"]["PROVIDERS"][wildcards.provider_key.upper()], + provider_key = "{provider_key}", + sensor_key = "phone_keyboard" + output: + "data/interim/{pid}/phone_keyboard_features/phone_keyboard_python_{provider_key}.csv" + script: + "../src/features/entry.py" + +rule phone_keyboard_r_features: + input: + sensor_data = "data/raw/{pid}/phone_keyboard_with_datetime.csv", + time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" + params: + provider = lambda wildcards: config["PHONE_KEYBOARD"]["PROVIDERS"][wildcards.provider_key.upper()], + provider_key = "{provider_key}", + sensor_key = "phone_keyboard" + output: + "data/interim/{pid}/phone_keyboard_features/phone_keyboard_r_{provider_key}.csv" + script: + "../src/features/entry.R" + rule phone_light_python_features: input: sensor_data = "data/raw/{pid}/phone_light_with_datetime.csv", diff --git a/rules/preprocessing.smk b/rules/preprocessing.smk index 8f754885..e660d13a 100644 --- a/rules/preprocessing.smk +++ b/rules/preprocessing.smk @@ -163,14 +163,14 @@ rule resample_episodes_with_datetime: rule phone_application_categories: input: - "data/raw/{pid}/phone_applications_foreground_with_datetime.csv" + "data/raw/{pid}/phone_applications_{type}_with_datetime.csv" params: - catalogue_source = config["PHONE_APPLICATIONS_FOREGROUND"]["APPLICATION_CATEGORIES"]["CATALOGUE_SOURCE"], - catalogue_file = config["PHONE_APPLICATIONS_FOREGROUND"]["APPLICATION_CATEGORIES"]["CATALOGUE_FILE"], - update_catalogue_file = config["PHONE_APPLICATIONS_FOREGROUND"]["APPLICATION_CATEGORIES"]["UPDATE_CATALOGUE_FILE"], - scrape_missing_genres = config["PHONE_APPLICATIONS_FOREGROUND"]["APPLICATION_CATEGORIES"]["SCRAPE_MISSING_CATEGORIES"] + catalogue_source = lambda 
wildcards: config["PHONE_APPLICATIONS_" + str(wildcards.type).upper()]["APPLICATION_CATEGORIES"]["CATALOGUE_SOURCE"], + catalogue_file = lambda wildcards: config["PHONE_APPLICATIONS_" + str(wildcards.type).upper()]["APPLICATION_CATEGORIES"]["CATALOGUE_FILE"], + update_catalogue_file = lambda wildcards: config["PHONE_APPLICATIONS_" + str(wildcards.type).upper()]["APPLICATION_CATEGORIES"]["UPDATE_CATALOGUE_FILE"], + scrape_missing_genres = lambda wildcards: config["PHONE_APPLICATIONS_" + str(wildcards.type).upper()]["APPLICATION_CATEGORIES"]["SCRAPE_MISSING_CATEGORIES"] output: - "data/raw/{pid}/phone_applications_foreground_with_datetime_with_categories.csv" + "data/raw/{pid}/phone_applications_{type}_with_datetime_with_categories.csv" script: "../src/data/application_categories.R"