diff --git a/tests/Snakefile b/tests/Snakefile index 4d78b5f2..8bc2b96c 100644 --- a/tests/Snakefile +++ b/tests/Snakefile @@ -14,6 +14,15 @@ if len(config["PIDS"]) == 0: for provider in config["PHONE_DATA_YIELD"]["PROVIDERS"].keys(): if config["PHONE_DATA_YIELD"]["PROVIDERS"][provider]["COMPUTE"]: + + allowed_phone_sensors = get_phone_sensor_names() + if not (set(config["PHONE_DATA_YIELD"]["SENSORS"]) <= set(allowed_phone_sensors)): + raise ValueError('\nInvalid sensor(s) for PHONE_DATA_YIELD. config["PHONE_DATA_YIELD"]["SENSORS"] can have ' + 'one or more of the following phone sensors: {}.\nInstead you provided "{}".\n' + 'Keep in mind that the sensors\' TABLE attribute must point to a valid database table'\ + .format(', '.join(allowed_phone_sensors), + ', '.join(set(config["PHONE_DATA_YIELD"]["SENSORS"]) - set(allowed_phone_sensors)))) + files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=map(str.lower, config["PHONE_DATA_YIELD"]["SENSORS"]))) files_to_compute.extend(expand("data/interim/{pid}/phone_yielded_timestamps.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv", pid=config["PIDS"])) @@ -147,6 +156,49 @@ for provider in config["PHONE_CONVERSATION"]["PROVIDERS"].keys(): files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") +# We can delete these if's as soon as we add feature PROVIDERS to any of these sensors +if isinstance(config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"], dict): + for provider in config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"].keys(): + if config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"][provider]["COMPUTE"]: + files_to_compute.extend(expand("data/raw/{pid}/phone_applications_crashes_raw.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/phone_applications_crashes_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/phone_applications_crashes_with_datetime_with_categories.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/phone_applications_crashes_features/phone_applications_crashes_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) + files_to_compute.extend(expand("data/processed/features/{pid}/phone_applications_crashes.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + +if isinstance(config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"], dict): + for provider in config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"].keys(): + if config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"][provider]["COMPUTE"]: + files_to_compute.extend(expand("data/raw/{pid}/phone_applications_notifications_raw.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/phone_applications_notifications_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/phone_applications_notifications_with_datetime_with_categories.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/phone_applications_notifications_features/phone_applications_notifications_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) + files_to_compute.extend(expand("data/processed/features/{pid}/phone_applications_notifications.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + +if isinstance(config["PHONE_KEYBOARD"]["PROVIDERS"], dict): + for provider in config["PHONE_KEYBOARD"]["PROVIDERS"].keys(): + if config["PHONE_KEYBOARD"]["PROVIDERS"][provider]["COMPUTE"]: + files_to_compute.extend(expand("data/raw/{pid}/phone_keyboard_raw.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/phone_keyboard_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/phone_keyboard_features/phone_keyboard_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_KEYBOARD"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) + files_to_compute.extend(expand("data/processed/features/{pid}/phone_keyboard.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + +if isinstance(config["PHONE_AWARE_LOG"]["PROVIDERS"], dict): + for provider in config["PHONE_AWARE_LOG"]["PROVIDERS"].keys(): + if config["PHONE_AWARE_LOG"]["PROVIDERS"][provider]["COMPUTE"]: + files_to_compute.extend(expand("data/raw/{pid}/phone_aware_log_raw.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/phone_aware_log_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/phone_aware_log_features/phone_aware_log_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_AWARE_LOG"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) + files_to_compute.extend(expand("data/processed/features/{pid}/phone_aware_log.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys(): if config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]: if config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] == "FUSED_RESAMPLED": diff --git a/tests/settings/frequency/testing_config.yaml b/tests/settings/frequency/testing_config.yaml index e033b0a1..bf8a134e 100644 --- a/tests/settings/frequency/testing_config.yaml +++ b/tests/settings/frequency/testing_config.yaml @@ -85,6 +85,16 @@ PHONE_ACTIVITY_RECOGNITION: SRC_FOLDER: "rapids" # inside src/features/phone_activity_recognition SRC_LANGUAGE: "python" +# See https://www.rapids.science/latest/features/phone-applications-crashes/ +PHONE_APPLICATIONS_CRASHES: + TABLE: applications_crashes + APPLICATION_CATEGORIES: + CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store) + CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv" + UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE + SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway + PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD + # See https://www.rapids.science/latest/features/phone-applications-foreground/ PHONE_APPLICATIONS_FOREGROUND: TABLE: applications_foreground @@ -107,6 +117,21 @@ PHONE_APPLICATIONS_FOREGROUND: SRC_FOLDER: "rapids" # inside src/features/phone_applications_foreground SRC_LANGUAGE: "python" +# See https://www.rapids.science/latest/features/phone-applications-notifications/ +PHONE_APPLICATIONS_NOTIFICATIONS: + TABLE: applications_notifications + APPLICATION_CATEGORIES: + CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store) + CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv" + UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE + SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway + PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD + +# See https://www.rapids.science/latest/features/phone-aware-log/ +PHONE_AWARE_LOG: + TABLE: aware_log + PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD + # See https://www.rapids.science/latest/features/phone-battery/ PHONE_BATTERY: TABLE: battery @@ -189,14 +214,10 @@ PHONE_DATA_YIELD: SRC_LANGUAGE: "r" SRC_FOLDER: "rapids" # inside src/features/phone_data_yield +# See https://www.rapids.science/latest/features/phone-keyboard/ PHONE_KEYBOARD: TABLE: keyboard - PROVIDERS: - RAPIDS: - COMPUTE: FALSE - FEATURES: [] - SRC_FOLDER: "rapids" # inside src/features/phone_keyboard - SRC_LANGUAGE: "python" + PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD # See https://www.rapids.science/latest/features/phone-light/ PHONE_LIGHT: @@ -211,19 +232,22 @@ PHONE_LIGHT: # See https://www.rapids.science/latest/features/phone-locations/ PHONE_LOCATIONS: TABLE: locations - LOCATIONS_TO_USE: FUSED_RESAMPLED # ALL, GPS OR FUSED_RESAMPLED + LOCATIONS_TO_USE: ALL_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row PROVIDERS: DORYAB: COMPUTE: False FEATURES: ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed","circadianmovement","numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy"] + ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius DBSCAN_EPS: 10 # meters DBSCAN_MINSAMPLES: 5 THRESHOLD_STATIC : 1 # km/h MAXIMUM_GAP_ALLOWED: 300 MINUTES_DATA_USED: False SAMPLING_FREQUENCY: 0 + CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET,TIME_SEGMENT + CLUSTERING_ALGORITHM: DBSCAN #DBSCAN,OPTICS SRC_FOLDER: "doryab" # inside src/features/phone_locations SRC_LANGUAGE: "python" diff --git a/tests/settings/periodic/testing_config.yaml b/tests/settings/periodic/testing_config.yaml index fba16554..d4b22b2e 100644 --- a/tests/settings/periodic/testing_config.yaml +++ b/tests/settings/periodic/testing_config.yaml @@ -85,6 +85,16 @@ PHONE_ACTIVITY_RECOGNITION: SRC_FOLDER: "rapids" # inside src/features/phone_activity_recognition SRC_LANGUAGE: "python" +# See https://www.rapids.science/latest/features/phone-applications-crashes/ +PHONE_APPLICATIONS_CRASHES: + TABLE: applications_crashes + APPLICATION_CATEGORIES: + CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store) + CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv" + UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE + SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway + PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD + # See https://www.rapids.science/latest/features/phone-applications-foreground/ PHONE_APPLICATIONS_FOREGROUND: TABLE: applications_foreground @@ -107,6 +117,21 @@ PHONE_APPLICATIONS_FOREGROUND: SRC_FOLDER: "rapids" # inside src/features/phone_applications_foreground SRC_LANGUAGE: "python" +# See https://www.rapids.science/latest/features/phone-applications-notifications/ +PHONE_APPLICATIONS_NOTIFICATIONS: + TABLE: applications_notifications + APPLICATION_CATEGORIES: + CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store) + CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv" + UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE + SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway + PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD + +# See https://www.rapids.science/latest/features/phone-aware-log/ +PHONE_AWARE_LOG: + TABLE: aware_log + PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD + # See https://www.rapids.science/latest/features/phone-battery/ PHONE_BATTERY: TABLE: battery @@ -189,14 +214,10 @@ PHONE_DATA_YIELD: SRC_LANGUAGE: "r" SRC_FOLDER: "rapids" # inside src/features/phone_data_yield +# See https://www.rapids.science/latest/features/phone-keyboard/ PHONE_KEYBOARD: TABLE: keyboard - PROVIDERS: - RAPIDS: - COMPUTE: False - FEATURES: [] - SRC_FOLDER: "rapids" # inside src/features/phone_keyboard - SRC_LANGUAGE: "python" + PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD # See https://www.rapids.science/latest/features/phone-light/ PHONE_LIGHT: @@ -211,19 +232,22 @@ PHONE_LIGHT: # See https://www.rapids.science/latest/features/phone-locations/ PHONE_LOCATIONS: TABLE: locations - LOCATIONS_TO_USE: FUSED_RESAMPLED # ALL, GPS OR FUSED_RESAMPLED + LOCATIONS_TO_USE: ALL_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row PROVIDERS: DORYAB: COMPUTE: False FEATURES: ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed","circadianmovement","numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy"] + ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius DBSCAN_EPS: 10 # meters DBSCAN_MINSAMPLES: 5 THRESHOLD_STATIC : 1 # km/h MAXIMUM_GAP_ALLOWED: 300 MINUTES_DATA_USED: False SAMPLING_FREQUENCY: 0 + CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET,TIME_SEGMENT + CLUSTERING_ALGORITHM: DBSCAN #DBSCAN,OPTICS SRC_FOLDER: "doryab" # inside src/features/phone_locations SRC_LANGUAGE: "python"