Update snakefile and config for tests

pull/112/head
JulioV 2021-01-14 15:24:55 -05:00
parent fa10cbf86d
commit 8ef2e6191f
3 changed files with 114 additions and 14 deletions

View File

@ -14,6 +14,15 @@ if len(config["PIDS"]) == 0:
for provider in config["PHONE_DATA_YIELD"]["PROVIDERS"].keys():
if config["PHONE_DATA_YIELD"]["PROVIDERS"][provider]["COMPUTE"]:
allowed_phone_sensors = get_phone_sensor_names()
if not (set(config["PHONE_DATA_YIELD"]["SENSORS"]) <= set(allowed_phone_sensors)):
raise ValueError('\nInvalid sensor(s) for PHONE_DATA_YIELD. config["PHONE_DATA_YIELD"]["SENSORS"] can have '
'one or more of the following phone sensors: {}.\nInstead you provided "{}".\n'
'Keep in mind that the sensors\' TABLE attribute must point to a valid database table'\
.format(', '.join(allowed_phone_sensors),
', '.join(set(config["PHONE_DATA_YIELD"]["SENSORS"]) - set(allowed_phone_sensors))))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=map(str.lower, config["PHONE_DATA_YIELD"]["SENSORS"])))
files_to_compute.extend(expand("data/interim/{pid}/phone_yielded_timestamps.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv", pid=config["PIDS"]))
@ -147,6 +156,49 @@ for provider in config["PHONE_CONVERSATION"]["PROVIDERS"].keys():
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
# We can delete these if's as soon as we add feature PROVIDERS to any of these sensors
if isinstance(config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"], dict):
for provider in config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"].keys():
if config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_crashes_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_crashes_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_crashes_with_datetime_with_categories.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_applications_crashes_features/phone_applications_crashes_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/phone_applications_crashes.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
if isinstance(config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"], dict):
for provider in config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"].keys():
if config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_notifications_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_notifications_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_notifications_with_datetime_with_categories.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_applications_notifications_features/phone_applications_notifications_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/phone_applications_notifications.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
if isinstance(config["PHONE_KEYBOARD"]["PROVIDERS"], dict):
for provider in config["PHONE_KEYBOARD"]["PROVIDERS"].keys():
if config["PHONE_KEYBOARD"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/phone_keyboard_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_keyboard_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_keyboard_features/phone_keyboard_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_KEYBOARD"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/phone_keyboard.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
if isinstance(config["PHONE_AWARE_LOG"]["PROVIDERS"], dict):
for provider in config["PHONE_AWARE_LOG"]["PROVIDERS"].keys():
if config["PHONE_AWARE_LOG"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/phone_aware_log_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_aware_log_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_aware_log_features/phone_aware_log_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_AWARE_LOG"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/phone_aware_log.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
if config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
if config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] == "FUSED_RESAMPLED":

View File

@ -85,6 +85,16 @@ PHONE_ACTIVITY_RECOGNITION:
SRC_FOLDER: "rapids" # inside src/features/phone_activity_recognition
SRC_LANGUAGE: "python"
# See https://www.rapids.science/latest/features/phone-applications-crashes/
PHONE_APPLICATIONS_CRASHES:
TABLE: applications_crashes
APPLICATION_CATEGORIES:
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store)
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
# See https://www.rapids.science/latest/features/phone-applications-foreground/
PHONE_APPLICATIONS_FOREGROUND:
TABLE: applications_foreground
@ -107,6 +117,21 @@ PHONE_APPLICATIONS_FOREGROUND:
SRC_FOLDER: "rapids" # inside src/features/phone_applications_foreground
SRC_LANGUAGE: "python"
# See https://www.rapids.science/latest/features/phone-applications-notifications/
PHONE_APPLICATIONS_NOTIFICATIONS:
TABLE: applications_notifications
APPLICATION_CATEGORIES:
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store)
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
# See https://www.rapids.science/latest/features/phone-aware-log/
PHONE_AWARE_LOG:
TABLE: aware_log
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
# See https://www.rapids.science/latest/features/phone-battery/
PHONE_BATTERY:
TABLE: battery
@ -189,14 +214,10 @@ PHONE_DATA_YIELD:
SRC_LANGUAGE: "r"
SRC_FOLDER: "rapids" # inside src/features/phone_data_yield
# See https://www.rapids.science/latest/features/phone-keyboard/
PHONE_KEYBOARD:
TABLE: keyboard
PROVIDERS:
RAPIDS:
COMPUTE: FALSE
FEATURES: []
SRC_FOLDER: "rapids" # inside src/features/phone_keyboard
SRC_LANGUAGE: "python"
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
# See https://www.rapids.science/latest/features/phone-light/
PHONE_LIGHT:
@ -211,19 +232,22 @@ PHONE_LIGHT:
# See https://www.rapids.science/latest/features/phone-locations/
PHONE_LOCATIONS:
TABLE: locations
LOCATIONS_TO_USE: FUSED_RESAMPLED # ALL, GPS OR FUSED_RESAMPLED
LOCATIONS_TO_USE: ALL_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
PROVIDERS:
DORYAB:
COMPUTE: False
FEATURES: ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed","circadianmovement","numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy"]
ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 5
THRESHOLD_STATIC : 1 # km/h
MAXIMUM_GAP_ALLOWED: 300
MINUTES_DATA_USED: False
SAMPLING_FREQUENCY: 0
CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET,TIME_SEGMENT
CLUSTERING_ALGORITHM: DBSCAN #DBSCAN,OPTICS
SRC_FOLDER: "doryab" # inside src/features/phone_locations
SRC_LANGUAGE: "python"

View File

@ -85,6 +85,16 @@ PHONE_ACTIVITY_RECOGNITION:
SRC_FOLDER: "rapids" # inside src/features/phone_activity_recognition
SRC_LANGUAGE: "python"
# See https://www.rapids.science/latest/features/phone-applications-crashes/
PHONE_APPLICATIONS_CRASHES:
TABLE: applications_crashes
APPLICATION_CATEGORIES:
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store)
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
# See https://www.rapids.science/latest/features/phone-applications-foreground/
PHONE_APPLICATIONS_FOREGROUND:
TABLE: applications_foreground
@ -107,6 +117,21 @@ PHONE_APPLICATIONS_FOREGROUND:
SRC_FOLDER: "rapids" # inside src/features/phone_applications_foreground
SRC_LANGUAGE: "python"
# See https://www.rapids.science/latest/features/phone-applications-notifications/
PHONE_APPLICATIONS_NOTIFICATIONS:
TABLE: applications_notifications
APPLICATION_CATEGORIES:
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store)
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
# See https://www.rapids.science/latest/features/phone-aware-log/
PHONE_AWARE_LOG:
TABLE: aware_log
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
# See https://www.rapids.science/latest/features/phone-battery/
PHONE_BATTERY:
TABLE: battery
@ -189,14 +214,10 @@ PHONE_DATA_YIELD:
SRC_LANGUAGE: "r"
SRC_FOLDER: "rapids" # inside src/features/phone_data_yield
# See https://www.rapids.science/latest/features/phone-keyboard/
PHONE_KEYBOARD:
TABLE: keyboard
PROVIDERS:
RAPIDS:
COMPUTE: False
FEATURES: []
SRC_FOLDER: "rapids" # inside src/features/phone_keyboard
SRC_LANGUAGE: "python"
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
# See https://www.rapids.science/latest/features/phone-light/
PHONE_LIGHT:
@ -211,19 +232,22 @@ PHONE_LIGHT:
# See https://www.rapids.science/latest/features/phone-locations/
PHONE_LOCATIONS:
TABLE: locations
LOCATIONS_TO_USE: FUSED_RESAMPLED # ALL, GPS OR FUSED_RESAMPLED
LOCATIONS_TO_USE: ALL_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
PROVIDERS:
DORYAB:
COMPUTE: False
FEATURES: ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed","circadianmovement","numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy"]
ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 5
THRESHOLD_STATIC : 1 # km/h
MAXIMUM_GAP_ALLOWED: 300
MINUTES_DATA_USED: False
SAMPLING_FREQUENCY: 0
CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET,TIME_SEGMENT
CLUSTERING_ALGORITHM: DBSCAN #DBSCAN,OPTICS
SRC_FOLDER: "doryab" # inside src/features/phone_locations
SRC_LANGUAGE: "python"