Update snakefile and config for tests
parent
fa10cbf86d
commit
8ef2e6191f
|
@ -14,6 +14,15 @@ if len(config["PIDS"]) == 0:
|
|||
|
||||
for provider in config["PHONE_DATA_YIELD"]["PROVIDERS"].keys():
|
||||
if config["PHONE_DATA_YIELD"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
|
||||
allowed_phone_sensors = get_phone_sensor_names()
|
||||
if not (set(config["PHONE_DATA_YIELD"]["SENSORS"]) <= set(allowed_phone_sensors)):
|
||||
raise ValueError('\nInvalid sensor(s) for PHONE_DATA_YIELD. config["PHONE_DATA_YIELD"]["SENSORS"] can have '
|
||||
'one or more of the following phone sensors: {}.\nInstead you provided "{}".\n'
|
||||
'Keep in mind that the sensors\' TABLE attribute must point to a valid database table'\
|
||||
.format(', '.join(allowed_phone_sensors),
|
||||
', '.join(set(config["PHONE_DATA_YIELD"]["SENSORS"]) - set(allowed_phone_sensors))))
|
||||
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=map(str.lower, config["PHONE_DATA_YIELD"]["SENSORS"])))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_yielded_timestamps.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv", pid=config["PIDS"]))
|
||||
|
@ -147,6 +156,49 @@ for provider in config["PHONE_CONVERSATION"]["PROVIDERS"].keys():
|
|||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
# We can delete these if's as soon as we add feature PROVIDERS to any of these sensors
|
||||
if isinstance(config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"], dict):
|
||||
for provider in config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"].keys():
|
||||
if config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_crashes_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_crashes_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_crashes_with_datetime_with_categories.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_applications_crashes_features/phone_applications_crashes_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_applications_crashes.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
if isinstance(config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"], dict):
|
||||
for provider in config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"].keys():
|
||||
if config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_notifications_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_notifications_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_notifications_with_datetime_with_categories.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_applications_notifications_features/phone_applications_notifications_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_applications_notifications.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
if isinstance(config["PHONE_KEYBOARD"]["PROVIDERS"], dict):
|
||||
for provider in config["PHONE_KEYBOARD"]["PROVIDERS"].keys():
|
||||
if config["PHONE_KEYBOARD"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_keyboard_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_keyboard_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_keyboard_features/phone_keyboard_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_KEYBOARD"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_keyboard.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
if isinstance(config["PHONE_AWARE_LOG"]["PROVIDERS"], dict):
|
||||
for provider in config["PHONE_AWARE_LOG"]["PROVIDERS"].keys():
|
||||
if config["PHONE_AWARE_LOG"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_aware_log_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_aware_log_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_aware_log_features/phone_aware_log_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_AWARE_LOG"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_aware_log.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
|
||||
if config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
if config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] == "FUSED_RESAMPLED":
|
||||
|
|
|
@ -85,6 +85,16 @@ PHONE_ACTIVITY_RECOGNITION:
|
|||
SRC_FOLDER: "rapids" # inside src/features/phone_activity_recognition
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-applications-crashes/
|
||||
PHONE_APPLICATIONS_CRASHES:
|
||||
TABLE: applications_crashes
|
||||
APPLICATION_CATEGORIES:
|
||||
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store)
|
||||
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
|
||||
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
|
||||
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
|
||||
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-applications-foreground/
|
||||
PHONE_APPLICATIONS_FOREGROUND:
|
||||
TABLE: applications_foreground
|
||||
|
@ -107,6 +117,21 @@ PHONE_APPLICATIONS_FOREGROUND:
|
|||
SRC_FOLDER: "rapids" # inside src/features/phone_applications_foreground
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-applications-notifications/
|
||||
PHONE_APPLICATIONS_NOTIFICATIONS:
|
||||
TABLE: applications_notifications
|
||||
APPLICATION_CATEGORIES:
|
||||
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store)
|
||||
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
|
||||
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
|
||||
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
|
||||
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-aware-log/
|
||||
PHONE_AWARE_LOG:
|
||||
TABLE: aware_log
|
||||
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-battery/
|
||||
PHONE_BATTERY:
|
||||
TABLE: battery
|
||||
|
@ -189,14 +214,10 @@ PHONE_DATA_YIELD:
|
|||
SRC_LANGUAGE: "r"
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_data_yield
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-keyboard/
|
||||
PHONE_KEYBOARD:
|
||||
TABLE: keyboard
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: FALSE
|
||||
FEATURES: []
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_keyboard
|
||||
SRC_LANGUAGE: "python"
|
||||
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-light/
|
||||
PHONE_LIGHT:
|
||||
|
@ -211,19 +232,22 @@ PHONE_LIGHT:
|
|||
# See https://www.rapids.science/latest/features/phone-locations/
|
||||
PHONE_LOCATIONS:
|
||||
TABLE: locations
|
||||
LOCATIONS_TO_USE: FUSED_RESAMPLED # ALL, GPS OR FUSED_RESAMPLED
|
||||
LOCATIONS_TO_USE: ALL_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
|
||||
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
|
||||
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
|
||||
PROVIDERS:
|
||||
DORYAB:
|
||||
COMPUTE: False
|
||||
FEATURES: ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed","circadianmovement","numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy"]
|
||||
ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
DBSCAN_EPS: 10 # meters
|
||||
DBSCAN_MINSAMPLES: 5
|
||||
THRESHOLD_STATIC : 1 # km/h
|
||||
MAXIMUM_GAP_ALLOWED: 300
|
||||
MINUTES_DATA_USED: False
|
||||
SAMPLING_FREQUENCY: 0
|
||||
CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET,TIME_SEGMENT
|
||||
CLUSTERING_ALGORITHM: DBSCAN #DBSCAN,OPTICS
|
||||
SRC_FOLDER: "doryab" # inside src/features/phone_locations
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
|
|
|
@ -85,6 +85,16 @@ PHONE_ACTIVITY_RECOGNITION:
|
|||
SRC_FOLDER: "rapids" # inside src/features/phone_activity_recognition
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-applications-crashes/
|
||||
PHONE_APPLICATIONS_CRASHES:
|
||||
TABLE: applications_crashes
|
||||
APPLICATION_CATEGORIES:
|
||||
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store)
|
||||
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
|
||||
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
|
||||
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
|
||||
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-applications-foreground/
|
||||
PHONE_APPLICATIONS_FOREGROUND:
|
||||
TABLE: applications_foreground
|
||||
|
@ -107,6 +117,21 @@ PHONE_APPLICATIONS_FOREGROUND:
|
|||
SRC_FOLDER: "rapids" # inside src/features/phone_applications_foreground
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-applications-notifications/
|
||||
PHONE_APPLICATIONS_NOTIFICATIONS:
|
||||
TABLE: applications_notifications
|
||||
APPLICATION_CATEGORIES:
|
||||
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store)
|
||||
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
|
||||
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
|
||||
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
|
||||
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-aware-log/
|
||||
PHONE_AWARE_LOG:
|
||||
TABLE: aware_log
|
||||
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-battery/
|
||||
PHONE_BATTERY:
|
||||
TABLE: battery
|
||||
|
@ -189,14 +214,10 @@ PHONE_DATA_YIELD:
|
|||
SRC_LANGUAGE: "r"
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_data_yield
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-keyboard/
|
||||
PHONE_KEYBOARD:
|
||||
TABLE: keyboard
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
FEATURES: []
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_keyboard
|
||||
SRC_LANGUAGE: "python"
|
||||
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-light/
|
||||
PHONE_LIGHT:
|
||||
|
@ -211,19 +232,22 @@ PHONE_LIGHT:
|
|||
# See https://www.rapids.science/latest/features/phone-locations/
|
||||
PHONE_LOCATIONS:
|
||||
TABLE: locations
|
||||
LOCATIONS_TO_USE: FUSED_RESAMPLED # ALL, GPS OR FUSED_RESAMPLED
|
||||
LOCATIONS_TO_USE: ALL_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
|
||||
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
|
||||
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
|
||||
PROVIDERS:
|
||||
DORYAB:
|
||||
COMPUTE: False
|
||||
FEATURES: ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed","circadianmovement","numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy"]
|
||||
ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
DBSCAN_EPS: 10 # meters
|
||||
DBSCAN_MINSAMPLES: 5
|
||||
THRESHOLD_STATIC : 1 # km/h
|
||||
MAXIMUM_GAP_ALLOWED: 300
|
||||
MINUTES_DATA_USED: False
|
||||
SAMPLING_FREQUENCY: 0
|
||||
CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET,TIME_SEGMENT
|
||||
CLUSTERING_ALGORITHM: DBSCAN #DBSCAN,OPTICS
|
||||
SRC_FOLDER: "doryab" # inside src/features/phone_locations
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
|
|
Loading…
Reference in New Issue