Update file names
parent
d32771fd9e
commit
24bf62a7ab
218
Snakefile
218
Snakefile
|
@ -13,17 +13,11 @@ if len(config["PIDS"]) == 0:
|
|||
raise ValueError("Add participants IDs to PIDS in config.yaml. Remember to create their participant files in data/external")
|
||||
|
||||
if config["PHONE_VALID_SENSED_BINS"]["COMPUTE"] or config["PHONE_VALID_SENSED_DAYS"]["COMPUTE"]: # valid sensed bins is necessary for sensed days, so we add these files anyways if sensed days are requested
|
||||
if len(config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"]) == 0:
|
||||
raise ValueError("If you want to compute PHONE_VALID_SENSED_BINS or PHONE_VALID_SENSED_DAYS, you need to add at least one table to [PHONE_VALID_SENSED_BINS][DB_TABLES] in config.yaml")
|
||||
if len(config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]) == 0:
|
||||
raise ValueError("If you want to compute PHONE_VALID_SENSED_BINS or PHONE_VALID_SENSED_DAYS, you need to add at least one PHONE_SENSOR to [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] in config.yaml")
|
||||
|
||||
pids_android = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "android", config["PIDS"]))
|
||||
pids_ios = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "ios", config["PIDS"]))
|
||||
tables_android = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["IOS"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]] # for android, discard any ios tables that may exist
|
||||
tables_ios = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"]]] # for ios, discard any android tables that may exist
|
||||
|
||||
for pids,table in zip([pids_android, pids_ios], [tables_android, tables_ios]):
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=pids, sensor=table))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=pids, sensor=table))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"])))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"])))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_timestamps.csv", pid=config["PIDS"]))
|
||||
|
||||
|
@ -33,106 +27,100 @@ if config["PHONE_VALID_SENSED_DAYS"]["COMPUTE"]:
|
|||
min_valid_hours_per_day=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_HOURS_PER_DAY"],
|
||||
min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||
|
||||
for provider in config["MESSAGES"]["PROVIDERS"].keys():
|
||||
if config["MESSAGES"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["MESSAGES"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="MESSAGES".lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="MESSAGES".lower()))
|
||||
for provider in config["PHONE_MESSAGES"]["PROVIDERS"].keys():
|
||||
if config["PHONE_MESSAGES"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_messages_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_messages_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_messages_features/phone_messages_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_MESSAGES"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_messages.csv", pid=config["PIDS"]))
|
||||
|
||||
for provider in config["CALLS"]["PROVIDERS"].keys():
|
||||
if config["CALLS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["CALLS"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="CALLS".lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="CALLS".lower()))
|
||||
for provider in config["PHONE_CALLS"]["PROVIDERS"].keys():
|
||||
if config["PHONE_CALLS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_calls_raw.csv", pid=config["PIDS"], sensor=config["PHONE_CALLS"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_calls_with_datetime.csv", pid=config["PIDS"], sensor=config["PHONE_CALLS"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_calls_with_datetime_unified.csv", pid=config["PIDS"], sensor=config["PHONE_CALLS"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_calls_features/phone_calls_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_CALLS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_calls.csv", pid=config["PIDS"]))
|
||||
|
||||
for provider in config["BLUETOOTH"]["PROVIDERS"].keys():
|
||||
if config["BLUETOOTH"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["BLUETOOTH"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["BLUETOOTH"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["BLUETOOTH"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="BLUETOOTH".lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="BLUETOOTH".lower()))
|
||||
for provider in config["PHONE_BLUETOOTH"]["PROVIDERS"].keys():
|
||||
if config["PHONE_BLUETOOTH"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_bluetooth_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_bluetooth_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_bluetooth_features/phone_bluetooth_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_BLUETOOTH"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_bluetooth.csv", pid=config["PIDS"]))
|
||||
|
||||
for provider in config["ACTIVITY_RECOGNITION"]["PROVIDERS"].keys():
|
||||
if config["ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
pids_android = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "android", config["PIDS"]))
|
||||
pids_ios = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "ios", config["PIDS"]))
|
||||
|
||||
for pids,table in zip([pids_android, pids_ios], [config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]):
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=pids, sensor=table))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=pids, sensor=table))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=pids, sensor=table))
|
||||
|
||||
files_to_compute.extend(expand("data/interim/{pid}/activity_recognition_episodes.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/activity_recognition_episodes_resampled.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/activity_recognition_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="ACTIVITY_RECOGNITION".lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="ACTIVITY_RECOGNITION".lower()))
|
||||
for provider in config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"].keys():
|
||||
if config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime_unified.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_features/phone_activity_recognition_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_activity_recognition.csv", pid=config["PIDS"]))
|
||||
|
||||
|
||||
for provider in config["BATTERY"]["PROVIDERS"].keys():
|
||||
if config["BATTERY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["BATTERY"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/battery_episodes.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/battery_episodes_resampled.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/battery_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["SCREEN"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="BATTERY".lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="BATTERY".lower()))
|
||||
for provider in config["PHONE_BATTERY"]["PROVIDERS"].keys():
|
||||
if config["PHONE_BATTERY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_battery_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_battery_episodes.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_battery_episodes_resampled.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_battery_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_battery_features/phone_battery_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_BATTERY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_battery.csv", pid=config["PIDS"]))
|
||||
|
||||
|
||||
for provider in config["SCREEN"]["PROVIDERS"].keys():
|
||||
if config["SCREEN"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
if config["SCREEN"]["DB_TABLE"] in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"]:
|
||||
for provider in config["PHONE_SCREEN"]["PROVIDERS"].keys():
|
||||
if config["PHONE_SCREEN"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
if "PHONE_SCREEN" in config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]:
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
||||
else:
|
||||
raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to [PHONE_VALID_SENSED_BINS][DB_TABLES] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/screen_episodes.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/screen_episodes_resampled.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/screen_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["SCREEN"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="SCREEN".lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="SCREEN".lower()))
|
||||
raise ValueError("Error: Add PHONE_SCREEN (and as many phone sensor as you have in your database) to [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_screen_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_screen_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_screen_with_datetime_unified.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_screen_episodes.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_screen_episodes_resampled.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_screen_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_screen_features/phone_screen_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_SCREEN"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_screen.csv", pid=config["PIDS"]))
|
||||
|
||||
for provider in config["LIGHT"]["PROVIDERS"].keys():
|
||||
if config["LIGHT"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["LIGHT"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["LIGHT"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["LIGHT"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="LIGHT".lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="LIGHT".lower()))
|
||||
for provider in config["PHONE_LIGHT"]["PROVIDERS"].keys():
|
||||
if config["PHONE_LIGHT"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_light_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_light_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_light_features/phone_light_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_LIGHT"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_light.csv", pid=config["PIDS"],))
|
||||
|
||||
for provider in config["ACCELEROMETER"]["PROVIDERS"].keys():
|
||||
if config["ACCELEROMETER"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["ACCELEROMETER"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["ACCELEROMETER"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["ACCELEROMETER"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="ACCELEROMETER".lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="ACCELEROMETER".lower()))
|
||||
for provider in config["PHONE_ACCELEROMETER"]["PROVIDERS"].keys():
|
||||
if config["PHONE_ACCELEROMETER"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_accelerometer_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_accelerometer_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_accelerometer_features/phone_accelerometer_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_ACCELEROMETER"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_accelerometer.csv", pid=config["PIDS"]))
|
||||
|
||||
for provider in config["APPLICATIONS_FOREGROUND"]["PROVIDERS"].keys():
|
||||
if config["APPLICATIONS_FOREGROUND"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_with_genre.csv", pid=config["PIDS"], sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["APPLICATIONS_FOREGROUND"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="APPLICATIONS_FOREGROUND".lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="APPLICATIONS_FOREGROUND".lower()))
|
||||
for provider in config["PHONE_APPLICATIONS_FOREGROUND"]["PROVIDERS"].keys():
|
||||
if config["PHONE_APPLICATIONS_FOREGROUND"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_foreground_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_foreground_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_foreground_with_datetime_with_categories.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_applications_foreground_features/phone_applications_foreground_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_APPLICATIONS_FOREGROUND"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_applications_foreground.csv", pid=config["PIDS"]))
|
||||
|
||||
for provider in config["WIFI"]["PROVIDERS"].keys():
|
||||
if config["WIFI"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
if len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) > 0:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor_key}_with_datetime_visibleandconnected.csv", pid=config["PIDS"], sensor_key="WIFI".lower()))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["WIFI"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="WIFI".lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="WIFI".lower()))
|
||||
if len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) > 0:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor_key}_with_datetime_visibleandconnected.csv", pid=config["PIDS"], sensor_key="WIFI".lower()))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["WIFI"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="WIFI".lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="WIFI".lower()))
|
||||
for provider in config["PHONE_WIFI_VISIBLE"]["PROVIDERS"].keys():
|
||||
if config["PHONE_WIFI_VISIBLE"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_wifi_visible_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_wifi_visible_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_wifi_visible_features/phone_wifi_visible_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_WIFI_VISIBLE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_wifi_visible.csv", pid=config["PIDS"]))
|
||||
|
||||
for provider in config["PHONE_WIFI_CONNECTED"]["PROVIDERS"].keys():
|
||||
if config["PHONE_WIFI_CONNECTED"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_wifi_connected_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_wifi_connected_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_wifi_connected_features/phone_wifi_connected_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_WIFI_CONNECTED"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_wifi_connected.csv", pid=config["PIDS"]))
|
||||
|
||||
if config["HEARTRATE"]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["HEARTRATE"]["DB_TABLE"]))
|
||||
|
@ -151,31 +139,27 @@ if config["SLEEP"]["COMPUTE"]:
|
|||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday", "summary"]))
|
||||
files_to_compute.extend(expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SLEEP"]["DAY_SEGMENTS"]))
|
||||
|
||||
for provider in config["CONVERSATION"]["PROVIDERS"].keys():
|
||||
if config["CONVERSATION"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
pids_android = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "android", config["PIDS"]))
|
||||
pids_ios = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "ios", config["PIDS"]))
|
||||
for pids,table in zip([pids_android, pids_ios], [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["CONVERSATION"]["DB_TABLE"]["IOS"]]):
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=pids, sensor=table))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=pids, sensor=table))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=pids, sensor=table))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["CONVERSATION"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="CONVERSATION".lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="CONVERSATION".lower()))
|
||||
for provider in config["PHONE_CONVERSATION"]["PROVIDERS"].keys():
|
||||
if config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_with_datetime_unified.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_conversation_features/phone_conversation_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_conversation.csv", pid=config["PIDS"]))
|
||||
|
||||
for provider in config["LOCATIONS"]["PROVIDERS"].keys():
|
||||
if config["LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
if config["LOCATIONS"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED":
|
||||
if config["LOCATIONS"]["DB_TABLE"] in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"]:
|
||||
for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
|
||||
if config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
if config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED":
|
||||
if config["PHONE_LOCATIONS"]["DB_TABLE"] in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"]:
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
||||
else:
|
||||
raise ValueError("Error: Add your locations table (and as many sensor tables as you have) to [PHONE_VALID_SENSED_BINS][DB_TABLES] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)")
|
||||
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["LOCATIONS"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["LOCATIONS"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/{sensor}_processed_{locations_to_use}.csv", pid=config["PIDS"], sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/{sensor}_processed_{locations_to_use}_with_datetime.csv", pid=config["PIDS"], sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["LOCATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="LOCATIONS".lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="LOCATIONS".lower()))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_locations_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_features/phone_locations_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_locations.csv", pid=config["PIDS"]))
|
||||
|
||||
# visualization for data exploration
|
||||
if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
|
||||
|
|
129
config.yaml
129
config.yaml
|
@ -32,9 +32,12 @@ READABLE_DATETIME:
|
|||
PHONE_VALID_SENSED_BINS:
|
||||
COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features
|
||||
BIN_SIZE: &bin_size 5 # (in minutes)
|
||||
# Add as many sensor tables as you have, they all improve the computation of PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS.
|
||||
# If you are extracting screen or Barnett's location features, screen and locations tables are mandatory.
|
||||
DB_TABLES: []
|
||||
# Add as many PHONE sensors as you have, they all improve the computation of PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS.
|
||||
# If you are extracting screen or Barnett/Doryab location features, the PHONE_SCREEN and PHONE_LOCATIONS sensors are mandatory.
|
||||
# You can choose any of the keys shown below, just make sure its DB_TABLE exists in your database!
|
||||
# PHONE_MESSAGES, PHONE_CALLS, PHONE_LOCATIONS, PHONE_BLUETOOTH, PHONE_ACTIVITY_RECOGNITION, PHONE_BATTERY, PHONE_SCREEN, PHONE_LIGHT,
|
||||
# PHONE_ACCELEROMETER, PHONE_APPLICATIONS_FOREGROUND, PHONE_WIFI_VISIBLE, PHONE_WIFI_CONNECTED, PHONE_CONVERSATION
|
||||
PHONE_SENSORS: []
|
||||
|
||||
PHONE_VALID_SENSED_DAYS:
|
||||
COMPUTE: False
|
||||
|
@ -42,7 +45,7 @@ PHONE_VALID_SENSED_DAYS:
|
|||
MIN_VALID_BINS_PER_HOUR: &min_valid_bins_per_hour [6] # (out of 60min/BIN_SIZE bins)
|
||||
|
||||
# Communication SMS features config, TYPES and FEATURES keys need to match
|
||||
MESSAGES:
|
||||
PHONE_MESSAGES:
|
||||
DB_TABLE: messages
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
|
@ -52,10 +55,10 @@ MESSAGES:
|
|||
received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
|
||||
sent: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
|
||||
SRC_LANGUAGE: "r"
|
||||
SRC_FOLDER: "rapids" # inside src/features/messages
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_messages
|
||||
|
||||
# Communication call features config, TYPES and FEATURES keys need to match
|
||||
CALLS:
|
||||
PHONE_CALLS:
|
||||
DB_TABLE: calls
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
|
@ -66,20 +69,13 @@ CALLS:
|
|||
incoming: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact]
|
||||
outgoing: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact]
|
||||
SRC_LANGUAGE: "r"
|
||||
SRC_FOLDER: "rapids" # inside src/features/calls
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_calls
|
||||
|
||||
APPLICATION_GENRES:
|
||||
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
|
||||
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
|
||||
UPDATE_CATALOGUE_FILE: false # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
|
||||
SCRAPE_MISSING_GENRES: false # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
|
||||
|
||||
LOCATIONS:
|
||||
PHONE_LOCATIONS:
|
||||
DB_TABLE: locations
|
||||
LOCATIONS_TO_USE: FUSED_RESAMPLED # ALL, GPS OR FUSED_RESAMPLED
|
||||
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
|
||||
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
|
||||
TIMEZONE: *timezone
|
||||
PROVIDERS:
|
||||
DORYAB:
|
||||
COMPUTE: False
|
||||
|
@ -90,7 +86,7 @@ LOCATIONS:
|
|||
MAXIMUM_GAP_ALLOWED: 300
|
||||
MINUTES_DATA_USED: False
|
||||
SAMPLING_FREQUENCY: 0
|
||||
SRC_FOLDER: "doryab" # inside src/features/locations
|
||||
SRC_FOLDER: "doryab" # inside src/features/phone_locations
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
BARNETT:
|
||||
|
@ -99,20 +95,20 @@ LOCATIONS:
|
|||
ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
TIMEZONE: *timezone
|
||||
MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
|
||||
SRC_FOLDER: "barnett" # inside src/features/locations
|
||||
SRC_FOLDER: "barnett" # inside src/features/phone_locations
|
||||
SRC_LANGUAGE: "r"
|
||||
|
||||
BLUETOOTH:
|
||||
PHONE_BLUETOOTH:
|
||||
DB_TABLE: bluetooth
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
|
||||
SRC_FOLDER: "rapids" # inside src/features/bluetooth
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_bluetooth
|
||||
SRC_LANGUAGE: "r"
|
||||
|
||||
|
||||
ACTIVITY_RECOGNITION:
|
||||
PHONE_ACTIVITY_RECOGNITION:
|
||||
DB_TABLE:
|
||||
ANDROID: plugin_google_activity_recognition
|
||||
IOS: plugin_ios_activity_recognition
|
||||
|
@ -124,19 +120,19 @@ ACTIVITY_RECOGNITION:
|
|||
STATIONARY: ["still", "tilting"]
|
||||
MOBILE: ["on_foot", "walking", "running", "on_bicycle"]
|
||||
VEHICLE: ["in_vehicle"]
|
||||
SRC_FOLDER: "rapids" # inside src/features/activity_recognition
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_activity_recognition
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
BATTERY:
|
||||
PHONE_BATTERY:
|
||||
DB_TABLE: battery
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
FEATURES: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"]
|
||||
SRC_FOLDER: "rapids" # inside src/features/battery
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_battery
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
SCREEN:
|
||||
PHONE_SCREEN:
|
||||
DB_TABLE: screen
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
|
@ -146,25 +142,25 @@ SCREEN:
|
|||
IGNORE_EPISODES_LONGER_THAN: 0 # in minutes, set to 0 to disable
|
||||
FEATURES: ["countepisode", "sumduration", "maxduration", "minduration", "avgduration", "stdduration", "firstuseafter"] # "episodepersensedminutes" needs to be added later
|
||||
EPISODE_TYPES: ["unlock"]
|
||||
SRC_FOLDER: "rapids" # inside src/features/screen
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_screen
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
LIGHT:
|
||||
PHONE_LIGHT:
|
||||
DB_TABLE: light
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
|
||||
SRC_FOLDER: "rapids" # inside src/features/light
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_light
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
ACCELEROMETER:
|
||||
PHONE_ACCELEROMETER:
|
||||
DB_TABLE: accelerometer
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
|
||||
SRC_FOLDER: "rapids" # inside src/features/accelerometer
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_accelerometer
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
PANDA:
|
||||
|
@ -173,11 +169,16 @@ ACCELEROMETER:
|
|||
FEATURES:
|
||||
exertional_activity_episode: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"]
|
||||
nonexertional_activity_episode: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"]
|
||||
SRC_FOLDER: "panda" # inside src/features/accelerometer
|
||||
SRC_FOLDER: "panda" # inside src/features/phone_accelerometer
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
APPLICATIONS_FOREGROUND:
|
||||
PHONE_APPLICATIONS_FOREGROUND:
|
||||
DB_TABLE: applications_foreground
|
||||
APPLICATION_CATEGORIES:
|
||||
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
|
||||
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
|
||||
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
|
||||
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
|
@ -189,9 +190,45 @@ APPLICATIONS_FOREGROUND:
|
|||
EXCLUDED_CATEGORIES: []
|
||||
EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"]
|
||||
FEATURES: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]
|
||||
SRC_FOLDER: "rapids" # inside src/features/applications_foreground
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_applications_foreground
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
PHONE_WIFI_VISIBLE:
|
||||
DB_TABLE: "wifi"
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_wifi_visible
|
||||
SRC_LANGUAGE: "r"
|
||||
|
||||
PHONE_WIFI_CONNECTED:
|
||||
DB_TABLE: "sensor_wifi"
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_wifi_connected
|
||||
SRC_LANGUAGE: "r"
|
||||
|
||||
PHONE_CONVERSATION:
|
||||
DB_TABLE:
|
||||
ANDROID: plugin_studentlife_audio_android
|
||||
IOS: plugin_studentlife_audio
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
FEATURES: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
|
||||
"sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
|
||||
"avgenergy","sdenergy","minenergy","maxenergy","silencesensedfraction","noisesensedfraction",
|
||||
"voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction",
|
||||
"unknownexpectedfraction","countconversation"]
|
||||
RECORDING_MINUTES: 1
|
||||
PAUSED_MINUTES : 3
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_conversation
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
|
||||
HEARTRATE:
|
||||
COMPUTE: False
|
||||
DB_TABLE: fitbit_data
|
||||
|
@ -223,34 +260,6 @@ SLEEP:
|
|||
SLEEP_TYPES: ["main", "nap", "all"]
|
||||
SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
|
||||
|
||||
WIFI:
|
||||
DB_TABLE:
|
||||
VISIBLE_ACCESS_POINTS: "wifi" # if you only have a CONNECTED_ACCESS_POINTS table, set this value to ""
|
||||
CONNECTED_ACCESS_POINTS: "sensor_wifi" # if you only have a VISIBLE_ACCESS_POINTS table, set this value to ""
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
|
||||
SRC_FOLDER: "rapids" # inside src/features/bluetooth
|
||||
SRC_LANGUAGE: "r"
|
||||
|
||||
CONVERSATION:
|
||||
DB_TABLE:
|
||||
ANDROID: plugin_studentlife_audio_android
|
||||
IOS: plugin_studentlife_audio
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
FEATURES: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
|
||||
"sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
|
||||
"avgenergy","sdenergy","minenergy","maxenergy","silencesensedfraction","noisesensedfraction",
|
||||
"voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction",
|
||||
"unknownexpectedfraction","countconversation"]
|
||||
RECORDING_MINUTES: 1
|
||||
PAUSED_MINUTES : 3
|
||||
SRC_FOLDER: "rapids" # inside src/features/conversation
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
### Visualizations ################################################################
|
||||
HEATMAP_FEATURES_CORRELATIONS:
|
||||
PLOT: False
|
||||
|
|
|
@ -14,69 +14,20 @@ def infer_participant_platform(participant_file):
|
|||
|
||||
return platform
|
||||
|
||||
# Preprocessing.smk ####################################################################################################
|
||||
|
||||
def optional_phone_sensed_bins_input(wildcards):
|
||||
platform = infer_participant_platform("data/external/"+wildcards.pid)
|
||||
|
||||
if platform == "android":
|
||||
tables_platform = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["IOS"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]] # for android, discard any ios tables that may exist
|
||||
elif platform == "ios":
|
||||
tables_platform = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"]]] # for ios, discard any android tables that may exist
|
||||
|
||||
return expand("data/raw/{{pid}}/{table}_with_datetime.csv", table = tables_platform)
|
||||
|
||||
def optional_phone_sensed_timestamps_input(wildcards):
|
||||
platform = infer_participant_platform("data/external/"+wildcards.pid)
|
||||
|
||||
if platform == "android":
|
||||
tables_platform = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["IOS"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]] # for android, discard any ios tables that may exist
|
||||
elif platform == "ios":
|
||||
tables_platform = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"]]] # for ios, discard any android tables that may exist
|
||||
|
||||
return expand("data/raw/{{pid}}/{table}_raw.csv", table = tables_platform)
|
||||
|
||||
# Features.smk #########################################################################################################
|
||||
def find_features_files(wildcards):
|
||||
feature_files = []
|
||||
for provider_key, provider in config[(wildcards.sensor_key).upper()]["PROVIDERS"].items():
|
||||
if provider["COMPUTE"]:
|
||||
feature_files.extend(expand("data/interim/{{pid}}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", sensor_key=(wildcards.sensor_key).lower(), language=provider["SRC_LANGUAGE"].lower(), provider_key=provider_key))
|
||||
feature_files.extend(expand("data/interim/{{pid}}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", sensor_key=wildcards.sensor_key.lower(), language=provider["SRC_LANGUAGE"].lower(), provider_key=provider_key.lower()))
|
||||
return(feature_files)
|
||||
|
||||
def optional_ar_input(wildcards):
|
||||
platform = infer_participant_platform("data/external/"+wildcards.pid)
|
||||
|
||||
if platform == "android":
|
||||
return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"])
|
||||
elif platform == "ios":
|
||||
return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"])
|
||||
|
||||
def optional_conversation_input(wildcards):
|
||||
platform = infer_participant_platform("data/external/"+wildcards.pid)
|
||||
|
||||
if platform == "android":
|
||||
return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CONVERSATION"]["DB_TABLE"]["ANDROID"])[0]
|
||||
elif platform == "ios":
|
||||
return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CONVERSATION"]["DB_TABLE"]["IOS"])[0]
|
||||
|
||||
def optional_steps_sleep_input(wildcards):
|
||||
if config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"] == True and config["STEP"]["EXCLUDE_SLEEP"]["TYPE"] == "FITBIT_BASED":
|
||||
return "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv"
|
||||
else:
|
||||
return []
|
||||
|
||||
def optional_wifi_input(wildcards):
|
||||
if len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) > 0 and len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) == 0:
|
||||
return {"visible_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"])}
|
||||
elif len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) == 0 and len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) > 0:
|
||||
return {"connected_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"])}
|
||||
elif len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) > 0 and len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) > 0:
|
||||
return {"visible_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]), "connected_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"])}
|
||||
else:
|
||||
raise ValueError("If you are computing WIFI features you need to provide either VISIBLE_ACCESS_POINTS, CONNECTED_ACCESS_POINTS or both")
|
||||
|
||||
|
||||
# Models.smk ###########################################################################################################
|
||||
|
||||
def input_merge_features_of_single_participant(wildcards):
|
||||
|
|
|
@ -28,341 +28,211 @@ rule resample_episodes_with_datetime:
|
|||
script:
|
||||
"../src/data/readable_datetime.R"
|
||||
|
||||
rule accelerometer_r_features:
|
||||
rule phone_accelerometer_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["ACCELEROMETER"]["DB_TABLE"])[0],
|
||||
sensor_data = "data/raw/{pid}/phone_accelerometer_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["ACCELEROMETER"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider = lambda wildcards: config["PHONE_ACCELEROMETER"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "accelerometer"
|
||||
sensor_key = "phone_accelerometer"
|
||||
output:
|
||||
"data/interim/{pid}/accelerometer_features/accelerometer_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule accelerometer_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["ACCELEROMETER"]["DB_TABLE"])[0],
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["ACCELEROMETER"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "accelerometer"
|
||||
output:
|
||||
"data/interim/{pid}/accelerometer_features/accelerometer_python_{provider_key}.csv"
|
||||
"data/interim/{pid}/phone_accelerometer_features/phone_accelerometer_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule activity_recognition_episodes:
|
||||
input:
|
||||
optional_ar_input
|
||||
sensor_data = "data/raw/{pid}/phone_activity_recognition_with_datetime_unified.csv"
|
||||
output:
|
||||
"data/interim/{pid}/activity_recognition_episodes.csv"
|
||||
"data/interim/{pid}/phone_activity_recognition_episodes.csv"
|
||||
script:
|
||||
"../src/features/activity_recognition/episodes/activity_recognition_episodes.R"
|
||||
"../src/features/phone_activity_recognition/episodes/activity_recognition_episodes.R"
|
||||
|
||||
rule activity_recognition_r_features:
|
||||
rule phone_activity_recognition_python_features:
|
||||
input:
|
||||
sensor_episodes = "data/interim/{pid}/activity_recognition_episodes_resampled_with_datetime.csv",
|
||||
sensor_episodes = "data/interim/{pid}/phone_activity_recognition_episodes_resampled_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["ACTIVITY_RECOGNITION"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider = lambda wildcards: config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "activity_recognition"
|
||||
sensor_key = "phone_activity_recognition"
|
||||
output:
|
||||
"data/interim/{pid}/activity_recognition_features/activity_recognition_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule activity_recognition_python_features:
|
||||
input:
|
||||
sensor_episodes = "data/interim/{pid}/activity_recognition_episodes_resampled_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["ACTIVITY_RECOGNITION"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "activity_recognition"
|
||||
output:
|
||||
"data/interim/{pid}/activity_recognition_features/activity_recognition_python_{provider_key}.csv"
|
||||
"data/interim/{pid}/phone_activity_recognition_features/phone_activity_recognition_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule applications_foreground_r_features:
|
||||
rule phone_applications_foreground_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_with_genre.csv", sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"])[0],
|
||||
sensor_data = "data/raw/{pid}/phone_applications_foreground_with_datetime_with_categories.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["APPLICATIONS_FOREGROUND"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider = lambda wildcards: config["PHONE_APPLICATIONS_FOREGROUND"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "applications_foreground"
|
||||
sensor_key = "phone_applications_foreground"
|
||||
output:
|
||||
"data/interim/{pid}/applications_foreground_features/applications_foreground_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule applications_foreground_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_with_genre.csv", sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"])[0],
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["APPLICATIONS_FOREGROUND"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "applications_foreground"
|
||||
output:
|
||||
"data/interim/{pid}/applications_foreground_features/applications_foreground_python_{provider_key}.csv"
|
||||
"data/interim/{pid}/phone_applications_foreground_features/phone_applications_foreground_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule battery_episodes:
|
||||
input:
|
||||
expand("data/raw/{{pid}}/{sensor}_raw.csv", sensor=config["BATTERY"]["DB_TABLE"])
|
||||
"data/raw/{pid}/phone_battery_raw.csv"
|
||||
output:
|
||||
"data/interim/{pid}/battery_episodes.csv"
|
||||
"data/interim/{pid}/phone_battery_episodes.csv"
|
||||
script:
|
||||
"../src/features/battery/episodes/battery_episodes.R"
|
||||
"../src/features/phone_battery/episodes/battery_episodes.R"
|
||||
|
||||
rule battery_r_features:
|
||||
rule phone_battery_python_features:
|
||||
input:
|
||||
sensor_episodes = "data/interim/{pid}/battery_episodes_resampled_with_datetime.csv",
|
||||
sensor_episodes = "data/interim/{pid}/phone_battery_episodes_resampled_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["BATTERY"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider = lambda wildcards: config["PHONE_BATTERY"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "battery"
|
||||
sensor_key = "phone_battery"
|
||||
output:
|
||||
"data/interim/{pid}/battery_features/battery_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule battery_python_features:
|
||||
input:
|
||||
sensor_episodes = "data/interim/{pid}/battery_episodes_resampled_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["BATTERY"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "battery"
|
||||
output:
|
||||
"data/interim/{pid}/battery_features/battery_python_{provider_key}.csv"
|
||||
"data/interim/{pid}/phone_battery_features/phone_battery_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule bluetooth_r_features:
|
||||
rule phone_bluetooth_r_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BLUETOOTH"]["DB_TABLE"])[0],
|
||||
sensor_data = "data/raw/{pid}/phone_bluetooth_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["BLUETOOTH"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider = lambda wildcards: config["PHONE_BLUETOOTH"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "bluetooth"
|
||||
sensor_key = "phone_bluetooth"
|
||||
output:
|
||||
"data/interim/{pid}/bluetooth_features/bluetooth_r_{provider_key}.csv"
|
||||
"data/interim/{pid}/phone_bluetooth_features/phone_bluetooth_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule bluetooth_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BLUETOOTH"]["DB_TABLE"])[0],
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["BLUETOOTH"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "bluetooth"
|
||||
output:
|
||||
"data/interim/{pid}/bluetooth_features/bluetooth_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule calls_r_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CALLS"]["DB_TABLE"])[0],
|
||||
sensor_data = "data/raw/{pid}/phone_calls_with_datetime_unified.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["CALLS"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider = lambda wildcards: config["PHONE_CALLS"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "calls"
|
||||
sensor_key = "phone_calls"
|
||||
output:
|
||||
"data/interim/{pid}/calls_features/calls_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule calls_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CALLS"]["DB_TABLE"])[0],
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["CALLS"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "calls"
|
||||
output:
|
||||
"data/interim/{pid}/calls_features/calls_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule conversation_r_features:
|
||||
input:
|
||||
sensor_data = optional_conversation_input,
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "conversation"
|
||||
output:
|
||||
"data/interim/{pid}/conversation_features/conversation_r_{provider_key}.csv"
|
||||
"data/interim/{pid}/phone_calls_features/phone_calls_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule conversation_python_features:
|
||||
input:
|
||||
sensor_data = optional_conversation_input,
|
||||
sensor_data = "data/raw/{pid}/phone_conversation_with_datetime_unified.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider = lambda wildcards: config["PHONE_CONVERSATION"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "conversation"
|
||||
sensor_key = "phone_conversation"
|
||||
output:
|
||||
"data/interim/{pid}/conversation_features/conversation_python_{provider_key}.csv"
|
||||
"data/interim/{pid}/phone_conversation_features/phone_conversation_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule light_r_features:
|
||||
rule phone_light_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["LIGHT"]["DB_TABLE"])[0],
|
||||
sensor_data = "data/raw/{pid}/phone_light_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["LIGHT"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider = lambda wildcards: config["PHONE_LIGHT"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "light"
|
||||
sensor_key = "phone_light"
|
||||
output:
|
||||
"data/interim/{pid}/light_features/light_r_{provider_key}.csv"
|
||||
"data/interim/{pid}/phone_light_features/phone_light_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule phone_locations_r_features:
|
||||
input:
|
||||
sensor_data = "data/interim/{pid}/phone_locations_processed_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["PHONE_LOCATIONS"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "phone_locations"
|
||||
output:
|
||||
"data/interim/{pid}/phone_locations_features/phone_locations_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule light_python_features:
|
||||
rule phone_locations_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["LIGHT"]["DB_TABLE"])[0],
|
||||
sensor_data = "data/interim/{pid}/phone_locations_processed_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["LIGHT"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider = lambda wildcards: config["PHONE_LOCATIONS"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "light"
|
||||
sensor_key = "phone_locations"
|
||||
output:
|
||||
"data/interim/{pid}/light_features/light_python_{provider_key}.csv"
|
||||
"data/interim/{pid}/phone_locations_features/phone_locations_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule locations_r_features:
|
||||
rule phone_messages_r_features:
|
||||
input:
|
||||
sensor_data = expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}_with_datetime.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"])[0],
|
||||
sensor_data = "data/raw/{pid}/phone_messages_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["LOCATIONS"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider = lambda wildcards: config["PHONE_MESSAGES"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "locations"
|
||||
sensor_key = "phone_messages"
|
||||
output:
|
||||
"data/interim/{pid}/locations_features/locations_r_{provider_key}.csv"
|
||||
"data/interim/{pid}/phone_messages_features/phone_messages_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule locations_python_features:
    # Runs the shared Python feature entry point for the "locations" sensor.
    # One output file is produced per participant ({pid}) and provider
    # ({provider_key}); the provider settings are looked up in
    # config["LOCATIONS"]["PROVIDERS"] using the wildcard value as-is.
    input:
        # expand() returns a list; only its first element is used as the input path
        sensor_data = expand(
            "data/interim/{{pid}}/{sensor}_processed_{locations_to_use}_with_datetime.csv",
            sensor=config["LOCATIONS"]["DB_TABLE"],
            locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"]
        )[0],
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["LOCATIONS"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "locations"
    output:
        "data/interim/{pid}/locations_features/locations_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"
|
||||
|
||||
rule messages_r_features:
    # Runs the shared R feature entry point for the "messages" sensor.
    # One output file is produced per participant ({pid}) and provider
    # ({provider_key}); the provider settings are looked up in
    # config["MESSAGES"]["PROVIDERS"] using the wildcard value as-is.
    input:
        # expand() returns a list; only its first element is used as the input path
        sensor_data = expand(
            "data/raw/{{pid}}/{sensor}_with_datetime.csv",
            sensor=config["MESSAGES"]["DB_TABLE"]
        )[0],
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["MESSAGES"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "messages"
    output:
        "data/interim/{pid}/messages_features/messages_r_{provider_key}.csv"
    script:
        "../src/features/entry.R"
|
||||
|
||||
rule messages_python_features:
    # Runs the shared Python feature entry point for the "messages" sensor.
    # One output file is produced per participant ({pid}) and provider
    # ({provider_key}); the provider settings are looked up in
    # config["MESSAGES"]["PROVIDERS"] using the wildcard value as-is.
    input:
        # expand() returns a list; only its first element is used as the input path
        sensor_data = expand(
            "data/raw/{{pid}}/{sensor}_with_datetime.csv",
            sensor=config["MESSAGES"]["DB_TABLE"]
        )[0],
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["MESSAGES"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "messages"
    output:
        "data/interim/{pid}/messages_features/messages_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"
|
||||
|
||||
rule screen_episodes:
|
||||
input:
|
||||
screen = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["SCREEN"]["DB_TABLE"])
|
||||
screen = "data/raw/{pid}/phone_screen_with_datetime_unified.csv"
|
||||
output:
|
||||
"data/interim/{pid}/screen_episodes.csv"
|
||||
"data/interim/{pid}/phone_screen_episodes.csv"
|
||||
script:
|
||||
"../src/features/screen/episodes/screen_episodes.R"
|
||||
"../src/features/phone_screen/episodes/screen_episodes.R"
|
||||
|
||||
rule screen_r_features:
|
||||
rule phone_screen_python_features:
|
||||
input:
|
||||
sensor_episodes = "data/interim/{pid}/screen_episodes_resampled_with_datetime.csv",
|
||||
sensor_episodes = "data/interim/{pid}/phone_screen_episodes_resampled_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["SCREEN"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider = lambda wildcards: config["PHONE_SCREEN"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "screen"
|
||||
sensor_key = "phone_screen"
|
||||
output:
|
||||
"data/interim/{pid}/screen_features/screen_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule screen_python_features:
|
||||
input:
|
||||
sensor_episodes = "data/interim/{pid}/screen_episodes_resampled_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["SCREEN"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "screen"
|
||||
output:
|
||||
"data/interim/{pid}/screen_features/screen_python_{provider_key}.csv"
|
||||
"data/interim/{pid}/phone_screen_features/phone_screen_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule wifi_r_features:
|
||||
rule phone_wifi_connected_r_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor_key}_with_datetime_visibleandconnected.csv", sensor_key="WIFI".lower())[0],
|
||||
sensor_data = "data/raw/{pid}/phone_wifi_connected_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["WIFI"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider = lambda wildcards: config["PHONE_WIFI_CONNECTED"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "wifi"
|
||||
sensor_key = "phone_wifi_connected"
|
||||
output:
|
||||
"data/interim/{pid}/wifi_features/wifi_r_{provider_key}.csv"
|
||||
"data/interim/{pid}/phone_wifi_connected_features/phone_wifi_connected_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule wifi_python_features:
|
||||
rule phone_wifi_visible_r_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor_key}_with_datetime_visibleandconnected.csv", sensor_key="WIFI".lower())[0],
|
||||
sensor_data = "data/raw/{pid}/phone_wifi_visible_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["WIFI"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider = lambda wildcards: config["PHONE_WIFI_VISIBLE"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "wifi"
|
||||
sensor_key = "phone_wifi_visible"
|
||||
output:
|
||||
"data/interim/{pid}/wifi_features/wifi_python_{provider_key}.csv"
|
||||
"data/interim/{pid}/phone_wifi_visible_features/phone_wifi_visible_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule fitbit_heartrate_features:
|
||||
input:
|
||||
|
|
|
@ -29,10 +29,10 @@ rule download_dataset:
|
|||
"data/external/{pid}"
|
||||
params:
|
||||
group = config["DOWNLOAD_DATASET"]["GROUP"],
|
||||
table = "{sensor}",
|
||||
sensor = "{sensor}",
|
||||
table = lambda wildcards: config[str(wildcards.sensor).upper()]["DB_TABLE"],
|
||||
timezone = config["TIMEZONE"],
|
||||
aware_multiplatform_tables = config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "," + config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"] + "," + config["CONVERSATION"]["DB_TABLE"]["ANDROID"] + "," + config["CONVERSATION"]["DB_TABLE"]["IOS"],
|
||||
unifiable_sensors = {"calls": config["CALLS"]["DB_TABLE"], "battery": config["BATTERY"]["DB_TABLE"], "screen": config["SCREEN"]["DB_TABLE"], "ios_activity_recognition": config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"], "ios_conversation": config["CONVERSATION"]["DB_TABLE"]["IOS"]}
|
||||
aware_multiplatform_tables = config["PHONE_ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "," + config["PHONE_ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"] + "," + config["PHONE_CONVERSATION"]["DB_TABLE"]["ANDROID"] + "," + config["PHONE_CONVERSATION"]["DB_TABLE"]["IOS"],
|
||||
output:
|
||||
"data/raw/{pid}/{sensor}_raw.csv"
|
||||
script:
|
||||
|
@ -50,35 +50,23 @@ rule compute_day_segments:
|
|||
script:
|
||||
"../src/data/compute_day_segments.py"
|
||||
|
||||
PHONE_SENSORS = []
|
||||
PHONE_SENSORS.extend([config["MESSAGES"]["DB_TABLE"], config["CALLS"]["DB_TABLE"], config["LOCATIONS"]["DB_TABLE"], config["BLUETOOTH"]["DB_TABLE"], config["BATTERY"]["DB_TABLE"], config["SCREEN"]["DB_TABLE"], config["LIGHT"]["DB_TABLE"], config["ACCELEROMETER"]["DB_TABLE"], config["APPLICATIONS_FOREGROUND"]["DB_TABLE"], config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["CONVERSATION"]["DB_TABLE"]["IOS"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]])
|
||||
PHONE_SENSORS.extend(config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"])
|
||||
|
||||
if len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) > 0:
|
||||
PHONE_SENSORS.append(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"])
|
||||
if len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) > 0:
|
||||
PHONE_SENSORS.append(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"])
|
||||
|
||||
|
||||
rule readable_datetime:
|
||||
rule phone_readable_datetime:
|
||||
input:
|
||||
sensor_input = "data/raw/{pid}/{sensor}_raw.csv",
|
||||
sensor_input = "data/raw/{pid}/phone_{sensor}_raw.csv",
|
||||
day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
|
||||
params:
|
||||
timezones = None,
|
||||
fixed_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
|
||||
day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
|
||||
include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
|
||||
wildcard_constraints:
|
||||
sensor = '(' + '|'.join([re.escape(x) for x in PHONE_SENSORS]) + ')' # only process smartphone sensors, not fitbit
|
||||
output:
|
||||
"data/raw/{pid}/{sensor}_with_datetime.csv"
|
||||
"data/raw/{pid}/phone_{sensor}_with_datetime.csv"
|
||||
script:
|
||||
"../src/data/readable_datetime.R"
|
||||
|
||||
rule phone_sensed_bins:
|
||||
input:
|
||||
all_sensors = optional_phone_sensed_bins_input
|
||||
all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor = map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]))
|
||||
params:
|
||||
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
|
||||
output:
|
||||
|
@ -88,7 +76,7 @@ rule phone_sensed_bins:
|
|||
|
||||
rule phone_sensed_timestamps:
|
||||
input:
|
||||
all_sensors = optional_phone_sensed_timestamps_input
|
||||
all_sensors = expand("data/raw/{{pid}}/{sensor}_raw.csv", sensor = map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]))
|
||||
output:
|
||||
"data/interim/{pid}/phone_sensed_timestamps.csv"
|
||||
script:
|
||||
|
@ -112,55 +100,50 @@ rule unify_ios_android:
|
|||
participant_info = "data/external/{pid}"
|
||||
params:
|
||||
sensor = "{sensor}",
|
||||
unifiable_sensors = {"calls": config["CALLS"]["DB_TABLE"], "battery": config["BATTERY"]["DB_TABLE"], "screen": config["SCREEN"]["DB_TABLE"], "ios_activity_recognition": config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"], "ios_conversation": config["CONVERSATION"]["DB_TABLE"]["IOS"]}
|
||||
output:
|
||||
"data/raw/{pid}/{sensor}_with_datetime_unified.csv"
|
||||
script:
|
||||
"../src/data/unify_ios_android.R"
|
||||
|
||||
rule process_location_types:
|
||||
rule process_phone_location_types:
|
||||
input:
|
||||
locations = "data/raw/{pid}/{sensor}_raw.csv",
|
||||
locations = "data/raw/{pid}/phone_locations_raw.csv",
|
||||
phone_sensed_timestamps = "data/interim/{pid}/phone_sensed_timestamps.csv",
|
||||
params:
|
||||
consecutive_threshold = config["LOCATIONS"]["FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD"],
|
||||
time_since_valid_location = config["LOCATIONS"]["FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION"],
|
||||
locations_to_use = "{locations_to_use}"
|
||||
wildcard_constraints:
|
||||
locations_to_use = '(ALL|GPS|FUSED_RESAMPLED)'
|
||||
consecutive_threshold = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD"],
|
||||
time_since_valid_location = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION"],
|
||||
locations_to_use = config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"]
|
||||
output:
|
||||
"data/interim/{pid}/{sensor}_processed_{locations_to_use}.csv"
|
||||
"data/interim/{pid}/phone_locations_processed.csv"
|
||||
script:
|
||||
"../src/data/process_location_types.R"
|
||||
|
||||
rule readable_datetime_location_processed:
|
||||
input:
|
||||
sensor_input = expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"]),
|
||||
sensor_input = "data/interim/{pid}/phone_locations_processed.csv",
|
||||
day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
|
||||
params:
|
||||
timezones = None,
|
||||
fixed_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
|
||||
day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
|
||||
include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
|
||||
wildcard_constraints:
|
||||
locations_to_use = '(ALL|GPS|FUSED_RESAMPLED)'
|
||||
output:
|
||||
expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}_with_datetime.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"])
|
||||
"data/interim/{pid}/phone_locations_processed_with_datetime.csv"
|
||||
script:
|
||||
"../src/data/readable_datetime.R"
|
||||
|
||||
rule application_genres:
|
||||
rule phone_application_categories:
|
||||
input:
|
||||
"data/raw/{pid}/{sensor}_with_datetime.csv"
|
||||
"data/raw/{pid}/phone_applications_foreground_with_datetime.csv"
|
||||
params:
|
||||
catalogue_source = config["APPLICATION_GENRES"]["CATALOGUE_SOURCE"],
|
||||
catalogue_file = config["APPLICATION_GENRES"]["CATALOGUE_FILE"],
|
||||
update_catalogue_file = config["APPLICATION_GENRES"]["UPDATE_CATALOGUE_FILE"],
|
||||
scrape_missing_genres = config["APPLICATION_GENRES"]["SCRAPE_MISSING_GENRES"]
|
||||
catalogue_source = config["PHONE_APPLICATIONS_FOREGROUND"]["APPLICATION_CATEGORIES"]["CATALOGUE_SOURCE"],
|
||||
catalogue_file = config["PHONE_APPLICATIONS_FOREGROUND"]["APPLICATION_CATEGORIES"]["CATALOGUE_FILE"],
|
||||
update_catalogue_file = config["PHONE_APPLICATIONS_FOREGROUND"]["APPLICATION_CATEGORIES"]["UPDATE_CATALOGUE_FILE"],
|
||||
scrape_missing_genres = config["PHONE_APPLICATIONS_FOREGROUND"]["APPLICATION_CATEGORIES"]["SCRAPE_MISSING_CATEGORIES"]
|
||||
output:
|
||||
"data/raw/{pid}/{sensor}_with_datetime_with_genre.csv"
|
||||
"data/raw/{pid}/phone_applications_foreground_with_datetime_with_categories.csv"
|
||||
script:
|
||||
"../src/data/application_genres.R"
|
||||
"../src/data/application_categories.R"
|
||||
|
||||
rule fitbit_heartrate_with_datetime:
|
||||
input:
|
||||
|
@ -196,11 +179,3 @@ rule fitbit_sleep_with_datetime:
|
|||
intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv"
|
||||
script:
|
||||
"../src/data/fitbit_readable_datetime.py"
|
||||
|
||||
rule join_wifi_tables:
    # Hands the inputs returned by optional_wifi_input (unpacked into named
    # inputs) to the join script, which writes a single per-participant file.
    input:
        unpack(optional_wifi_input)
    output:
        "data/raw/{pid}/wifi_with_datetime_visibleandconnected.csv"
    script:
        "../src/data/join_visible_and_connected_wifi.R"
|
|
@ -2,31 +2,59 @@ library("tidyverse")
|
|||
library("lubridate")
|
||||
options(scipen=999)
|
||||
|
||||
find_segments_frequency <- function(local_date, local_time, local_timezone, segments){
|
||||
|
||||
assigned_segments <- segments[segments$segment_start<= local_time & segments$segment_end >= local_time, ]
|
||||
assigned_segments["segment_start_ts"] = as.numeric(lubridate::as_datetime(stringi::stri_c(local_date,assigned_segments$segment_id_start_time), tz = local_timezone)) * 1000
|
||||
assigned_segments["segment_end_ts"] = as.numeric(lubridate::as_datetime(stringi::stri_c(local_date,assigned_segments$segment_id_end_time), tz = local_timezone)) * 1000 + 999
|
||||
|
||||
return(stringi::stri_c(stringi::stri_c("[",
|
||||
assigned_segments[["label"]], "#",
|
||||
local_date, " ",
|
||||
assigned_segments[["segment_id_start_time"]], ",",
|
||||
local_date, " ",
|
||||
assigned_segments[["segment_id_end_time"]], ";",
|
||||
assigned_segments[["segment_start_ts"]], ",",
|
||||
assigned_segments[["segment_end_ts"]],
|
||||
"]"), collapse = "|"))
|
||||
# Longest segment length configured for one repeat type, used as a look-back delay.
#
# day_type: value matched against the `repeats_on` column (e.g. "wday", "mday").
# include_past_periodic_segments: when FALSE the delay is always zero.
# segments: day segments table with `length` and `repeats_on` columns. It
#   defaults to the `day_segments` object visible from this function's
#   enclosing environment, which is what the function read implicitly before
#   this parameter existed; pass it explicitly to avoid relying on that object.
#
# Returns a lubridate duration: the largest `length` among the segments that
# repeat on `day_type`, or duration("0days") when there is none or when past
# periodic segments are not requested.
day_type_delay <- function(day_type, include_past_periodic_segments, segments = day_segments){
  delay <- segments %>%
    mutate(length_duration = duration(length)) %>%
    filter(repeats_on == day_type) %>%
    arrange(-length_duration) %>%
    pull(length_duration) %>%
    first()

  # Both operands are scalars, so a plain `if` is used instead of a vectorised if_else()
  no_look_back <- is.na(delay) || isTRUE(include_past_periodic_segments == FALSE)
  if (no_look_back) duration("0days") else delay
}
|
||||
|
||||
find_segments_periodic <- function(timestamp, segments){
|
||||
# crossing and pivot_longer make segments a tibble, thus we need to extract [["segment_id"]]
|
||||
return(stringi::stri_c(segments[[1]][segments[[1]]$segment_start_ts<= timestamp & segments[[1]]$segment_end_ts >= timestamp, "segment_id"][["segment_id"]], collapse = "|"))
|
||||
# List every calendar day from (first observed day - delay) to the last
# observed day and tag each one with the index that `day_type` asks for.
#
# data: rows with a `local_date` column holding dates that lubridate::ymd can parse.
# local_timezone: timezone passed to lubridate::ymd when parsing `local_date`.
# day_type: "every_day", "wday", "mday", "qday" or "yday"; any other value
#   returns the date table without an extra column.
# delay: amount subtracted from the earliest date before the sequence is built.
#
# Returns a table with `local_date_obj`, `local_date` (as character) and one
# column named after `day_type` (constant 0 for "every_day", week days counted
# from Monday = 1 for "wday").
get_segment_dates <- function(data, local_timezone, day_type, delay){
  observed_days <- data %>%
    distinct(local_date) %>%
    mutate(local_date_obj = date(lubridate::ymd(local_date, tz = local_timezone)))

  # fill the gaps between observed days, starting `delay` before the first one
  all_days <- observed_days %>%
    complete(local_date_obj = seq(date(min(local_date_obj) - delay), max(local_date_obj), by="days")) %>%
    mutate(local_date = replace_na(as.character(date(local_date_obj))))

  switch(day_type,
         every_day = mutate(all_days, every_day = 0),
         wday = mutate(all_days, wday = wday(local_date_obj, week_start = 1)),
         mday = mutate(all_days, mday = mday(local_date_obj)),
         qday = mutate(all_days, qday = qday(local_date_obj)),
         yday = mutate(all_days, yday = yday(local_date_obj)),
         all_days)
}
|
||||
|
||||
find_segments_event <- function(timestamp, segments){
|
||||
# segments is a data.frame, we don't need to extract [["segment_id"]] like in find_segments_periodic
|
||||
return(stringi::stri_c(segments[[1]][segments[[1]]$segment_start_ts<= timestamp & segments[[1]]$segment_end_ts >= timestamp, "segment_id"], collapse = "|"))
|
||||
# Tag every row of `nested_data` with the ids of all segments that contain it.
#
# nested_data: table with a numeric `timestamp` column.
# nested_inferred_day_segments: table with `segment_start_ts`, `segment_end_ts`
#   and `segment_id` columns; both bounds are inclusive.
#
# Returns `nested_data` with a character column `assigned_segments` holding the
# matching segment ids joined by "|" in segment row order, or "" for rows that
# fall in no segment.
assign_rows_to_segments <- function(nested_data, nested_inferred_day_segments){
  # rep() rather than a recycled scalar so a zero-row table also gets the column
  nested_data$assigned_segments <- rep("", nrow(nested_data))

  # seq_len() instead of 1:nrow(): with no segments, 1:0 would index rows 1 and 0,
  # which do not exist, and the comparison below would no longer be meaningful
  for(i in seq_len(nrow(nested_inferred_day_segments))) {
    segment <- nested_inferred_day_segments[i,]
    is_inside <- segment$segment_start_ts <= nested_data$timestamp & segment$segment_end_ts >= nested_data$timestamp
    nested_data$assigned_segments <- ifelse(is_inside,
                                            stringi::stri_c(nested_data$assigned_segments, segment$segment_id, sep = "|"),
                                            nested_data$assigned_segments)
  }

  # every appended id was prefixed with "|"; drop the leading separator
  nested_data$assigned_segments <- substring(nested_data$assigned_segments, 2)
  return(nested_data)
}
|
||||
|
||||
# Label each row with the frequency segment its local time falls into.
#
# nested_data: rows sharing one timezone, with `local_date`, `local_time_obj`
#   and an existing `assigned_segments` column.
# nested_timezone: timezone used to turn "date + segment time" into timestamps.
# day_segments: table with `label`, `segment_start_ts`, `segment_end_ts`,
#   `segment_id_start_time` and `segment_id_end_time`; the two *_ts bounds are
#   compared (inclusively) against `local_time_obj`.
#
# Returns `nested_data` where `assigned_segments` of every matching row is
# replaced by "[label#date start,date end;start_ms,end_ms]"; rows matching no
# segment keep their previous value. If several segments match, the last wins.
assign_rows_to_segments_frequency <- function(nested_data, nested_timezone, day_segments){
  # seq_len() instead of 1:nrow(): with no segments, 1:0 would index rows 1 and 0,
  # which do not exist
  for(i in seq_len(nrow(day_segments))) {
    segment <- day_segments[i,]
    nested_data$assigned_segments <- ifelse(segment$segment_start_ts<= nested_data$local_time_obj & segment$segment_end_ts >= nested_data$local_time_obj,
                                            # The segment_id is assembled on the fly because it depends on each row's local_date and timezone
                                            stringi::stri_c("[",
                                                            segment[["label"]], "#",
                                                            nested_data$local_date, " ",
                                                            segment[["segment_id_start_time"]], ",",
                                                            nested_data$local_date, " ",
                                                            segment[["segment_id_end_time"]], ";",
                                                            # start/end of the segment on that date, in milliseconds; the end covers the whole last second
                                                            as.numeric(lubridate::as_datetime(stringi::stri_c(nested_data$local_date, segment$segment_id_start_time), tz = nested_timezone)) * 1000, ",",
                                                            as.numeric(lubridate::as_datetime(stringi::stri_c(nested_data$local_date, segment$segment_id_end_time), tz = nested_timezone)) * 1000 + 999,
                                                            "]"),
                                            nested_data$assigned_segments)
  }
  return(nested_data)
}
|
||||
|
||||
assign_to_day_segment <- function(sensor_data, day_segments, day_segments_type, include_past_periodic_segments){
|
||||
|
@ -34,133 +62,102 @@ assign_to_day_segment <- function(sensor_data, day_segments, day_segments_type,
|
|||
if(nrow(sensor_data) == 0)
|
||||
return(sensor_data %>% mutate(assigned_segments = NA))
|
||||
|
||||
if(day_segments_type == "FREQUENCY"){ #FREQUENCY
|
||||
if(day_segments_type == "FREQUENCY"){
|
||||
|
||||
day_segments <- day_segments %>% mutate(start_time = lubridate::hm(start_time),
|
||||
end_time = start_time + minutes(length) - seconds(1),
|
||||
segment_id_start_time = paste(str_pad(hour(start_time),2, pad="0"), str_pad(minute(start_time),2, pad="0"), str_pad(second(start_time),2, pad="0"),sep =":"),
|
||||
segment_id_end_time = paste(str_pad(hour(ymd("1970-01-01") + end_time),2, pad="0"), str_pad(minute(ymd("1970-01-01") + end_time),2, pad="0"), str_pad(second(ymd("1970-01-01") + end_time),2, pad="0"),sep =":"), # add ymd("1970-01-01") to get a real time instead of duration
|
||||
segment_start = as.numeric(start_time),
|
||||
segment_end = as.numeric(end_time))
|
||||
segment_start_ts = as.numeric(start_time),
|
||||
segment_end_ts = as.numeric(end_time))
|
||||
|
||||
sensor_data <- sensor_data %>% mutate(local_time_obj = as.numeric(lubridate::hms(local_time)),
|
||||
assigned_segments = pmap_chr(list(local_date, local_time_obj, local_timezone), find_segments_frequency, day_segments)) %>% select(-local_time_obj)
|
||||
assigned_segments = "")
|
||||
|
||||
} else if (day_segments_type == "PERIODIC"){ #PERIODIC
|
||||
sensor_data <- sensor_data %>%
|
||||
group_by(local_timezone) %>%
|
||||
nest() %>%
|
||||
mutate(data = map2(data, local_timezone, assign_rows_to_segments_frequency, day_segments)) %>%
|
||||
unnest(cols = data) %>%
|
||||
arrange(timestamp) %>%
|
||||
select(-local_time_obj)
|
||||
|
||||
return(sensor_data)
|
||||
|
||||
|
||||
} else if (day_segments_type == "PERIODIC"){
|
||||
|
||||
# We need to take into account segment start dates that could include the first day of data
|
||||
day_segments <- day_segments %>% mutate(length_duration = duration(length))
|
||||
wday_delay <- day_segments %>% mutate(length_duration = duration(length)) %>% filter(repeats_on == "wday") %>% arrange(-length_duration) %>% pull(length_duration) %>% first()
|
||||
wday_delay <- if_else(is.na(wday_delay) | include_past_periodic_segments == FALSE, duration("0days"), wday_delay)
|
||||
|
||||
mday_delay <- day_segments %>% mutate(length_duration = duration(length)) %>% filter(repeats_on == "mday") %>% arrange(-length_duration) %>% pull(length_duration) %>% first()
|
||||
mday_delay <- if_else(is.na(mday_delay) | include_past_periodic_segments == FALSE, duration("0days"), mday_delay)
|
||||
|
||||
qday_delay <- day_segments %>% mutate(length_duration = duration(length)) %>% filter(repeats_on == "qday") %>% arrange(-length_duration) %>% pull(length_duration) %>% first()
|
||||
qday_delay <- if_else(is.na(qday_delay) | include_past_periodic_segments == FALSE, duration("0days"), qday_delay)
|
||||
|
||||
yday_delay <- day_segments %>% mutate(length_duration = duration(length)) %>% filter(repeats_on == "yday") %>% arrange(-length_duration) %>% pull(length_duration) %>% first()
|
||||
yday_delay <- if_else(is.na(yday_delay) | include_past_periodic_segments == FALSE, duration("0days"), yday_delay)
|
||||
every_day_delay <- duration("0days")
|
||||
wday_delay <- day_type_delay("wday", include_past_periodic_segments)
|
||||
mday_delay <- day_type_delay("mday", include_past_periodic_segments)
|
||||
qday_delay <- day_type_delay("qday", include_past_periodic_segments)
|
||||
yday_delay <- day_type_delay("yday", include_past_periodic_segments)
|
||||
|
||||
sensor_data <- sensor_data %>%
|
||||
# mutate(row_n = row_number()) %>%
|
||||
group_by(local_timezone) %>%
|
||||
nest() %>%
|
||||
# get existent days that we need to start segments from
|
||||
mutate(every_date = map(data, ~.x %>%
|
||||
distinct(local_date) %>%
|
||||
mutate(local_date_obj = date(lubridate::ymd(local_date, tz = local_timezone))) %>%
|
||||
complete(local_date_obj = seq(min(local_date_obj), max(local_date_obj), by="days")) %>%
|
||||
mutate(local_date = replace_na(as.character(date(local_date_obj)))) %>%
|
||||
mutate(every_day = 0)),
|
||||
week_dates = map(data, ~.x %>%
|
||||
distinct(local_date) %>%
|
||||
mutate(local_date_obj = date(lubridate::ymd(local_date, tz = local_timezone))) %>%
|
||||
complete(local_date_obj = seq(date(min(local_date_obj) - wday_delay), max(local_date_obj), by="days")) %>%
|
||||
mutate(local_date = replace_na(as.character(date(local_date_obj)))) %>%
|
||||
mutate(wday = wday(local_date_obj, week_start = 1)) ),
|
||||
month_dates = map(data, ~.x %>%
|
||||
distinct(local_date) %>%
|
||||
mutate(local_date_obj = date(lubridate::ymd(local_date, tz = local_timezone))) %>%
|
||||
complete(local_date_obj = seq(date(min(local_date_obj) - mday_delay), max(local_date_obj), by="days")) %>%
|
||||
mutate(local_date = replace_na(as.character(date(local_date_obj)))) %>%
|
||||
mutate(mday = mday(local_date_obj))),
|
||||
quarter_dates = map(data, ~.x %>%
|
||||
distinct(local_date) %>%
|
||||
mutate(local_date_obj = date(lubridate::ymd(local_date, tz = local_timezone))) %>%
|
||||
complete(local_date_obj = seq(date(min(local_date_obj) - qday_delay), max(local_date_obj), by="days")) %>%
|
||||
mutate(local_date = replace_na(as.character(date(local_date_obj)))) %>%
|
||||
mutate(qday = qday(local_date_obj)) ),
|
||||
year_dates = map(data, ~.x %>%
|
||||
distinct(local_date) %>%
|
||||
mutate(local_date_obj = date(lubridate::ymd(local_date, tz = local_timezone))) %>%
|
||||
complete(local_date_obj = seq(date(min(local_date_obj) - yday_delay), max(local_date_obj), by="days")) %>%
|
||||
mutate(local_date = replace_na(as.character(date(local_date_obj)))) %>%
|
||||
mutate(yday = yday(local_date_obj)) ),
|
||||
mutate(every_date = map2(data, local_timezone, get_segment_dates, "every_day", every_day_delay),
|
||||
week_dates = map2(data, local_timezone, get_segment_dates, "wday", wday_delay),
|
||||
month_dates = map2(data, local_timezone, get_segment_dates, "mday", mday_delay),
|
||||
quarter_dates = map2(data, local_timezone, get_segment_dates, "qday", qday_delay),
|
||||
year_dates = map2(data, local_timezone, get_segment_dates, "yday", yday_delay),
|
||||
existent_dates = pmap(list(every_date, week_dates, month_dates, quarter_dates, year_dates),
|
||||
function(every_date, week_dates, month_dates, quarter_dates, year_dates) reduce(list(every_date, week_dates,month_dates, quarter_dates, year_dates), .f=full_join)),
|
||||
every_date = NULL,
|
||||
week_dates = NULL,
|
||||
month_dates = NULL,
|
||||
quarter_dates = NULL,
|
||||
year_dates = NULL,
|
||||
# build the actual day segments taking into account the users requested leangth and repeat schedule
|
||||
# build the actual day segments taking into account the users requested length and repeat schedule
|
||||
inferred_day_segments = map(existent_dates,
|
||||
~ crossing(day_segments, .x) %>%
|
||||
pivot_longer(cols = c(every_day,wday, mday, qday, yday), names_to = "day_type", values_to = "day_value") %>%
|
||||
filter(repeats_on == day_type & repeats_value == day_value) %>%
|
||||
mutate(segment_id_start = lubridate::parse_date_time(paste(local_date, start_time), orders = c("Ymd HMS", "Ymd HM")), # The segment ids (label#start#end) are computed in UTC to avoid having different labels for instances of a segment that happen in different timezones
|
||||
# The segment ids (segment_id_start and segment_id_end) are computed in UTC to avoid having different labels for instances of a segment that happen in different timezones
|
||||
mutate(segment_id_start = lubridate::parse_date_time(paste(local_date, start_time), orders = c("Ymd HMS", "Ymd HM")),
|
||||
segment_id_end = segment_id_start + lubridate::duration(length),
|
||||
segment_start_ts = as.numeric(lubridate::parse_date_time(paste(local_date, start_time), orders = c("Ymd HMS", "Ymd HM"), tz = local_timezone)) * 1000, # The actual segments are computed using timestamps taking into account the timezone
|
||||
# The actual segments are computed using timestamps taking into account the timezone
|
||||
segment_start_ts = as.numeric(lubridate::parse_date_time(paste(local_date, start_time), orders = c("Ymd HMS", "Ymd HM"), tz = local_timezone)) * 1000,
|
||||
segment_end_ts = segment_start_ts + as.numeric(lubridate::duration(length)) * 1000 + 999,
|
||||
segment_id = paste0("[",
|
||||
paste0(
|
||||
label,"#",
|
||||
paste0(label,"#",
|
||||
paste0(lubridate::date(segment_id_start), " ",
|
||||
paste(str_pad(hour(segment_id_start),2, pad="0"), str_pad(minute(segment_id_start),2, pad="0"), str_pad(second(segment_id_start),2, pad="0"),sep =":"), ",",
|
||||
lubridate::date(segment_id_end), " ",
|
||||
paste(str_pad(hour(segment_id_end),2, pad="0"), str_pad(minute(segment_id_end),2, pad="0"), str_pad(second(segment_id_end),2, pad="0"),sep =":")),";",
|
||||
paste0(segment_start_ts, ",", segment_end_ts)
|
||||
),
|
||||
paste0(segment_start_ts, ",", segment_end_ts)),
|
||||
"]")) %>%
|
||||
select(segment_start_ts, segment_end_ts, segment_id) %>%
|
||||
drop_na(segment_start_ts, segment_end_ts)), # drop day segments with an invalid start or end time (mostly due to daylight saving changes, e.g. 2020-03-08 02:00:00 EST does not exist, clock jumps from 1am to 3am)
|
||||
data = map2(data, inferred_day_segments, ~ .x %>% mutate(row_date_time = as.numeric(lubridate::ymd_hms(local_date_time, tz = local_timezone)) * 1000,
|
||||
assigned_segments = map_chr(row_date_time, ~find_segments_periodic(.x, inferred_day_segments)),
|
||||
row_date_time = NULL))
|
||||
# drop day segments with an invalid start or end time (mostly due to daylight saving changes, e.g. 2020-03-08 02:00:00 EST does not exist, clock jumps from 01:59am to 03:00am)
|
||||
drop_na(segment_start_ts, segment_end_ts)),
|
||||
data = map2(data, inferred_day_segments, assign_rows_to_segments)
|
||||
) %>%
|
||||
select(-existent_dates, -inferred_day_segments) %>%
|
||||
select(-existent_dates, -inferred_day_segments, -every_date, -week_dates, -month_dates, -quarter_dates, -year_dates) %>%
|
||||
unnest(cols = data) %>%
|
||||
arrange(timestamp)
|
||||
|
||||
|
||||
} else if ( day_segments_type == "EVENT"){
|
||||
|
||||
sensor_data <- sensor_data %>%
|
||||
group_by(local_timezone) %>%
|
||||
nest() %>%
|
||||
mutate(inferred_day_segments = map(local_timezone, ~ day_segments %>% mutate(shift = ifelse(shift == "0", "0seconds", shift),
|
||||
mutate(inferred_day_segments = map(local_timezone, ~ day_segments %>%
|
||||
mutate(shift = ifelse(shift == "0", "0seconds", shift),
|
||||
segment_start_ts = event_timestamp + (as.integer(seconds(lubridate::duration(shift))) * ifelse(shift_direction >= 0, 1, -1) * 1000),
|
||||
segment_end_ts = segment_start_ts + (as.integer(seconds(lubridate::duration(length))) * 1000),
|
||||
segment_id_start = lubridate::as_datetime(segment_start_ts/1000, tz = .x), # these start and end datetime objects are for labeling only
|
||||
# these start and end datetime objects are for labeling only
|
||||
segment_id_start = lubridate::as_datetime(segment_start_ts/1000, tz = .x),
|
||||
segment_id_end = lubridate::as_datetime(segment_end_ts/1000, tz = .x),
|
||||
segment_end_ts = segment_end_ts + 999,
|
||||
segment_id = paste0("[",
|
||||
paste0(
|
||||
label,"#",
|
||||
paste0(label,"#",
|
||||
paste0(lubridate::date(segment_id_start), " ",
|
||||
paste(str_pad(hour(segment_id_start),2, pad="0"), str_pad(minute(segment_id_start),2, pad="0"), str_pad(second(segment_id_start),2, pad="0"),sep =":"), ",",
|
||||
lubridate::date(segment_id_end), " ",
|
||||
paste(str_pad(hour(segment_id_end),2, pad="0"), str_pad(minute(segment_id_end),2, pad="0"), str_pad(second(segment_id_end),2, pad="0"),sep =":")),";",
|
||||
paste0(segment_start_ts, ",", segment_end_ts)
|
||||
),
|
||||
"]")) %>%
|
||||
select(-segment_id_start, -segment_id_end)),
|
||||
data = map2(data, inferred_day_segments, ~ .x %>% mutate(assigned_segments = map_chr(timestamp, ~find_segments_event(.x, inferred_day_segments))))) %>%
|
||||
paste0(segment_start_ts, ",", segment_end_ts)),
|
||||
"]"))),
|
||||
data = map2(data, inferred_day_segments, assign_rows_to_segments)) %>%
|
||||
select(-inferred_day_segments) %>%
|
||||
unnest(data) %>%
|
||||
arrange(timestamp)
|
||||
|
||||
}
|
||||
|
||||
return(sensor_data)
|
||||
|
|
|
@ -40,9 +40,9 @@ is_multiplaform_participant <- function(dbEngine, device_ids, platforms){
|
|||
participant <- snakemake@input[[1]]
|
||||
group <- snakemake@params[["group"]]
|
||||
table <- snakemake@params[["table"]]
|
||||
sensor <- snakemake@params[["sensor"]]
|
||||
timezone <- snakemake@params[["timezone"]]
|
||||
aware_multiplatform_tables <- str_split(snakemake@params[["aware_multiplatform_tables"]], ",")[[1]]
|
||||
unifiable_tables = snakemake@params[["unifiable_sensors"]]
|
||||
sensor_file <- snakemake@output[[1]]
|
||||
|
||||
device_ids <- strsplit(readLines(participant, n=1), ",")[[1]]
|
||||
|
@ -58,20 +58,19 @@ end_datetime_utc = format(as.POSIXct(paste0(end_date, " 23:59:59"),format="%Y/%m
|
|||
|
||||
dbEngine <- dbConnect(MySQL(), default.file = "./.env", group = group)
|
||||
|
||||
# Get existent columns in table
|
||||
available_columns <- colnames(dbGetQuery(dbEngine, paste0("SELECT * FROM ", table, " LIMIT 1")))
|
||||
|
||||
if("device_id" %in% available_columns){
|
||||
if(is_multiplaform_participant(dbEngine, device_ids, platforms)){
|
||||
sensor_data <- unify_raw_data(dbEngine, table, start_datetime_utc, end_datetime_utc, aware_multiplatform_tables, unifiable_tables, device_ids, platforms)
|
||||
sensor_data <- unify_raw_data(dbEngine, table, sensor, start_datetime_utc, end_datetime_utc, aware_multiplatform_tables, device_ids, platforms)
|
||||
}else {
|
||||
# table has two elements for conversation and activity recognition (they store data on a different table for ios and android)
|
||||
if(length(table) > 1){
|
||||
table <- table[[toupper(platforms[1])]]
|
||||
}
|
||||
query <- paste0("SELECT * FROM ", table, " WHERE device_id IN ('", paste0(device_ids, collapse = "','"), "')")
|
||||
if("timestamp" %in% available_columns && !(is.na(start_datetime_utc)) && !(is.na(end_datetime_utc)) && start_datetime_utc < end_datetime_utc)
|
||||
if(!(is.na(start_datetime_utc)) && !(is.na(end_datetime_utc)) && start_datetime_utc < end_datetime_utc)
|
||||
query <- paste0(query, "AND timestamp BETWEEN 1000*UNIX_TIMESTAMP('", start_datetime_utc, "') AND 1000*UNIX_TIMESTAMP('", end_datetime_utc, "')")
|
||||
sensor_data <- dbGetQuery(dbEngine, query)
|
||||
}
|
||||
|
||||
if("timestamp" %in% available_columns)
|
||||
sensor_data <- sensor_data %>% arrange(timestamp)
|
||||
|
||||
# Unify device_id
|
||||
|
@ -80,8 +79,5 @@ if("device_id" %in% available_columns){
|
|||
# Dropping duplicates on all columns except for _id or id
|
||||
sensor_data <- sensor_data %>% distinct(!!!syms(setdiff(names(sensor_data), c("_id", "id"))))
|
||||
|
||||
} else
|
||||
stop(paste0("Table ", table, "does not have a device_id column (Aware ID) to link its data to a participant"))
|
||||
|
||||
write_csv(sensor_data, sensor_file)
|
||||
dbDisconnect(dbEngine)
|
|
@ -4,11 +4,10 @@ source("src/data/unify_utils.R")
|
|||
sensor_data <- read.csv(snakemake@input[["sensor_data"]], stringsAsFactors = FALSE)
|
||||
participant_info <- snakemake@input[["participant_info"]]
|
||||
sensor <- snakemake@params[["sensor"]]
|
||||
unifiable_sensors = snakemake@params[["unifiable_sensors"]]
|
||||
|
||||
platforms <- strsplit(readLines(participant_info, n=2)[[2]], ",")[[1]]
|
||||
platform <- ifelse(platforms[1] == "multiple" | (length(platforms) > 1 & "android" %in% platforms & "ios" %in% platforms), "android", platforms[1])
|
||||
|
||||
sensor_data <- unify_data(sensor_data, sensor, platform, unifiable_sensors)
|
||||
sensor_data <- unify_data(sensor_data, sensor, platform)
|
||||
|
||||
write.csv(sensor_data, snakemake@output[[1]], row.names = FALSE)
|
||||
|
|
|
@ -101,7 +101,7 @@ clean_ios_activity_column <- function(ios_gar){
|
|||
return(ios_gar)
|
||||
}
|
||||
|
||||
unify_ios_gar <- function(ios_gar){
|
||||
unify_ios_activity_recognition <- function(ios_gar){
|
||||
# We only need to unify Google Activity Recognition data for iOS
|
||||
# discard rows where activities column is blank
|
||||
ios_gar <- ios_gar[-which(ios_gar$activities == ""), ]
|
||||
|
@ -138,7 +138,7 @@ unify_ios_conversation <- function(conversation){
|
|||
}
|
||||
|
||||
# This function is used in download_dataset.R
|
||||
unify_raw_data <- function(dbEngine, table, start_datetime_utc, end_datetime_utc, aware_multiplatform_tables, unifiable_tables, device_ids, platforms){
|
||||
unify_raw_data <- function(dbEngine, sensor_table, sensor, start_datetime_utc, end_datetime_utc, aware_multiplatform_tables, device_ids, platforms){
|
||||
# If platforms is 'multiple', fetch each device_id's platform from aware_device, otherwise, use those given by the user
|
||||
if(length(platforms) == 1 && platforms == "multiple")
|
||||
devices_platforms <- dbGetQuery(dbEngine, paste0("SELECT device_id,brand FROM aware_device WHERE device_id IN ('", paste0(device_ids, collapse = "','"), "')")) %>%
|
||||
|
@ -147,8 +147,9 @@ unify_raw_data <- function(dbEngine, table, start_datetime_utc, end_datetime_utc
|
|||
devices_platforms <- data.frame(device_id = device_ids, platform = platforms)
|
||||
|
||||
# Get existent tables in database
|
||||
available_tables_in_db <- dbGetQuery(dbEngine, paste0("SELECT table_name FROM information_schema.tables WHERE table_type = 'base table' AND table_schema='", dbGetInfo(dbEngine)$dbname,"'")) %>% pull(table_name)
|
||||
|
||||
available_tables_in_db <- dbGetQuery(dbEngine, paste0("SELECT table_name FROM information_schema.tables WHERE table_schema='", dbGetInfo(dbEngine)$dbname,"'"))[[1]]
|
||||
if(!any(sensor_table %in% available_tables_in_db))
|
||||
stop(paste0("You requested data from these table(s) ", paste0(sensor_table, collapse=", "), " but they don't exist in your database ", dbGetInfo(dbEngine)$dbname))
|
||||
# Parse the table names for activity recognition and conversation plugins because they are different between android and ios
|
||||
ar_tables <- setNames(aware_multiplatform_tables[1:2], c("android", "ios"))
|
||||
conversation_tables <- setNames(aware_multiplatform_tables[3:4], c("android", "ios"))
|
||||
|
@ -160,17 +161,19 @@ unify_raw_data <- function(dbEngine, table, start_datetime_utc, end_datetime_utc
|
|||
platform <- row$platform
|
||||
|
||||
# Handle special cases when tables for the same sensor have different names for Android and iOS (AR and conversation)
|
||||
if(table %in% ar_tables)
|
||||
if(length(sensor_table) == 1)
|
||||
table <- sensor_table
|
||||
else if(all(sensor_table == ar_tables))
|
||||
table <- ar_tables[[platform]]
|
||||
else if(table %in% conversation_tables)
|
||||
else if(all(sensor_table == conversation_tables))
|
||||
table <- conversation_tables[[platform]]
|
||||
|
||||
if(table %in% available_tables_in_db){
|
||||
query <- paste0("SELECT * FROM ", table, " WHERE device_id IN ('", device_id, "')")
|
||||
if("timestamp" %in% available_columns && !(is.na(start_datetime_utc)) && !(is.na(end_datetime_utc)) && start_datetime_utc < end_datetime_utc){
|
||||
if(!(is.na(start_datetime_utc)) && !(is.na(end_datetime_utc)) && start_datetime_utc < end_datetime_utc){
|
||||
query <- paste0(query, "AND timestamp BETWEEN 1000*UNIX_TIMESTAMP('", start_datetime_utc, "') AND 1000*UNIX_TIMESTAMP('", end_datetime_utc, "')")
|
||||
}
|
||||
sensor_data <- unify_data(dbGetQuery(dbEngine, query), table, platform, unifiable_tables)
|
||||
sensor_data <- unify_data(dbGetQuery(dbEngine, query), sensor, platform)
|
||||
participants_sensordata <- append(participants_sensordata, list(sensor_data))
|
||||
}else{
|
||||
warning(paste0("Missing ", table, " table. We unified the data from ", paste0(devices_platforms$device_id, collapse = " and "), " but without records from this missing table for ", device_id))
|
||||
|
@ -182,25 +185,16 @@ unify_raw_data <- function(dbEngine, table, start_datetime_utc, end_datetime_utc
|
|||
}
|
||||
|
||||
# This function is used in unify_ios_android.R and unify_raw_data function
|
||||
unify_data <- function(sensor_data, sensor, platform, unifiable_sensors){
|
||||
if(sensor == unifiable_sensors$calls){
|
||||
if(platform == "ios"){
|
||||
unify_data <- function(sensor_data, sensor, platform){
|
||||
if(sensor == "phone_calls" & platform == "ios"){
|
||||
sensor_data = unify_ios_calls(sensor_data)
|
||||
}
|
||||
# android calls remain unchanged
|
||||
} else if(sensor == unifiable_sensors$battery){
|
||||
if(platform == "ios"){
|
||||
} else if(sensor == "phone_battery" & platform == "ios"){
|
||||
sensor_data = unify_ios_battery(sensor_data)
|
||||
}
|
||||
# android battery remains unchanged
|
||||
} else if(sensor == unifiable_sensors$ios_activity_recognition){
|
||||
sensor_data = unify_ios_gar(sensor_data)
|
||||
} else if(sensor == unifiable_sensors$screen){
|
||||
if(platform == "ios"){
|
||||
} else if(sensor == "phone_activity_recognition" & platform == "ios"){
|
||||
sensor_data = unify_ios_activity_recognition(sensor_data)
|
||||
} else if(sensor == "phone_screen" & platform == "ios"){
|
||||
sensor_data = unify_ios_screen(sensor_data)
|
||||
}
|
||||
# android screen remains unchanged
|
||||
} else if(sensor == unifiable_sensors$ios_conversation){
|
||||
} else if(sensor == "phone_conversation" & platform == "ios"){
|
||||
sensor_data = unify_ios_conversation(sensor_data)
|
||||
}
|
||||
return(sensor_data)
|
||||
|
|
|
@ -3,7 +3,7 @@ library("dplyr")
|
|||
library("stringr")
|
||||
|
||||
# Load Ian Barnett's code. Taken from https://scholar.harvard.edu/ibarnett/software/gpsmobility
|
||||
file.sources = list.files(c("src/features/locations/barnett/library"), pattern="*.R$", full.names=TRUE, ignore.case=TRUE)
|
||||
file.sources = list.files(c("src/features/phone_locations/barnett/library"), pattern="*.R$", full.names=TRUE, ignore.case=TRUE)
|
||||
sapply(file.sources,source,.GlobalEnv)
|
||||
|
||||
create_empty_file <- function(requested_features){
|
||||
|
@ -52,10 +52,13 @@ barnett_features <- function(sensor_data_files, day_segment, params){
|
|||
if (nrow(location) > 1){
|
||||
# Filter by segment and skip any non-daily segment
|
||||
location <- location %>% filter_data_by_segment(day_segment)
|
||||
segment <- location %>% head(1) %>% pull(local_segment)
|
||||
segment_data <- str_split(segment, "#")[[1]]
|
||||
if(segment_data[[2]] != segment_data[[4]] || segment_data[[3]] != "00:00:00" || segment_data[[5]] != "23:59:59"){
|
||||
warning(paste("Barnett's location features cannot be computed for day segmentes that are not daily (cover 00:00:00 to 23:59:59 of every day). Skipping for ", segment))
|
||||
|
||||
datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
|
||||
datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59"
|
||||
location <- location %>% mutate(is_daily = str_detect(local_segment, paste0(day_segment, "#", datetime_start_regex, ",", datetime_end_regex)))
|
||||
|
||||
if(!all(location$is_daily)){
|
||||
message(paste("Barnett's location features cannot be computed for day segmentes that are not daily (cover 00:00:00 to 23:59:59 of every day). Skipping ", day_segment))
|
||||
location_features <- create_empty_file(requested_features)
|
||||
} else {
|
||||
# Count how many minutes of data we use to get location features
|
|
@ -0,0 +1,46 @@
|
|||
library(dplyr)
|
||||
|
||||
# Compute one RAPIDS wifi feature per day-segment instance.
#
# data:        wifi scans; this function reads its `bssid` column and groups
#              the segment-filtered rows by `local_segment`
# feature:     "countscans", "uniquedevices" or "countscansmostuniquedevice"
# day_segment: segment label forwarded to filter_data_by_segment()
#
# Returns a data frame with one row per local_segment and a single feature
# column named wifi_rapids_<feature>. Any other feature name matches no branch
# and the function yields NULL.
compute_wifi_feature <- function(data, feature, day_segment){
    segment_scans <- filter_data_by_segment(data, day_segment)
    feature_column <- paste("wifi_rapids", feature, sep = "_")

    if(feature %in% c("countscans", "uniquedevices")){
        scans_by_segment <- group_by(segment_scans, local_segment)
        if(feature == "countscans"){
            # Number of scan rows in each segment instance
            scans_by_segment <- summarise(scans_by_segment, !!feature_column := n())
        } else {
            # Number of distinct access points (bssid) in each segment instance
            scans_by_segment <- summarise(scans_by_segment, !!feature_column := n_distinct(bssid))
        }
        return(scans_by_segment)
    } else if(feature == "countscansmostuniquedevice"){
        # Tag every row with the total number of scans of its bssid
        scans_with_totals <- segment_scans %>%
            group_by(bssid) %>%
            mutate(N = n()) %>%
            ungroup()
        # Keep the rows of the most scanned device(s); if several devices tie,
        # only the first row (in data order) decides which one is used
        top_rows <- filter(scans_with_totals, N == max(N))
        top_bssid <- pull(head(top_rows, 1), bssid)

        # Count the scans of that device in each segment instance
        scans_per_segment <- segment_scans %>%
            filter(bssid == top_bssid) %>%
            group_by(local_segment) %>%
            summarise(!!feature_column := n())
        return(replace(scans_per_segment, is.na(scans_per_segment), 0))
    }
}
|
||||
|
||||
# Build the RAPIDS wifi feature table for one day segment.
#
# sensor_data_files: list whose "sensor_data" element is the path of the wifi CSV
# day_segment:       segment label forwarded to compute_wifi_feature()
# provider:          provider configuration; only its "FEATURES" entry is read
#
# Returns a data frame keyed by local_segment with one column per requested
# feature this provider supports. If none is requested, the result is an empty
# data frame that only has the local_segment column.
rapids_features <- function(sensor_data_files, day_segment, provider){
    wifi_data <- read.csv(sensor_data_files[["sensor_data"]], stringsAsFactors = FALSE)

    # Features this function knows how to compute, narrowed to the requested ones
    supported_features <- c("countscans", "uniquedevices", "countscansmostuniquedevice")
    selected_features <- intersect(supported_features, provider[["FEATURES"]])

    # Start from an empty frame and outer-join each computed feature onto it
    empty_features <- data.frame(local_segment = character(), stringsAsFactors = FALSE)
    Reduce(
        function(merged, feature_name){
            computed <- compute_wifi_feature(wifi_data, feature_name, day_segment)
            merge(merged, computed, by = "local_segment", all = TRUE)
        },
        selected_features,
        empty_features
    )
}
|
|
@ -1,6 +1,8 @@
|
|||
source("renv/activate.R")
|
||||
library("dplyr")
|
||||
library("tidyr")
|
||||
library("tibble")
|
||||
options(scipen=999)
|
||||
|
||||
# Using mostly indexing instead of tidyr because it is faster
|
||||
resampled_episodes <- read.csv(snakemake@input[[1]])
|
||||
|
|
|
@ -74,7 +74,7 @@ def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_file
|
|||
sensor_features = pd.DataFrame(columns=["local_segment"])
|
||||
day_segments_labels = pd.read_csv(day_segments_file, header=0)
|
||||
if "FEATURES" not in provider:
|
||||
raise ValueError("Provider config[{}][PROVIDERS][{}] is missing a FEATURES attribute in config.yaml".format(sensor_key.upper(), provider_key))
|
||||
raise ValueError("Provider config[{}][PROVIDERS][{}] is missing a FEATURES attribute in config.yaml".format(sensor_key.upper(), provider_key.upper()))
|
||||
|
||||
if provider["COMPUTE"] == True:
|
||||
|
||||
|
|
Loading…
Reference in New Issue