Update file names
parent d32771fd9e
commit 24bf62a7ab

220 Snakefile
@@ -13,17 +13,11 @@ if len(config["PIDS"]) == 0:
     raise ValueError("Add participants IDs to PIDS in config.yaml. Remember to create their participant files in data/external")
 
 if config["PHONE_VALID_SENSED_BINS"]["COMPUTE"] or config["PHONE_VALID_SENSED_DAYS"]["COMPUTE"]: # valid sensed bins are necessary for sensed days, so we add these files anyway if sensed days are requested
-    if len(config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"]) == 0:
-        raise ValueError("If you want to compute PHONE_VALID_SENSED_BINS or PHONE_VALID_SENSED_DAYS, you need to add at least one table to [PHONE_VALID_SENSED_BINS][DB_TABLES] in config.yaml")
-
-    pids_android = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "android", config["PIDS"]))
-    pids_ios = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "ios", config["PIDS"]))
-    tables_android = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["IOS"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]] # for android, discard any ios tables that may exist
-    tables_ios = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"]]] # for ios, discard any android tables that may exist
-
-    for pids,table in zip([pids_android, pids_ios], [tables_android, tables_ios]):
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=pids, sensor=table))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=pids, sensor=table))
+    if len(config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]) == 0:
+        raise ValueError("If you want to compute PHONE_VALID_SENSED_BINS or PHONE_VALID_SENSED_DAYS, you need to add at least one PHONE_SENSOR to [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] in config.yaml")
+
+    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"])))
+    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"])))
     files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
     files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_timestamps.csv", pid=config["PIDS"]))
 
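The hunk above swaps database table names for lowercased sensor keys when building target paths. A minimal sketch of that mapping in plain Python; the expand() below is a toy stand-in for Snakemake's helper of the same name, and the config excerpt is hypothetical:

from itertools import product

config = {  # hypothetical excerpt of config.yaml after this commit
    "PIDS": ["p01", "p02"],
    "PHONE_VALID_SENSED_BINS": {"PHONE_SENSORS": ["PHONE_MESSAGES", "PHONE_CALLS"]},
}

def expand(pattern, **wildcards):
    """Tiny stand-in for Snakemake's expand(): cartesian product of wildcard values."""
    keys = list(wildcards)
    return [pattern.format(**dict(zip(keys, combo)))
            for combo in product(*(list(wildcards[k]) for k in keys))]

files = expand("data/raw/{pid}/{sensor}_raw.csv",
               pid=config["PIDS"],
               sensor=map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]))
print(files)
# ['data/raw/p01/phone_messages_raw.csv', 'data/raw/p01/phone_calls_raw.csv',
#  'data/raw/p02/phone_messages_raw.csv', 'data/raw/p02/phone_calls_raw.csv']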
@@ -33,106 +27,100 @@ if config["PHONE_VALID_SENSED_DAYS"]["COMPUTE"]:
         min_valid_hours_per_day=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_HOURS_PER_DAY"],
         min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
 
-for provider in config["MESSAGES"]["PROVIDERS"].keys():
-    if config["MESSAGES"]["PROVIDERS"][provider]["COMPUTE"]:
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["MESSAGES"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="MESSAGES".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="MESSAGES".lower()))
+for provider in config["PHONE_MESSAGES"]["PROVIDERS"].keys():
+    if config["PHONE_MESSAGES"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_messages_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_messages_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_messages_features/phone_messages_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_MESSAGES"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_messages.csv", pid=config["PIDS"]))
 
-for provider in config["CALLS"]["PROVIDERS"].keys():
-    if config["CALLS"]["PROVIDERS"][provider]["COMPUTE"]:
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["CALLS"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="CALLS".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="CALLS".lower()))
+for provider in config["PHONE_CALLS"]["PROVIDERS"].keys():
+    if config["PHONE_CALLS"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_calls_raw.csv", pid=config["PIDS"], sensor=config["PHONE_CALLS"]["DB_TABLE"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_calls_with_datetime.csv", pid=config["PIDS"], sensor=config["PHONE_CALLS"]["DB_TABLE"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_calls_with_datetime_unified.csv", pid=config["PIDS"], sensor=config["PHONE_CALLS"]["DB_TABLE"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_calls_features/phone_calls_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_CALLS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_calls.csv", pid=config["PIDS"]))
 
-for provider in config["BLUETOOTH"]["PROVIDERS"].keys():
-    if config["BLUETOOTH"]["PROVIDERS"][provider]["COMPUTE"]:
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["BLUETOOTH"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["BLUETOOTH"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["BLUETOOTH"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="BLUETOOTH".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="BLUETOOTH".lower()))
+for provider in config["PHONE_BLUETOOTH"]["PROVIDERS"].keys():
+    if config["PHONE_BLUETOOTH"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_bluetooth_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_bluetooth_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_bluetooth_features/phone_bluetooth_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_BLUETOOTH"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_bluetooth.csv", pid=config["PIDS"]))
 
-for provider in config["ACTIVITY_RECOGNITION"]["PROVIDERS"].keys():
-    if config["ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["COMPUTE"]:
-        pids_android = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "android", config["PIDS"]))
-        pids_ios = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "ios", config["PIDS"]))
-
-        for pids,table in zip([pids_android, pids_ios], [config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]):
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=pids, sensor=table))
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=pids, sensor=table))
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=pids, sensor=table))
-
-        files_to_compute.extend(expand("data/interim/{pid}/activity_recognition_episodes.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/activity_recognition_episodes_resampled.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/activity_recognition_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="ACTIVITY_RECOGNITION".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="ACTIVITY_RECOGNITION".lower()))
+for provider in config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"].keys():
+    if config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime_unified.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_features/phone_activity_recognition_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_activity_recognition.csv", pid=config["PIDS"]))
 
 
-for provider in config["BATTERY"]["PROVIDERS"].keys():
-    if config["BATTERY"]["PROVIDERS"][provider]["COMPUTE"]:
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["BATTERY"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/battery_episodes.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/battery_episodes_resampled.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/battery_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["SCREEN"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="BATTERY".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="BATTERY".lower()))
+for provider in config["PHONE_BATTERY"]["PROVIDERS"].keys():
+    if config["PHONE_BATTERY"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_battery_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_battery_episodes.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_battery_episodes_resampled.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_battery_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_battery_features/phone_battery_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_BATTERY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_battery.csv", pid=config["PIDS"]))
 
 
-for provider in config["SCREEN"]["PROVIDERS"].keys():
-    if config["SCREEN"]["PROVIDERS"][provider]["COMPUTE"]:
-        if config["SCREEN"]["DB_TABLE"] in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"]:
+for provider in config["PHONE_SCREEN"]["PROVIDERS"].keys():
+    if config["PHONE_SCREEN"]["PROVIDERS"][provider]["COMPUTE"]:
+        if "PHONE_SCREEN" in config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]:
             files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
         else:
-            raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to [PHONE_VALID_SENSED_BINS][DB_TABLES] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/screen_episodes.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/screen_episodes_resampled.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/screen_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["SCREEN"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="SCREEN".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="SCREEN".lower()))
+            raise ValueError("Error: Add PHONE_SCREEN (and as many phone sensors as you have in your database) to [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
+        files_to_compute.extend(expand("data/raw/{pid}/phone_screen_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_screen_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_screen_with_datetime_unified.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_screen_episodes.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_screen_episodes_resampled.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_screen_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_screen_features/phone_screen_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_SCREEN"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_screen.csv", pid=config["PIDS"]))
 
-for provider in config["LIGHT"]["PROVIDERS"].keys():
-    if config["LIGHT"]["PROVIDERS"][provider]["COMPUTE"]:
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["LIGHT"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["LIGHT"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["LIGHT"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="LIGHT".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="LIGHT".lower()))
+for provider in config["PHONE_LIGHT"]["PROVIDERS"].keys():
+    if config["PHONE_LIGHT"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_light_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_light_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_light_features/phone_light_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_LIGHT"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_light.csv", pid=config["PIDS"],))
 
-for provider in config["ACCELEROMETER"]["PROVIDERS"].keys():
-    if config["ACCELEROMETER"]["PROVIDERS"][provider]["COMPUTE"]:
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["ACCELEROMETER"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["ACCELEROMETER"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["ACCELEROMETER"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="ACCELEROMETER".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="ACCELEROMETER".lower()))
+for provider in config["PHONE_ACCELEROMETER"]["PROVIDERS"].keys():
+    if config["PHONE_ACCELEROMETER"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_accelerometer_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_accelerometer_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_accelerometer_features/phone_accelerometer_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_ACCELEROMETER"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_accelerometer.csv", pid=config["PIDS"]))
 
-for provider in config["APPLICATIONS_FOREGROUND"]["PROVIDERS"].keys():
-    if config["APPLICATIONS_FOREGROUND"]["PROVIDERS"][provider]["COMPUTE"]:
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_with_genre.csv", pid=config["PIDS"], sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["APPLICATIONS_FOREGROUND"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="APPLICATIONS_FOREGROUND".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="APPLICATIONS_FOREGROUND".lower()))
+for provider in config["PHONE_APPLICATIONS_FOREGROUND"]["PROVIDERS"].keys():
+    if config["PHONE_APPLICATIONS_FOREGROUND"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_applications_foreground_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_applications_foreground_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_applications_foreground_with_datetime_with_categories.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_applications_foreground_features/phone_applications_foreground_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_APPLICATIONS_FOREGROUND"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_applications_foreground.csv", pid=config["PIDS"]))
 
-for provider in config["WIFI"]["PROVIDERS"].keys():
-    if config["WIFI"]["PROVIDERS"][provider]["COMPUTE"]:
-        if len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) > 0:
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]))
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]))
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor_key}_with_datetime_visibleandconnected.csv", pid=config["PIDS"], sensor_key="WIFI".lower()))
-            files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["WIFI"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="WIFI".lower()))
-            files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="WIFI".lower()))
-        if len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) > 0:
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]))
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]))
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor_key}_with_datetime_visibleandconnected.csv", pid=config["PIDS"], sensor_key="WIFI".lower()))
-            files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["WIFI"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="WIFI".lower()))
-            files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="WIFI".lower()))
+for provider in config["PHONE_WIFI_VISIBLE"]["PROVIDERS"].keys():
+    if config["PHONE_WIFI_VISIBLE"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_wifi_visible_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_wifi_visible_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_wifi_visible_features/phone_wifi_visible_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_WIFI_VISIBLE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_wifi_visible.csv", pid=config["PIDS"]))
+
+for provider in config["PHONE_WIFI_CONNECTED"]["PROVIDERS"].keys():
+    if config["PHONE_WIFI_CONNECTED"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_wifi_connected_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_wifi_connected_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_wifi_connected_features/phone_wifi_connected_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_WIFI_CONNECTED"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_wifi_connected.csv", pid=config["PIDS"]))
 
 if config["HEARTRATE"]["COMPUTE"]:
     files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["HEARTRATE"]["DB_TABLE"]))
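Every renamed sensor block in the hunk above repeats the same target shape: raw stages under data/raw, one per-provider interim features file, and one processed features file. A hedged sketch of that shared shape; sensor_targets and its stages argument are hypothetical helpers for illustration, not part of the actual Snakefile:

def sensor_targets(config, sensor_key, stages):
    """Collect the output files for one renamed sensor, e.g. "PHONE_CALLS"."""
    name = sensor_key.lower()
    targets = []
    for provider, settings in config[sensor_key]["PROVIDERS"].items():
        if not settings["COMPUTE"]:
            continue
        for stage in stages:  # e.g. ["raw", "with_datetime"]
            targets.append(f"data/raw/{{pid}}/{name}_{stage}.csv")  # {pid} stays a Snakemake wildcard
        targets.append(f"data/interim/{{pid}}/{name}_features/{name}_"
                       f"{settings['SRC_LANGUAGE'].lower()}_{provider.lower()}.csv")
        targets.append(f"data/processed/features/{{pid}}/{name}.csv")
    return targets

# Example with a hypothetical config excerpt:
config = {"PHONE_CALLS": {"PROVIDERS": {"RAPIDS": {"COMPUTE": True, "SRC_LANGUAGE": "r"}}}}
print(sensor_targets(config, "PHONE_CALLS", ["raw", "with_datetime", "with_datetime_unified"]))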
@@ -151,31 +139,27 @@ if config["SLEEP"]["COMPUTE"]:
     files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday", "summary"]))
     files_to_compute.extend(expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SLEEP"]["DAY_SEGMENTS"]))
 
-for provider in config["CONVERSATION"]["PROVIDERS"].keys():
-    if config["CONVERSATION"]["PROVIDERS"][provider]["COMPUTE"]:
-        pids_android = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "android", config["PIDS"]))
-        pids_ios = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "ios", config["PIDS"]))
-        for pids,table in zip([pids_android, pids_ios], [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["CONVERSATION"]["DB_TABLE"]["IOS"]]):
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=pids, sensor=table))
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=pids, sensor=table))
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=pids, sensor=table))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["CONVERSATION"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="CONVERSATION".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="CONVERSATION".lower()))
+for provider in config["PHONE_CONVERSATION"]["PROVIDERS"].keys():
+    if config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_with_datetime_unified.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_conversation_features/phone_conversation_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_conversation.csv", pid=config["PIDS"]))
 
-for provider in config["LOCATIONS"]["PROVIDERS"].keys():
-    if config["LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
-        if config["LOCATIONS"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED":
-            if config["LOCATIONS"]["DB_TABLE"] in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"]:
+for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
+    if config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
+        if config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED":
+            if config["PHONE_LOCATIONS"]["DB_TABLE"] in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"]:
                 files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
             else:
                 raise ValueError("Error: Add your locations table (and as many sensor tables as you have) to [PHONE_VALID_SENSED_BINS][DB_TABLES] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)")
 
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["LOCATIONS"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["LOCATIONS"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor}_processed_{locations_to_use}.csv", pid=config["PIDS"], sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor}_processed_{locations_to_use}_with_datetime.csv", pid=config["PIDS"], sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["LOCATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="LOCATIONS".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="LOCATIONS".lower()))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_locations_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_locations_features/phone_locations_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_locations.csv", pid=config["PIDS"]))
 
 # visualization for data exploration
 if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
 
129 config.yaml

@@ -32,9 +32,12 @@ READABLE_DATETIME:
 PHONE_VALID_SENSED_BINS:
   COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features
   BIN_SIZE: &bin_size 5 # (in minutes)
-  # Add as many sensor tables as you have, they all improve the computation of PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS.
-  # If you are extracting screen or Barnett's location features, screen and locations tables are mandatory.
-  DB_TABLES: []
+  # Add as many PHONE sensors as you have, they all improve the computation of PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS.
+  # If you are extracting screen or Barnett/Doryab location features, PHONE_SCREEN and PHONE_LOCATIONS tables are mandatory.
+  # You can choose any of the keys shown below, just make sure its DB_TABLE exists in your database!
+  # PHONE_MESSAGES, PHONE_CALLS, PHONE_LOCATIONS, PHONE_BLUETOOTH, PHONE_ACTIVITY_RECOGNITION, PHONE_BATTERY, PHONE_SCREEN, PHONE_LIGHT,
+  # PHONE_ACCELEROMETER, PHONE_APPLICATIONS_FOREGROUND, PHONE_WIFI_VISIBLE, PHONE_WIFI_CONNECTED, PHONE_CONVERSATION
+  PHONE_SENSORS: []
 
 PHONE_VALID_SENSED_DAYS:
   COMPUTE: False
@@ -42,7 +45,7 @@ PHONE_VALID_SENSED_DAYS:
   MIN_VALID_BINS_PER_HOUR: &min_valid_bins_per_hour [6] # (out of 60min/BIN_SIZE bins)
 
 # Communication SMS features config, TYPES and FEATURES keys need to match
-MESSAGES:
+PHONE_MESSAGES:
   DB_TABLE: messages
   PROVIDERS:
     RAPIDS:
@@ -52,10 +55,10 @@ MESSAGES:
       received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
       sent: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
       SRC_LANGUAGE: "r"
-      SRC_FOLDER: "rapids" # inside src/features/messages
+      SRC_FOLDER: "rapids" # inside src/features/phone_messages
 
 # Communication call features config, TYPES and FEATURES keys need to match
-CALLS:
+PHONE_CALLS:
   DB_TABLE: calls
   PROVIDERS:
     RAPIDS:
@@ -66,20 +69,13 @@ CALLS:
       incoming: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact]
       outgoing: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact]
       SRC_LANGUAGE: "r"
-      SRC_FOLDER: "rapids" # inside src/features/calls
+      SRC_FOLDER: "rapids" # inside src/features/phone_calls
 
-APPLICATION_GENRES:
-  CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
-  CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
-  UPDATE_CATALOGUE_FILE: false # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
-  SCRAPE_MISSING_GENRES: false # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
-
-LOCATIONS:
+PHONE_LOCATIONS:
   DB_TABLE: locations
   LOCATIONS_TO_USE: FUSED_RESAMPLED # ALL, GPS OR FUSED_RESAMPLED
   FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
   FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
-  TIMEZONE: *timezone
   PROVIDERS:
     DORYAB:
       COMPUTE: False
@@ -90,7 +86,7 @@ LOCATIONS:
       MAXIMUM_GAP_ALLOWED: 300
       MINUTES_DATA_USED: False
       SAMPLING_FREQUENCY: 0
-      SRC_FOLDER: "doryab" # inside src/features/locations
+      SRC_FOLDER: "doryab" # inside src/features/phone_locations
       SRC_LANGUAGE: "python"
 
     BARNETT:
@@ -99,20 +95,20 @@ LOCATIONS:
       ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
       TIMEZONE: *timezone
       MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
-      SRC_FOLDER: "barnett" # inside src/features/locations
+      SRC_FOLDER: "barnett" # inside src/features/phone_locations
       SRC_LANGUAGE: "r"
 
-BLUETOOTH:
+PHONE_BLUETOOTH:
   DB_TABLE: bluetooth
   PROVIDERS:
     RAPIDS:
       COMPUTE: False
       FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
-      SRC_FOLDER: "rapids" # inside src/features/bluetooth
+      SRC_FOLDER: "rapids" # inside src/features/phone_bluetooth
       SRC_LANGUAGE: "r"
 
 
-ACTIVITY_RECOGNITION:
+PHONE_ACTIVITY_RECOGNITION:
   DB_TABLE:
     ANDROID: plugin_google_activity_recognition
     IOS: plugin_ios_activity_recognition
@@ -124,19 +120,19 @@ ACTIVITY_RECOGNITION:
       STATIONARY: ["still", "tilting"]
       MOBILE: ["on_foot", "walking", "running", "on_bicycle"]
       VEHICLE: ["in_vehicle"]
-      SRC_FOLDER: "rapids" # inside src/features/activity_recognition
+      SRC_FOLDER: "rapids" # inside src/features/phone_activity_recognition
       SRC_LANGUAGE: "python"
 
-BATTERY:
+PHONE_BATTERY:
   DB_TABLE: battery
   PROVIDERS:
     RAPIDS:
       COMPUTE: False
       FEATURES: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"]
-      SRC_FOLDER: "rapids" # inside src/features/battery
+      SRC_FOLDER: "rapids" # inside src/features/phone_battery
       SRC_LANGUAGE: "python"
 
-SCREEN:
+PHONE_SCREEN:
   DB_TABLE: screen
   PROVIDERS:
     RAPIDS:
@@ -146,25 +142,25 @@ SCREEN:
       IGNORE_EPISODES_LONGER_THAN: 0 # in minutes, set to 0 to disable
       FEATURES: ["countepisode", "sumduration", "maxduration", "minduration", "avgduration", "stdduration", "firstuseafter"] # "episodepersensedminutes" needs to be added later
       EPISODE_TYPES: ["unlock"]
-      SRC_FOLDER: "rapids" # inside src/features/screen
+      SRC_FOLDER: "rapids" # inside src/features/phone_screen
       SRC_LANGUAGE: "python"
 
-LIGHT:
+PHONE_LIGHT:
   DB_TABLE: light
   PROVIDERS:
     RAPIDS:
       COMPUTE: False
       FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
-      SRC_FOLDER: "rapids" # inside src/features/light
+      SRC_FOLDER: "rapids" # inside src/features/phone_light
       SRC_LANGUAGE: "python"
 
-ACCELEROMETER:
+PHONE_ACCELEROMETER:
   DB_TABLE: accelerometer
   PROVIDERS:
     RAPIDS:
      COMPUTE: False
       FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
-      SRC_FOLDER: "rapids" # inside src/features/accelerometer
+      SRC_FOLDER: "rapids" # inside src/features/phone_accelerometer
       SRC_LANGUAGE: "python"
 
     PANDA:
@@ -173,11 +169,16 @@ ACCELEROMETER:
       FEATURES:
         exertional_activity_episode: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"]
         nonexertional_activity_episode: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"]
-      SRC_FOLDER: "panda" # inside src/features/accelerometer
+      SRC_FOLDER: "panda" # inside src/features/phone_accelerometer
       SRC_LANGUAGE: "python"
 
-APPLICATIONS_FOREGROUND:
+PHONE_APPLICATIONS_FOREGROUND:
   DB_TABLE: applications_foreground
+  APPLICATION_CATEGORIES:
+    CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
+    CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
+    UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
+    SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
   PROVIDERS:
     RAPIDS:
       COMPUTE: False
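The hunk above relocates the old top-level APPLICATION_GENRES block under PHONE_APPLICATIONS_FOREGROUND as APPLICATION_CATEGORIES, renaming SCRAPE_MISSING_GENRES to SCRAPE_MISSING_CATEGORIES. A hedged sketch of reading the relocated settings; the inline dict is a hypothetical excerpt of the new config.yaml, not pipeline code:

config = {
    "PHONE_APPLICATIONS_FOREGROUND": {
        "DB_TABLE": "applications_foreground",
        "APPLICATION_CATEGORIES": {
            "CATALOGUE_SOURCE": "FILE",
            "CATALOGUE_FILE": "data/external/stachl_application_genre_catalogue.csv",
        },
    }
}

categories = config["PHONE_APPLICATIONS_FOREGROUND"]["APPLICATION_CATEGORIES"]
if categories["CATALOGUE_SOURCE"] == "FILE":
    catalogue_path = categories["CATALOGUE_FILE"]  # read app genres from this CSV
    print(catalogue_path)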
@@ -189,9 +190,45 @@ APPLICATIONS_FOREGROUND:
       EXCLUDED_CATEGORIES: []
       EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"]
       FEATURES: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]
-      SRC_FOLDER: "rapids" # inside src/features/applications_foreground
+      SRC_FOLDER: "rapids" # inside src/features/phone_applications_foreground
       SRC_LANGUAGE: "python"
 
+PHONE_WIFI_VISIBLE:
+  DB_TABLE: "wifi"
+  PROVIDERS:
+    RAPIDS:
+      COMPUTE: False
+      FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
+      SRC_FOLDER: "rapids" # inside src/features/phone_wifi_visible
+      SRC_LANGUAGE: "r"
+
+PHONE_WIFI_CONNECTED:
+  DB_TABLE: "sensor_wifi"
+  PROVIDERS:
+    RAPIDS:
+      COMPUTE: False
+      FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
+      SRC_FOLDER: "rapids" # inside src/features/phone_wifi_connected
+      SRC_LANGUAGE: "r"
+
+PHONE_CONVERSATION:
+  DB_TABLE:
+    ANDROID: plugin_studentlife_audio_android
+    IOS: plugin_studentlife_audio
+  PROVIDERS:
+    RAPIDS:
+      COMPUTE: False
+      FEATURES: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
+        "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
+        "avgenergy","sdenergy","minenergy","maxenergy","silencesensedfraction","noisesensedfraction",
+        "voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction",
+        "unknownexpectedfraction","countconversation"]
+      RECORDING_MINUTES: 1
+      PAUSED_MINUTES: 3
+      SRC_FOLDER: "rapids" # inside src/features/phone_conversation
+      SRC_LANGUAGE: "python"
+
+
 HEARTRATE:
   COMPUTE: False
   DB_TABLE: fitbit_data
@@ -223,34 +260,6 @@ SLEEP:
       SLEEP_TYPES: ["main", "nap", "all"]
       SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
 
-WIFI:
-  DB_TABLE:
-    VISIBLE_ACCESS_POINTS: "wifi" # if you only have a CONNECTED_ACCESS_POINTS table, set this value to ""
-    CONNECTED_ACCESS_POINTS: "sensor_wifi" # if you only have a VISIBLE_ACCESS_POINTS table, set this value to ""
-  PROVIDERS:
-    RAPIDS:
-      COMPUTE: False
-      FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
-      SRC_FOLDER: "rapids" # inside src/features/bluetooth
-      SRC_LANGUAGE: "r"
-
-CONVERSATION:
-  DB_TABLE:
-    ANDROID: plugin_studentlife_audio_android
-    IOS: plugin_studentlife_audio
-  PROVIDERS:
-    RAPIDS:
-      COMPUTE: False
-      FEATURES: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
-        "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
-        "avgenergy","sdenergy","minenergy","maxenergy","silencesensedfraction","noisesensedfraction",
-        "voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction",
-        "unknownexpectedfraction","countconversation"]
-      RECORDING_MINUTES: 1
-      PAUSED_MINUTES: 3
-      SRC_FOLDER: "rapids" # inside src/features/conversation
-      SRC_LANGUAGE: "python"
-
 ### Visualizations ################################################################
 HEATMAP_FEATURES_CORRELATIONS:
   PLOT: False
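Every entry in [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] has to match one of the renamed top-level sections listed in the config comment above. A hedged sanity check one could run against the new config.yaml; this is a hypothetical helper script, assuming PyYAML is installed, not part of the pipeline:

import yaml  # PyYAML, assumed available

with open("config.yaml") as f:
    config = yaml.safe_load(f)

valid = {"PHONE_MESSAGES", "PHONE_CALLS", "PHONE_LOCATIONS", "PHONE_BLUETOOTH",
         "PHONE_ACTIVITY_RECOGNITION", "PHONE_BATTERY", "PHONE_SCREEN", "PHONE_LIGHT",
         "PHONE_ACCELEROMETER", "PHONE_APPLICATIONS_FOREGROUND", "PHONE_WIFI_VISIBLE",
         "PHONE_WIFI_CONNECTED", "PHONE_CONVERSATION"}

for sensor in config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]:
    if sensor not in valid:
        raise ValueError(f"{sensor} is not a valid key for [PHONE_VALID_SENSED_BINS][PHONE_SENSORS]")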
@@ -14,69 +14,20 @@ def infer_participant_platform(participant_file):
 
     return platform
 
-# Preprocessing.smk ####################################################################################################
-
-def optional_phone_sensed_bins_input(wildcards):
-    platform = infer_participant_platform("data/external/"+wildcards.pid)
-
-    if platform == "android":
-        tables_platform = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["IOS"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]] # for android, discard any ios tables that may exist
-    elif platform == "ios":
-        tables_platform = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"]]] # for ios, discard any android tables that may exist
-
-    return expand("data/raw/{{pid}}/{table}_with_datetime.csv", table = tables_platform)
-
-def optional_phone_sensed_timestamps_input(wildcards):
-    platform = infer_participant_platform("data/external/"+wildcards.pid)
-
-    if platform == "android":
-        tables_platform = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["IOS"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]] # for android, discard any ios tables that may exist
-    elif platform == "ios":
-        tables_platform = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"]]] # for ios, discard any android tables that may exist
-
-    return expand("data/raw/{{pid}}/{table}_raw.csv", table = tables_platform)
-
 # Features.smk #########################################################################################################
 def find_features_files(wildcards):
     feature_files = []
     for provider_key, provider in config[(wildcards.sensor_key).upper()]["PROVIDERS"].items():
         if provider["COMPUTE"]:
-            feature_files.extend(expand("data/interim/{{pid}}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", sensor_key=(wildcards.sensor_key).lower(), language=provider["SRC_LANGUAGE"].lower(), provider_key=provider_key))
+            feature_files.extend(expand("data/interim/{{pid}}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", sensor_key=wildcards.sensor_key.lower(), language=provider["SRC_LANGUAGE"].lower(), provider_key=provider_key.lower()))
     return(feature_files)
 
-def optional_ar_input(wildcards):
-    platform = infer_participant_platform("data/external/"+wildcards.pid)
-
-    if platform == "android":
-        return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"])
-    elif platform == "ios":
-        return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"])
-
-def optional_conversation_input(wildcards):
-    platform = infer_participant_platform("data/external/"+wildcards.pid)
-
-    if platform == "android":
-        return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CONVERSATION"]["DB_TABLE"]["ANDROID"])[0]
-    elif platform == "ios":
-        return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CONVERSATION"]["DB_TABLE"]["IOS"])[0]
-
 def optional_steps_sleep_input(wildcards):
     if config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"] == True and config["STEP"]["EXCLUDE_SLEEP"]["TYPE"] == "FITBIT_BASED":
         return "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv"
     else:
         return []
 
-def optional_wifi_input(wildcards):
-    if len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) > 0 and len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) == 0:
-        return {"visible_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"])}
-    elif len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) == 0 and len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) > 0:
-        return {"connected_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"])}
-    elif len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) > 0 and len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) > 0:
-        return {"visible_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]), "connected_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"])}
-    else:
-        raise ValueError("If you are computing WIFI features you need to provide either VISIBLE_ACCESS_POINTS, CONNECTED_ACCESS_POINTS or both")
-
 # Models.smk ###########################################################################################################
 
 def input_merge_features_of_single_participant(wildcards):
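The one behavioral change in find_features_files above is that provider_key is now lowercased to match the renamed feature files. A hedged, self-contained sketch of the updated lookup; the inline config stands in for config.yaml and the Snakemake wildcards object is reduced to a plain argument:

config = {"PHONE_CALLS": {"PROVIDERS": {"RAPIDS": {"COMPUTE": True, "SRC_LANGUAGE": "r"}}}}

def find_features_files(sensor_key):
    feature_files = []
    for provider_key, provider in config[sensor_key.upper()]["PROVIDERS"].items():
        if provider["COMPUTE"]:
            # provider_key.lower() is the change: RAPIDS -> rapids in the file name
            feature_files.append("data/interim/{pid}/%s_features/%s_%s_%s.csv" % (
                sensor_key.lower(), sensor_key.lower(),
                provider["SRC_LANGUAGE"].lower(), provider_key.lower()))
    return feature_files

print(find_features_files("phone_calls"))
# ['data/interim/{pid}/phone_calls_features/phone_calls_r_rapids.csv']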
@@ -28,341 +28,211 @@ rule resample_episodes_with_datetime:
     script:
         "../src/data/readable_datetime.R"

-rule accelerometer_r_features:
+rule phone_accelerometer_python_features:
     input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["ACCELEROMETER"]["DB_TABLE"])[0],
+        sensor_data = "data/raw/{pid}/phone_accelerometer_with_datetime.csv",
         day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
     params:
-        provider = lambda wildcards: config["ACCELEROMETER"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_ACCELEROMETER"]["PROVIDERS"][wildcards.provider_key.upper()],
         provider_key = "{provider_key}",
-        sensor_key = "accelerometer"
+        sensor_key = "phone_accelerometer"
     output:
-        "data/interim/{pid}/accelerometer_features/accelerometer_r_{provider_key}.csv"
+        "data/interim/{pid}/phone_accelerometer_features/phone_accelerometer_python_{provider_key}.csv"
-    script:
-        "../src/features/entry.R"
-
-rule accelerometer_python_features:
-    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["ACCELEROMETER"]["DB_TABLE"])[0],
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["ACCELEROMETER"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "accelerometer"
-    output:
-        "data/interim/{pid}/accelerometer_features/accelerometer_python_{provider_key}.csv"
     script:
         "../src/features/entry.py"

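Throughout this file the per-language rule pairs (<sensor>_r_features / <sensor>_python_features) are collapsed and renamed to phone_<sensor>_<language>_features, and the provider is now looked up with wildcards.provider_key.upper(), so provider keys can stay lowercase in file paths while the config keeps uppercase keys. A small runnable sketch of that lookup (the config subset is illustrative, not the full RAPIDS config):

    from types import SimpleNamespace

    config = {"PHONE_ACCELEROMETER": {"PROVIDERS": {"RAPIDS": {"COMPUTE": True}}}}

    # Mirrors: lambda wildcards: config["PHONE_ACCELEROMETER"]["PROVIDERS"][wildcards.provider_key.upper()]
    provider = lambda wildcards: config["PHONE_ACCELEROMETER"]["PROVIDERS"][wildcards.provider_key.upper()]

    wildcards = SimpleNamespace(provider_key="rapids")  # lowercase, as it appears in output paths
    print(provider(wildcards))  # {'COMPUTE': True}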
 rule activity_recognition_episodes:
     input:
-        optional_ar_input
+        sensor_data = "data/raw/{pid}/phone_activity_recognition_with_datetime_unified.csv"
     output:
-        "data/interim/{pid}/activity_recognition_episodes.csv"
+        "data/interim/{pid}/phone_activity_recognition_episodes.csv"
     script:
-        "../src/features/activity_recognition/episodes/activity_recognition_episodes.R"
+        "../src/features/phone_activity_recognition/episodes/activity_recognition_episodes.R"

-rule activity_recognition_r_features:
+rule phone_activity_recognition_python_features:
     input:
-        sensor_episodes = "data/interim/{pid}/activity_recognition_episodes_resampled_with_datetime.csv",
+        sensor_episodes = "data/interim/{pid}/phone_activity_recognition_episodes_resampled_with_datetime.csv",
         day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
     params:
-        provider = lambda wildcards: config["ACTIVITY_RECOGNITION"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"][wildcards.provider_key.upper()],
         provider_key = "{provider_key}",
-        sensor_key = "activity_recognition"
+        sensor_key = "phone_activity_recognition"
     output:
-        "data/interim/{pid}/activity_recognition_features/activity_recognition_r_{provider_key}.csv"
+        "data/interim/{pid}/phone_activity_recognition_features/phone_activity_recognition_python_{provider_key}.csv"
-    script:
-        "../src/features/entry.R"
-
-rule activity_recognition_python_features:
-    input:
-        sensor_episodes = "data/interim/{pid}/activity_recognition_episodes_resampled_with_datetime.csv",
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["ACTIVITY_RECOGNITION"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "activity_recognition"
-    output:
-        "data/interim/{pid}/activity_recognition_features/activity_recognition_python_{provider_key}.csv"
     script:
         "../src/features/entry.py"

-rule applications_foreground_r_features:
+rule phone_applications_foreground_python_features:
     input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_with_genre.csv", sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"])[0],
+        sensor_data = "data/raw/{pid}/phone_applications_foreground_with_datetime_with_categories.csv",
         day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
     params:
-        provider = lambda wildcards: config["APPLICATIONS_FOREGROUND"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_APPLICATIONS_FOREGROUND"]["PROVIDERS"][wildcards.provider_key.upper()],
         provider_key = "{provider_key}",
-        sensor_key = "applications_foreground"
+        sensor_key = "phone_applications_foreground"
     output:
-        "data/interim/{pid}/applications_foreground_features/applications_foreground_r_{provider_key}.csv"
+        "data/interim/{pid}/phone_applications_foreground_features/phone_applications_foreground_python_{provider_key}.csv"
-    script:
-        "../src/features/entry.R"
-
-rule applications_foreground_python_features:
-    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_with_genre.csv", sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"])[0],
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["APPLICATIONS_FOREGROUND"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "applications_foreground"
-    output:
-        "data/interim/{pid}/applications_foreground_features/applications_foreground_python_{provider_key}.csv"
     script:
         "../src/features/entry.py"

 rule battery_episodes:
     input:
-        expand("data/raw/{{pid}}/{sensor}_raw.csv", sensor=config["BATTERY"]["DB_TABLE"])
+        "data/raw/{pid}/phone_battery_raw.csv"
     output:
-        "data/interim/{pid}/battery_episodes.csv"
+        "data/interim/{pid}/phone_battery_episodes.csv"
     script:
-        "../src/features/battery/episodes/battery_episodes.R"
+        "../src/features/phone_battery/episodes/battery_episodes.R"

-rule battery_r_features:
+rule phone_battery_python_features:
     input:
-        sensor_episodes = "data/interim/{pid}/battery_episodes_resampled_with_datetime.csv",
+        sensor_episodes = "data/interim/{pid}/phone_battery_episodes_resampled_with_datetime.csv",
         day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
     params:
-        provider = lambda wildcards: config["BATTERY"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_BATTERY"]["PROVIDERS"][wildcards.provider_key.upper()],
         provider_key = "{provider_key}",
-        sensor_key = "battery"
+        sensor_key = "phone_battery"
     output:
-        "data/interim/{pid}/battery_features/battery_r_{provider_key}.csv"
+        "data/interim/{pid}/phone_battery_features/phone_battery_python_{provider_key}.csv"
-    script:
-        "../src/features/entry.R"
-
-rule battery_python_features:
-    input:
-        sensor_episodes = "data/interim/{pid}/battery_episodes_resampled_with_datetime.csv",
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["BATTERY"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "battery"
-    output:
-        "data/interim/{pid}/battery_features/battery_python_{provider_key}.csv"
     script:
         "../src/features/entry.py"

-rule bluetooth_r_features:
+rule phone_bluetooth_r_features:
     input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BLUETOOTH"]["DB_TABLE"])[0],
+        sensor_data = "data/raw/{pid}/phone_bluetooth_with_datetime.csv",
         day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
     params:
-        provider = lambda wildcards: config["BLUETOOTH"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_BLUETOOTH"]["PROVIDERS"][wildcards.provider_key.upper()],
         provider_key = "{provider_key}",
-        sensor_key = "bluetooth"
+        sensor_key = "phone_bluetooth"
     output:
-        "data/interim/{pid}/bluetooth_features/bluetooth_r_{provider_key}.csv"
+        "data/interim/{pid}/phone_bluetooth_features/phone_bluetooth_r_{provider_key}.csv"
     script:
         "../src/features/entry.R"

-rule bluetooth_python_features:
-    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BLUETOOTH"]["DB_TABLE"])[0],
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["BLUETOOTH"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "bluetooth"
-    output:
-        "data/interim/{pid}/bluetooth_features/bluetooth_python_{provider_key}.csv"
-    script:
-        "../src/features/entry.py"

 rule calls_r_features:
     input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CALLS"]["DB_TABLE"])[0],
+        sensor_data = "data/raw/{pid}/phone_calls_with_datetime_unified.csv",
         day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
     params:
-        provider = lambda wildcards: config["CALLS"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_CALLS"]["PROVIDERS"][wildcards.provider_key.upper()],
         provider_key = "{provider_key}",
-        sensor_key = "calls"
+        sensor_key = "phone_calls"
     output:
-        "data/interim/{pid}/calls_features/calls_r_{provider_key}.csv"
+        "data/interim/{pid}/phone_calls_features/phone_calls_r_{provider_key}.csv"
-    script:
-        "../src/features/entry.R"
-
-rule calls_python_features:
-    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CALLS"]["DB_TABLE"])[0],
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["CALLS"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "calls"
-    output:
-        "data/interim/{pid}/calls_features/calls_python_{provider_key}.csv"
-    script:
-        "../src/features/entry.py"

-rule conversation_r_features:
-    input:
-        sensor_data = optional_conversation_input,
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "conversation"
-    output:
-        "data/interim/{pid}/conversation_features/conversation_r_{provider_key}.csv"
     script:
         "../src/features/entry.R"

 rule conversation_python_features:
     input:
-        sensor_data = optional_conversation_input,
+        sensor_data = "data/raw/{pid}/phone_conversation_with_datetime_unified.csv",
         day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
     params:
-        provider = lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_CONVERSATION"]["PROVIDERS"][wildcards.provider_key.upper()],
         provider_key = "{provider_key}",
-        sensor_key = "conversation"
+        sensor_key = "phone_conversation"
     output:
-        "data/interim/{pid}/conversation_features/conversation_python_{provider_key}.csv"
+        "data/interim/{pid}/phone_conversation_features/phone_conversation_python_{provider_key}.csv"
     script:
         "../src/features/entry.py"

-rule light_r_features:
+rule phone_light_python_features:
     input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["LIGHT"]["DB_TABLE"])[0],
+        sensor_data = "data/raw/{pid}/phone_light_with_datetime.csv",
         day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
     params:
-        provider = lambda wildcards: config["LIGHT"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_LIGHT"]["PROVIDERS"][wildcards.provider_key.upper()],
         provider_key = "{provider_key}",
-        sensor_key = "light"
+        sensor_key = "phone_light"
     output:
-        "data/interim/{pid}/light_features/light_r_{provider_key}.csv"
+        "data/interim/{pid}/phone_light_features/phone_light_python_{provider_key}.csv"
+    script:
+        "../src/features/entry.py"
+
+rule phone_locations_r_features:
+    input:
+        sensor_data = "data/interim/{pid}/phone_locations_processed_with_datetime.csv",
+        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
+    params:
+        provider = lambda wildcards: config["PHONE_LOCATIONS"]["PROVIDERS"][wildcards.provider_key.upper()],
+        provider_key = "{provider_key}",
+        sensor_key = "phone_locations"
+    output:
+        "data/interim/{pid}/phone_locations_features/phone_locations_r_{provider_key}.csv"
     script:
         "../src/features/entry.R"

-rule light_python_features:
+rule phone_locations_python_features:
     input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["LIGHT"]["DB_TABLE"])[0],
+        sensor_data = "data/interim/{pid}/phone_locations_processed_with_datetime.csv",
         day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
     params:
-        provider = lambda wildcards: config["LIGHT"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_LOCATIONS"]["PROVIDERS"][wildcards.provider_key.upper()],
         provider_key = "{provider_key}",
-        sensor_key = "light"
+        sensor_key = "phone_locations"
     output:
-        "data/interim/{pid}/light_features/light_python_{provider_key}.csv"
+        "data/interim/{pid}/phone_locations_features/phone_locations_python_{provider_key}.csv"
     script:
         "../src/features/entry.py"

-rule locations_r_features:
+rule phone_messages_r_features:
     input:
-        sensor_data = expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}_with_datetime.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"])[0],
+        sensor_data = "data/raw/{pid}/phone_messages_with_datetime.csv",
         day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
     params:
-        provider = lambda wildcards: config["LOCATIONS"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_MESSAGES"]["PROVIDERS"][wildcards.provider_key.upper()],
         provider_key = "{provider_key}",
-        sensor_key = "locations"
+        sensor_key = "phone_messages"
     output:
-        "data/interim/{pid}/locations_features/locations_r_{provider_key}.csv"
+        "data/interim/{pid}/phone_messages_features/phone_messages_r_{provider_key}.csv"
     script:
         "../src/features/entry.R"

-rule locations_python_features:
-    input:
-        sensor_data = expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}_with_datetime.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"])[0],
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["LOCATIONS"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "locations"
-    output:
-        "data/interim/{pid}/locations_features/locations_python_{provider_key}.csv"
-    script:
-        "../src/features/entry.py"
-
-rule messages_r_features:
-    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["MESSAGES"]["DB_TABLE"])[0],
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["MESSAGES"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "messages"
-    output:
-        "data/interim/{pid}/messages_features/messages_r_{provider_key}.csv"
-    script:
-        "../src/features/entry.R"
-
-rule messages_python_features:
-    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["MESSAGES"]["DB_TABLE"])[0],
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["MESSAGES"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "messages"
-    output:
-        "data/interim/{pid}/messages_features/messages_python_{provider_key}.csv"
-    script:
-        "../src/features/entry.py"

 rule screen_episodes:
     input:
-        screen = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["SCREEN"]["DB_TABLE"])
+        screen = "data/raw/{pid}/phone_screen_with_datetime_unified.csv"
     output:
-        "data/interim/{pid}/screen_episodes.csv"
+        "data/interim/{pid}/phone_screen_episodes.csv"
     script:
-        "../src/features/screen/episodes/screen_episodes.R"
+        "../src/features/phone_screen/episodes/screen_episodes.R"

-rule screen_r_features:
+rule phone_screen_python_features:
     input:
-        sensor_episodes = "data/interim/{pid}/screen_episodes_resampled_with_datetime.csv",
+        sensor_episodes = "data/interim/{pid}/phone_screen_episodes_resampled_with_datetime.csv",
         day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
     params:
-        provider = lambda wildcards: config["SCREEN"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_SCREEN"]["PROVIDERS"][wildcards.provider_key.upper()],
         provider_key = "{provider_key}",
-        sensor_key = "screen"
+        sensor_key = "phone_screen"
     output:
-        "data/interim/{pid}/screen_features/screen_r_{provider_key}.csv"
+        "data/interim/{pid}/phone_screen_features/phone_screen_python_{provider_key}.csv"
-    script:
-        "../src/features/entry.R"
-
-rule screen_python_features:
-    input:
-        sensor_episodes = "data/interim/{pid}/screen_episodes_resampled_with_datetime.csv",
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["SCREEN"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "screen"
-    output:
-        "data/interim/{pid}/screen_features/screen_python_{provider_key}.csv"
     script:
         "../src/features/entry.py"

-rule wifi_r_features:
+rule phone_wifi_connected_r_features:
     input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor_key}_with_datetime_visibleandconnected.csv", sensor_key="WIFI".lower())[0],
+        sensor_data = "data/raw/{pid}/phone_wifi_connected_with_datetime.csv",
         day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
     params:
-        provider = lambda wildcards: config["WIFI"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_WIFI_CONNECTED"]["PROVIDERS"][wildcards.provider_key.upper()],
         provider_key = "{provider_key}",
-        sensor_key = "wifi"
+        sensor_key = "phone_wifi_connected"
     output:
-        "data/interim/{pid}/wifi_features/wifi_r_{provider_key}.csv"
+        "data/interim/{pid}/phone_wifi_connected_features/phone_wifi_connected_r_{provider_key}.csv"
     script:
         "../src/features/entry.R"

-rule wifi_python_features:
+rule phone_wifi_visible_r_features:
     input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor_key}_with_datetime_visibleandconnected.csv", sensor_key="WIFI".lower())[0],
+        sensor_data = "data/raw/{pid}/phone_wifi_visible_with_datetime.csv",
         day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
     params:
-        provider = lambda wildcards: config["WIFI"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_WIFI_VISIBLE"]["PROVIDERS"][wildcards.provider_key.upper()],
         provider_key = "{provider_key}",
-        sensor_key = "wifi"
+        sensor_key = "phone_wifi_visible"
     output:
-        "data/interim/{pid}/wifi_features/wifi_python_{provider_key}.csv"
+        "data/interim/{pid}/phone_wifi_visible_features/phone_wifi_visible_r_{provider_key}.csv"
     script:
-        "../src/features/entry.py"
+        "../src/features/entry.R"

 rule fitbit_heartrate_features:
     input:
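The single WIFI sensor, which joined optional visible and connected tables into one visibleandconnected file, becomes two independent sensors, phone_wifi_connected and phone_wifi_visible, each fed by its own *_with_datetime.csv file and each running the R entry point. Their interim feature paths follow the same template as every other sensor; a quick sketch with example values:

    # Sketch: feature paths for the two new wifi sensors (example pid/provider).
    template = "data/interim/{pid}/{sensor}_features/{sensor}_r_{provider_key}.csv"
    for sensor in ("phone_wifi_connected", "phone_wifi_visible"):
        print(template.format(pid="p01", sensor=sensor, provider_key="rapids"))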
@@ -29,10 +29,10 @@ rule download_dataset:
         "data/external/{pid}"
     params:
         group = config["DOWNLOAD_DATASET"]["GROUP"],
-        table = "{sensor}",
+        sensor = "{sensor}",
+        table = lambda wildcards: config[str(wildcards.sensor).upper()]["DB_TABLE"],
         timezone = config["TIMEZONE"],
-        aware_multiplatform_tables = config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "," + config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"] + "," + config["CONVERSATION"]["DB_TABLE"]["ANDROID"] + "," + config["CONVERSATION"]["DB_TABLE"]["IOS"],
+        aware_multiplatform_tables = config["PHONE_ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "," + config["PHONE_ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"] + "," + config["PHONE_CONVERSATION"]["DB_TABLE"]["ANDROID"] + "," + config["PHONE_CONVERSATION"]["DB_TABLE"]["IOS"],
-        unifiable_sensors = {"calls": config["CALLS"]["DB_TABLE"], "battery": config["BATTERY"]["DB_TABLE"], "screen": config["SCREEN"]["DB_TABLE"], "ios_activity_recognition": config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"], "ios_conversation": config["CONVERSATION"]["DB_TABLE"]["IOS"]}
     output:
         "data/raw/{pid}/{sensor}_raw.csv"
     script:
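download_dataset no longer receives the raw table name through the sensor wildcard; the wildcard is passed along as sensor, and the DB table is derived from config by upper-casing it, so phone_calls resolves to config["PHONE_CALLS"]["DB_TABLE"]. A runnable sketch of that resolution (toy config values; real table names come from each study's database):

    # Sketch: toy config, not the full RAPIDS config.
    config = {
        "PHONE_CALLS": {"DB_TABLE": "calls"},
        "PHONE_CONVERSATION": {"DB_TABLE": {"ANDROID": "android_conversation_table",
                                            "IOS": "ios_conversation_table"}},
    }

    def db_table(sensor: str):
        # Mirrors: lambda wildcards: config[str(wildcards.sensor).upper()]["DB_TABLE"]
        return config[sensor.upper()]["DB_TABLE"]

    print(db_table("phone_calls"))         # "calls"
    print(db_table("phone_conversation"))  # per-platform dict for multiplatform sensors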
@@ -50,35 +50,23 @@ rule compute_day_segments:
     script:
         "../src/data/compute_day_segments.py"

-PHONE_SENSORS = []
-PHONE_SENSORS.extend([config["MESSAGES"]["DB_TABLE"], config["CALLS"]["DB_TABLE"], config["LOCATIONS"]["DB_TABLE"], config["BLUETOOTH"]["DB_TABLE"], config["BATTERY"]["DB_TABLE"], config["SCREEN"]["DB_TABLE"], config["LIGHT"]["DB_TABLE"], config["ACCELEROMETER"]["DB_TABLE"], config["APPLICATIONS_FOREGROUND"]["DB_TABLE"], config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["CONVERSATION"]["DB_TABLE"]["IOS"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]])
-PHONE_SENSORS.extend(config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"])
-
-if len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) > 0:
-    PHONE_SENSORS.append(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"])
-if len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) > 0:
-    PHONE_SENSORS.append(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"])
-
-rule readable_datetime:
+rule phone_readable_datetime:
     input:
-        sensor_input = "data/raw/{pid}/{sensor}_raw.csv",
+        sensor_input = "data/raw/{pid}/phone_{sensor}_raw.csv",
         day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
     params:
         timezones = None,
         fixed_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
         day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
         include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
-    wildcard_constraints:
-        sensor = '(' + '|'.join([re.escape(x) for x in PHONE_SENSORS]) + ')' # only process smartphone sensors, not fitbit
     output:
-        "data/raw/{pid}/{sensor}_with_datetime.csv"
+        "data/raw/{pid}/phone_{sensor}_with_datetime.csv"
     script:
         "../src/data/readable_datetime.R"

 rule phone_sensed_bins:
     input:
-        all_sensors = optional_phone_sensed_bins_input
+        all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor = map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]))
     params:
         bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
     output:
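The hand-built PHONE_SENSORS list and its regex wildcard constraint disappear: phone_readable_datetime now only matches outputs that literally start with phone_, and the sensed-bins inputs are generated by lower-casing the PHONE_SENSORS config entries. For the simple case used here, expand keeps {pid} as a wildcard (escaped as {{pid}} in the rule) and iterates only over the sensors, which behaves like a plain comprehension; a runnable sketch with example values:

    # Sketch: what the expand(...) call produces for phone_sensed_bins.
    phone_sensors = ["PHONE_MESSAGES", "PHONE_CALLS"]  # example config entries

    # "{pid}" is left literal for Snakemake to fill in later.
    paths = ["data/raw/{pid}/" + sensor + "_with_datetime.csv"
             for sensor in map(str.lower, phone_sensors)]
    print(paths)
    # ['data/raw/{pid}/phone_messages_with_datetime.csv',
    #  'data/raw/{pid}/phone_calls_with_datetime.csv']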
@@ -88,7 +76,7 @@ rule phone_sensed_bins:

 rule phone_sensed_timestamps:
     input:
-        all_sensors = optional_phone_sensed_timestamps_input
+        all_sensors = expand("data/raw/{{pid}}/{sensor}_raw.csv", sensor = map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]))
     output:
         "data/interim/{pid}/phone_sensed_timestamps.csv"
     script:
@@ -112,55 +100,50 @@ rule unify_ios_android:
         participant_info = "data/external/{pid}"
     params:
         sensor = "{sensor}",
-        unifiable_sensors = {"calls": config["CALLS"]["DB_TABLE"], "battery": config["BATTERY"]["DB_TABLE"], "screen": config["SCREEN"]["DB_TABLE"], "ios_activity_recognition": config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"], "ios_conversation": config["CONVERSATION"]["DB_TABLE"]["IOS"]}
     output:
         "data/raw/{pid}/{sensor}_with_datetime_unified.csv"
     script:
         "../src/data/unify_ios_android.R"

-rule process_location_types:
+rule process_phone_location_types:
     input:
-        locations = "data/raw/{pid}/{sensor}_raw.csv",
+        locations = "data/raw/{pid}/phone_locations_raw.csv",
         phone_sensed_timestamps = "data/interim/{pid}/phone_sensed_timestamps.csv",
     params:
-        consecutive_threshold = config["LOCATIONS"]["FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD"],
+        consecutive_threshold = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD"],
-        time_since_valid_location = config["LOCATIONS"]["FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION"],
+        time_since_valid_location = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION"],
-        locations_to_use = "{locations_to_use}"
+        locations_to_use = config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"]
-    wildcard_constraints:
-        locations_to_use = '(ALL|GPS|FUSED_RESAMPLED)'
     output:
-        "data/interim/{pid}/{sensor}_processed_{locations_to_use}.csv"
+        "data/interim/{pid}/phone_locations_processed.csv"
     script:
         "../src/data/process_location_types.R"

 rule readable_datetime_location_processed:
     input:
-        sensor_input = expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"]),
+        sensor_input = "data/interim/{pid}/phone_locations_processed.csv",
         day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
     params:
         timezones = None,
         fixed_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
         day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
         include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
-    wildcard_constraints:
-        locations_to_use = '(ALL|GPS|FUSED_RESAMPLED)'
     output:
-        expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}_with_datetime.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"])
+        "data/interim/{pid}/phone_locations_processed_with_datetime.csv"
     script:
         "../src/data/readable_datetime.R"

-rule application_genres:
+rule phone_application_categories:
     input:
-        "data/raw/{pid}/{sensor}_with_datetime.csv"
+        "data/raw/{pid}/phone_applications_foreground_with_datetime.csv"
     params:
-        catalogue_source = config["APPLICATION_GENRES"]["CATALOGUE_SOURCE"],
+        catalogue_source = config["PHONE_APPLICATIONS_FOREGROUND"]["APPLICATION_CATEGORIES"]["CATALOGUE_SOURCE"],
-        catalogue_file = config["APPLICATION_GENRES"]["CATALOGUE_FILE"],
+        catalogue_file = config["PHONE_APPLICATIONS_FOREGROUND"]["APPLICATION_CATEGORIES"]["CATALOGUE_FILE"],
-        update_catalogue_file = config["APPLICATION_GENRES"]["UPDATE_CATALOGUE_FILE"],
+        update_catalogue_file = config["PHONE_APPLICATIONS_FOREGROUND"]["APPLICATION_CATEGORIES"]["UPDATE_CATALOGUE_FILE"],
-        scrape_missing_genres = config["APPLICATION_GENRES"]["SCRAPE_MISSING_GENRES"]
+        scrape_missing_genres = config["PHONE_APPLICATIONS_FOREGROUND"]["APPLICATION_CATEGORIES"]["SCRAPE_MISSING_CATEGORIES"]
     output:
-        "data/raw/{pid}/{sensor}_with_datetime_with_genre.csv"
+        "data/raw/{pid}/phone_applications_foreground_with_datetime_with_categories.csv"
     script:
-        "../src/data/application_genres.R"
+        "../src/data/application_categories.R"

 rule fitbit_heartrate_with_datetime:
     input:
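process_phone_location_types drops the locations_to_use path wildcard, previously constrained to ALL, GPS or FUSED_RESAMPLED, and reads the mode straight from config, so each participant gets exactly one phone_locations_processed.csv. A hypothetical helper showing the same switch with the old constraint kept as validation (the rule itself just reads the value and does no validation):

    VALID_MODES = ("ALL", "GPS", "FUSED_RESAMPLED")  # the old wildcard constraint

    def locations_to_use(config: dict) -> str:
        # Hypothetical validation; mirrors config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"]
        mode = config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"]
        if mode not in VALID_MODES:
            raise ValueError("LOCATIONS_TO_USE must be one of {}, got {}".format(VALID_MODES, mode))
        return mode

    print(locations_to_use({"PHONE_LOCATIONS": {"LOCATIONS_TO_USE": "FUSED_RESAMPLED"}}))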
@@ -196,11 +179,3 @@ rule fitbit_sleep_with_datetime:
         intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv"
     script:
         "../src/data/fitbit_readable_datetime.py"
-
-rule join_wifi_tables:
-    input:
-        unpack(optional_wifi_input)
-    output:
-        "data/raw/{pid}/wifi_with_datetime_visibleandconnected.csv"
-    script:
-        "../src/data/join_visible_and_connected_wifi.R"
@@ -2,166 +2,163 @@ library("tidyverse")
 library("lubridate")
 options(scipen=999)

-find_segments_frequency <- function(local_date, local_time, local_timezone, segments){
-
-  assigned_segments <- segments[segments$segment_start<= local_time & segments$segment_end >= local_time, ]
-  assigned_segments["segment_start_ts"] = as.numeric(lubridate::as_datetime(stringi::stri_c(local_date,assigned_segments$segment_id_start_time), tz = local_timezone)) * 1000
-  assigned_segments["segment_end_ts"] = as.numeric(lubridate::as_datetime(stringi::stri_c(local_date,assigned_segments$segment_id_end_time), tz = local_timezone)) * 1000 + 999
-
-  return(stringi::stri_c(stringi::stri_c("[",
-                                         assigned_segments[["label"]], "#",
-                                         local_date, " ",
-                                         assigned_segments[["segment_id_start_time"]], ",",
-                                         local_date, " ",
-                                         assigned_segments[["segment_id_end_time"]], ";",
-                                         assigned_segments[["segment_start_ts"]], ",",
-                                         assigned_segments[["segment_end_ts"]],
-                                         "]"), collapse = "|"))
+day_type_delay <- function(day_type, include_past_periodic_segments){
+  delay <- day_segments %>% mutate(length_duration = duration(length)) %>% filter(repeats_on == day_type) %>% arrange(-length_duration) %>% pull(length_duration) %>% first()
+  return(if_else(is.na(delay) | include_past_periodic_segments == FALSE, duration("0days"), delay))
 }

-find_segments_periodic <- function(timestamp, segments){
-  # crossing and pivot_longer make segments a tibble, thus we need to extract [["segment_id"]]
-  return(stringi::stri_c(segments[[1]][segments[[1]]$segment_start_ts<= timestamp & segments[[1]]$segment_end_ts >= timestamp, "segment_id"][["segment_id"]], collapse = "|"))
+get_segment_dates <- function(data, local_timezone, day_type, delay){
+  dates <- data %>%
+    distinct(local_date) %>%
+    mutate(local_date_obj = date(lubridate::ymd(local_date, tz = local_timezone))) %>%
+    complete(local_date_obj = seq(date(min(local_date_obj) - delay), max(local_date_obj), by="days")) %>%
+    mutate(local_date = replace_na(as.character(date(local_date_obj))))
+
+  if(day_type == "every_day")
+    dates <- dates %>% mutate(every_day = 0)
+  else if (day_type == "wday")
+    dates <- dates %>% mutate(wday = wday(local_date_obj, week_start = 1))
+  else if (day_type == "mday")
+    dates <- dates %>% mutate(mday = mday(local_date_obj))
+  else if (day_type == "qday")
+    dates <- dates %>% mutate(qday = qday(local_date_obj))
+  else if (day_type == "yday")
+    dates <- dates %>% mutate(yday = yday(local_date_obj))
+  return(dates)
 }

-find_segments_event <- function(timestamp, segments){
-  # segments is a data.frame, we don't need to extract [["segment_id"]] like in find_segments_periodic
-  return(stringi::stri_c(segments[[1]][segments[[1]]$segment_start_ts<= timestamp & segments[[1]]$segment_end_ts >= timestamp, "segment_id"], collapse = "|"))
+assign_rows_to_segments <- function(nested_data, nested_inferred_day_segments){
+  nested_data <- nested_data %>% mutate(assigned_segments = "")
+  for(i in 1:nrow(nested_inferred_day_segments)) {
+    segment <- nested_inferred_day_segments[i,]
+    nested_data$assigned_segments <- ifelse(segment$segment_start_ts<= nested_data$timestamp & segment$segment_end_ts >= nested_data$timestamp,
+                                            stringi::stri_c(nested_data$assigned_segments, segment$segment_id, sep = "|"), nested_data$assigned_segments)
+  }
+  nested_data$assigned_segments <- substring(nested_data$assigned_segments, 2)
+  return(nested_data)
+}
+
+assign_rows_to_segments_frequency <- function(nested_data, nested_timezone, day_segments){
+  for(i in 1:nrow(day_segments)) {
+    segment <- day_segments[i,]
+    nested_data$assigned_segments <- ifelse(segment$segment_start_ts<= nested_data$local_time_obj & segment$segment_end_ts >= nested_data$local_time_obj,
+                                            # The segment_id is assambled on the fly because it depends on each row's local_date and timezone
+                                            stringi::stri_c("[",
+                                                            segment[["label"]], "#",
+                                                            nested_data$local_date, " ",
+                                                            segment[["segment_id_start_time"]], ",",
+                                                            nested_data$local_date, " ",
+                                                            segment[["segment_id_end_time"]], ";",
+                                                            as.numeric(lubridate::as_datetime(stringi::stri_c(nested_data$local_date, segment$segment_id_start_time), tz = nested_timezone)) * 1000, ",",
+                                                            as.numeric(lubridate::as_datetime(stringi::stri_c(nested_data$local_date, segment$segment_id_end_time), tz = nested_timezone)) * 1000 + 999,
+                                                            "]"),
+                                            nested_data$assigned_segments)
+  }
+  return(nested_data)
 }

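The per-row map_chr/pmap_chr lookups are replaced by the assign_rows_to_segments* helpers: the loop now runs over the (few) segments, and each vectorized ifelse tests all (many) row timestamps at once, appending matching segment ids separated by "|" and trimming the leading separator afterwards. The same idea in pandas terms (a sketch, not pipeline code):

    import pandas as pd

    rows = pd.DataFrame({"timestamp": [5, 11, 25]})
    segments = pd.DataFrame({"segment_id": ["a", "b"],
                             "segment_start_ts": [0, 10],
                             "segment_end_ts": [12, 20]})

    rows["assigned_segments"] = ""
    for segment in segments.itertuples():
        # One vectorized interval test per segment, over every row at once.
        inside = rows["timestamp"].between(segment.segment_start_ts, segment.segment_end_ts)
        rows.loc[inside, "assigned_segments"] += "|" + segment.segment_id
    rows["assigned_segments"] = rows["assigned_segments"].str.lstrip("|")
    print(rows)  # 5 -> "a", 11 -> "a|b", 25 -> ""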
 assign_to_day_segment <- function(sensor_data, day_segments, day_segments_type, include_past_periodic_segments){

   if(nrow(sensor_data) == 0)
     return(sensor_data %>% mutate(assigned_segments = NA))

-  if(day_segments_type == "FREQUENCY"){ #FREQUENCY
+  if(day_segments_type == "FREQUENCY"){

     day_segments <- day_segments %>% mutate(start_time = lubridate::hm(start_time),
                                             end_time = start_time + minutes(length) - seconds(1),
                                             segment_id_start_time = paste(str_pad(hour(start_time),2, pad="0"), str_pad(minute(start_time),2, pad="0"), str_pad(second(start_time),2, pad="0"),sep =":"),
                                             segment_id_end_time = paste(str_pad(hour(ymd("1970-01-01") + end_time),2, pad="0"), str_pad(minute(ymd("1970-01-01") + end_time),2, pad="0"), str_pad(second(ymd("1970-01-01") + end_time),2, pad="0"),sep =":"), # add ymd("1970-01-01") to get a real time instead of duration
-                                            segment_start = as.numeric(start_time),
+                                            segment_start_ts = as.numeric(start_time),
-                                            segment_end = as.numeric(end_time))
+                                            segment_end_ts = as.numeric(end_time))

-    sensor_data <- sensor_data %>% mutate(local_time_obj = as.numeric(lubridate::hms(local_time)),
-                                          assigned_segments = pmap_chr(list(local_date, local_time_obj, local_timezone), find_segments_frequency, day_segments)) %>% select(-local_time_obj)
-
-  } else if (day_segments_type == "PERIODIC"){ #PERIODIC
+    sensor_data <- sensor_data %>% mutate(local_time_obj = as.numeric(lubridate::hms(local_time)),
+                                          assigned_segments = "")
+
+    sensor_data <- sensor_data %>%
+      group_by(local_timezone) %>%
+      nest() %>%
+      mutate(data = map2(data, local_timezone, assign_rows_to_segments_frequency, day_segments)) %>%
+      unnest(cols = data) %>%
+      arrange(timestamp) %>%
+      select(-local_time_obj)
+
+    return(sensor_data)

+  } else if (day_segments_type == "PERIODIC"){

     # We need to take into account segment start dates that could include the first day of data
     day_segments <- day_segments %>% mutate(length_duration = duration(length))
-    wday_delay <- day_segments %>% mutate(length_duration = duration(length)) %>% filter(repeats_on == "wday") %>% arrange(-length_duration) %>% pull(length_duration) %>% first()
-    wday_delay <- if_else(is.na(wday_delay) | include_past_periodic_segments == FALSE, duration("0days"), wday_delay)
-
-    mday_delay <- day_segments %>% mutate(length_duration = duration(length)) %>% filter(repeats_on == "mday") %>% arrange(-length_duration) %>% pull(length_duration) %>% first()
-    mday_delay <- if_else(is.na(mday_delay) | include_past_periodic_segments == FALSE, duration("0days"), mday_delay)
-
-    qday_delay <- day_segments %>% mutate(length_duration = duration(length)) %>% filter(repeats_on == "qday") %>% arrange(-length_duration) %>% pull(length_duration) %>% first()
-    qday_delay <- if_else(is.na(qday_delay) | include_past_periodic_segments == FALSE, duration("0days"), qday_delay)
-
-    yday_delay <- day_segments %>% mutate(length_duration = duration(length)) %>% filter(repeats_on == "yday") %>% arrange(-length_duration) %>% pull(length_duration) %>% first()
-    yday_delay <- if_else(is.na(yday_delay) | include_past_periodic_segments == FALSE, duration("0days"), yday_delay)
+    every_day_delay <- duration("0days")
+    wday_delay <- day_type_delay("wday", include_past_periodic_segments)
+    mday_delay <- day_type_delay("mday", include_past_periodic_segments)
+    qday_delay <- day_type_delay("qday", include_past_periodic_segments)
+    yday_delay <- day_type_delay("yday", include_past_periodic_segments)

     sensor_data <- sensor_data %>%
-      # mutate(row_n = row_number()) %>%
       group_by(local_timezone) %>%
       nest() %>%
       # get existent days that we need to start segments from
-      mutate(every_date = map(data, ~.x %>%
-                                distinct(local_date) %>%
-                                mutate(local_date_obj = date(lubridate::ymd(local_date, tz = local_timezone))) %>%
-                                complete(local_date_obj = seq(min(local_date_obj), max(local_date_obj), by="days")) %>%
-                                mutate(local_date = replace_na(as.character(date(local_date_obj)))) %>%
-                                mutate(every_day = 0)),
-             week_dates = map(data, ~.x %>%
-                                distinct(local_date) %>%
-                                mutate(local_date_obj = date(lubridate::ymd(local_date, tz = local_timezone))) %>%
-                                complete(local_date_obj = seq(date(min(local_date_obj) - wday_delay), max(local_date_obj), by="days")) %>%
-                                mutate(local_date = replace_na(as.character(date(local_date_obj)))) %>%
-                                mutate(wday = wday(local_date_obj, week_start = 1)) ),
-             month_dates = map(data, ~.x %>%
-                                distinct(local_date) %>%
-                                mutate(local_date_obj = date(lubridate::ymd(local_date, tz = local_timezone))) %>%
-                                complete(local_date_obj = seq(date(min(local_date_obj) - mday_delay), max(local_date_obj), by="days")) %>%
-                                mutate(local_date = replace_na(as.character(date(local_date_obj)))) %>%
-                                mutate(mday = mday(local_date_obj))),
-             quarter_dates = map(data, ~.x %>%
-                                distinct(local_date) %>%
-                                mutate(local_date_obj = date(lubridate::ymd(local_date, tz = local_timezone))) %>%
-                                complete(local_date_obj = seq(date(min(local_date_obj) - qday_delay), max(local_date_obj), by="days")) %>%
-                                mutate(local_date = replace_na(as.character(date(local_date_obj)))) %>%
-                                mutate(qday = qday(local_date_obj)) ),
-             year_dates = map(data, ~.x %>%
-                                distinct(local_date) %>%
-                                mutate(local_date_obj = date(lubridate::ymd(local_date, tz = local_timezone))) %>%
-                                complete(local_date_obj = seq(date(min(local_date_obj) - yday_delay), max(local_date_obj), by="days")) %>%
-                                mutate(local_date = replace_na(as.character(date(local_date_obj)))) %>%
-                                mutate(yday = yday(local_date_obj)) ),
+      mutate(every_date = map2(data, local_timezone, get_segment_dates, "every_day", every_day_delay),
+             week_dates = map2(data, local_timezone, get_segment_dates, "wday", wday_delay),
+             month_dates = map2(data, local_timezone, get_segment_dates, "mday", mday_delay),
+             quarter_dates = map2(data, local_timezone, get_segment_dates, "qday", qday_delay),
+             year_dates = map2(data, local_timezone, get_segment_dates, "yday", yday_delay),
             existent_dates = pmap(list(every_date, week_dates, month_dates, quarter_dates, year_dates),
                                   function(every_date, week_dates, month_dates, quarter_dates, year_dates) reduce(list(every_date, week_dates,month_dates, quarter_dates, year_dates), .f=full_join)),
-             every_date = NULL,
-             week_dates = NULL,
-             month_dates = NULL,
-             quarter_dates = NULL,
-             year_dates = NULL,
-             # build the actual day segments taking into account the users requested leangth and repeat schedule
+             # build the actual day segments taking into account the users requested length and repeat schedule
             inferred_day_segments = map(existent_dates,
                                         ~ crossing(day_segments, .x) %>%
                                           pivot_longer(cols = c(every_day,wday, mday, qday, yday), names_to = "day_type", values_to = "day_value") %>%
                                           filter(repeats_on == day_type & repeats_value == day_value) %>%
-                                          mutate(segment_id_start = lubridate::parse_date_time(paste(local_date, start_time), orders = c("Ymd HMS", "Ymd HM")), # The segment ids (label#start#end) are computed in UTC to avoid having different labels for instances of a segment that happen in different timezones
+                                          # The segment ids (segment_id_start and segment_id_end) are computed in UTC to avoid having different labels for instances of a segment that happen in different timezones
+                                          mutate(segment_id_start = lubridate::parse_date_time(paste(local_date, start_time), orders = c("Ymd HMS", "Ymd HM")),
                                                  segment_id_end = segment_id_start + lubridate::duration(length),
-                                                 segment_start_ts = as.numeric(lubridate::parse_date_time(paste(local_date, start_time), orders = c("Ymd HMS", "Ymd HM"), tz = local_timezone)) * 1000, # The actual segments are computed using timestamps taking into account the timezone
+                                                 # The actual segments are computed using timestamps taking into account the timezone
+                                                 segment_start_ts = as.numeric(lubridate::parse_date_time(paste(local_date, start_time), orders = c("Ymd HMS", "Ymd HM"), tz = local_timezone)) * 1000,
                                                  segment_end_ts = segment_start_ts + as.numeric(lubridate::duration(length)) * 1000 + 999,
                                                  segment_id = paste0("[",
-                                                                     paste0(
-                                                                       label,"#",
-                                                                       paste0(lubridate::date(segment_id_start), " ",
-                                                                       paste(str_pad(hour(segment_id_start),2, pad="0"), str_pad(minute(segment_id_start),2, pad="0"), str_pad(second(segment_id_start),2, pad="0"),sep =":"), ",",
-                                                                       lubridate::date(segment_id_end), " ",
-                                                                       paste(str_pad(hour(segment_id_end),2, pad="0"), str_pad(minute(segment_id_end),2, pad="0"), str_pad(second(segment_id_end),2, pad="0"),sep =":")),";",
-                                                                       paste0(segment_start_ts, ",", segment_end_ts)
-                                                                     ),
+                                                                     paste0(label,"#",
+                                                                     paste0(lubridate::date(segment_id_start), " ",
+                                                                     paste(str_pad(hour(segment_id_start),2, pad="0"), str_pad(minute(segment_id_start),2, pad="0"), str_pad(second(segment_id_start),2, pad="0"),sep =":"), ",",
+                                                                     lubridate::date(segment_id_end), " ",
+                                                                     paste(str_pad(hour(segment_id_end),2, pad="0"), str_pad(minute(segment_id_end),2, pad="0"), str_pad(second(segment_id_end),2, pad="0"),sep =":")),";",
+                                                                     paste0(segment_start_ts, ",", segment_end_ts)),
                                                                      "]")) %>%
-                                          select(segment_start_ts, segment_end_ts, segment_id) %>%
-                                          drop_na(segment_start_ts, segment_end_ts)), # drop day segments with an invalid start or end time (mostly due to daylight saving changes, e.g. 2020-03-08 02:00:00 EST does not exist, clock jumps from 1am to 3am)
+                                          # drop day segments with an invalid start or end time (mostly due to daylight saving changes, e.g. 2020-03-08 02:00:00 EST does not exist, clock jumps from 01:59am to 03:00am)
+                                          drop_na(segment_start_ts, segment_end_ts)),
-             data = map2(data, inferred_day_segments, ~ .x %>% mutate(row_date_time = as.numeric(lubridate::ymd_hms(local_date_time, tz = local_timezone)) * 1000,
-                                                                      assigned_segments = map_chr(row_date_time, ~find_segments_periodic(.x, inferred_day_segments)),
-                                                                      row_date_time = NULL))
+             data = map2(data, inferred_day_segments, assign_rows_to_segments)
             ) %>%
-      select(-existent_dates, -inferred_day_segments) %>%
+      select(-existent_dates, -inferred_day_segments, -every_date, -week_dates, -month_dates, -quarter_dates, -year_dates) %>%
       unnest(cols = data) %>%
       arrange(timestamp)

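In the PERIODIC branch the five near-identical delay pipelines and date-completion blocks collapse into day_type_delay plus get_segment_dates: the longest segment length per repeat type becomes a back-fill window, so with INCLUDE_PAST_PERIODIC_SEGMENTS enabled a weekly segment can start up to its full length before the first day of data. The date back-fill in pandas terms (a sketch; the dates and the 7-day delay are example values):

    import pandas as pd

    observed = pd.to_datetime(pd.Series(["2020-05-04", "2020-05-06"]))
    delay = pd.Timedelta(days=7)  # stand-in for the longest "wday" segment length

    # Extend the date range backwards by the delay, then label each day.
    all_dates = pd.DataFrame({"local_date": pd.date_range(observed.min() - delay,
                                                          observed.max(), freq="D")})
    all_dates["wday"] = all_dates["local_date"].dt.dayofweek + 1  # Monday=1, like lubridate::wday(week_start = 1)
    print(all_dates.head(3))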
   } else if ( day_segments_type == "EVENT"){

     sensor_data <- sensor_data %>%
       group_by(local_timezone) %>%
       nest() %>%
-      mutate(inferred_day_segments = map(local_timezone, ~ day_segments %>% mutate(shift = ifelse(shift == "0", "0seconds", shift),
-                                                                                   segment_start_ts = event_timestamp + (as.integer(seconds(lubridate::duration(shift))) * ifelse(shift_direction >= 0, 1, -1) * 1000),
-                                                                                   segment_end_ts = segment_start_ts + (as.integer(seconds(lubridate::duration(length))) * 1000),
-                                                                                   segment_id_start = lubridate::as_datetime(segment_start_ts/1000, tz = .x), # these start and end datetime objects are for labeling only
-                                                                                   segment_id_end = lubridate::as_datetime(segment_end_ts/1000, tz = .x),
-                                                                                   segment_end_ts = segment_end_ts + 999,
-                                                                                   segment_id = paste0("[",
-                                                                                                       paste0(
-                                                                                                         label,"#",
-                                                                                                         paste0(lubridate::date(segment_id_start), " ",
-                                                                                                         paste(str_pad(hour(segment_id_start),2, pad="0"), str_pad(minute(segment_id_start),2, pad="0"), str_pad(second(segment_id_start),2, pad="0"),sep =":"), ",",
-                                                                                                         lubridate::date(segment_id_end), " ",
-                                                                                                         paste(str_pad(hour(segment_id_end),2, pad="0"), str_pad(minute(segment_id_end),2, pad="0"), str_pad(second(segment_id_end),2, pad="0"),sep =":")),";",
-                                                                                                         paste0(segment_start_ts, ",", segment_end_ts)
-                                                                                                       ),
-                                                                                                       "]")) %>%
-                                           select(-segment_id_start, -segment_id_end)),
-             data = map2(data, inferred_day_segments, ~ .x %>% mutate(assigned_segments = map_chr(timestamp, ~find_segments_event(.x, inferred_day_segments))))) %>%
+      mutate(inferred_day_segments = map(local_timezone, ~ day_segments %>%
+                                           mutate(shift = ifelse(shift == "0", "0seconds", shift),
+                                                  segment_start_ts = event_timestamp + (as.integer(seconds(lubridate::duration(shift))) * ifelse(shift_direction >= 0, 1, -1) * 1000),
+                                                  segment_end_ts = segment_start_ts + (as.integer(seconds(lubridate::duration(length))) * 1000),
+                                                  # these start and end datetime objects are for labeling only
+                                                  segment_id_start = lubridate::as_datetime(segment_start_ts/1000, tz = .x),
+                                                  segment_id_end = lubridate::as_datetime(segment_end_ts/1000, tz = .x),
+                                                  segment_end_ts = segment_end_ts + 999,
+                                                  segment_id = paste0("[",
+                                                                      paste0(label,"#",
+                                                                      paste0(lubridate::date(segment_id_start), " ",
+                                                                      paste(str_pad(hour(segment_id_start),2, pad="0"), str_pad(minute(segment_id_start),2, pad="0"), str_pad(second(segment_id_start),2, pad="0"),sep =":"), ",",
+                                                                      lubridate::date(segment_id_end), " ",
+                                                                      paste(str_pad(hour(segment_id_end),2, pad="0"), str_pad(minute(segment_id_end),2, pad="0"), str_pad(second(segment_id_end),2, pad="0"),sep =":")),";",
+                                                                      paste0(segment_start_ts, ",", segment_end_ts)),
+                                                                      "]"))),
+             data = map2(data, inferred_day_segments, assign_rows_to_segments)) %>%
       select(-inferred_day_segments) %>%
       unnest(data) %>%
       arrange(timestamp)

   }

   return(sensor_data)
 }
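For EVENT segments the window arithmetic is unchanged, only reformatted and rerouted through assign_rows_to_segments: the start is the event timestamp shifted by shift in the direction of shift_direction, and the end adds the segment length plus 999 ms so the boundary millisecond stays inside the segment. The same arithmetic in plain Python (example values):

    event_timestamp = 1_589_000_000_000   # ms since epoch (example)
    shift_ms = 30 * 60 * 1000             # shift = "30minutes"
    shift_direction = -1                  # negative: start before the event
    length_ms = 2 * 60 * 60 * 1000        # length = "2hours"

    segment_start_ts = event_timestamp + shift_ms * (1 if shift_direction >= 0 else -1)
    segment_end_ts = segment_start_ts + length_ms + 999  # +999 ms makes the end inclusive
    print(segment_start_ts, segment_end_ts)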
@@ -40,9 +40,9 @@ is_multiplaform_participant <- function(dbEngine, device_ids, platforms){
 participant <- snakemake@input[[1]]
 group <- snakemake@params[["group"]]
 table <- snakemake@params[["table"]]
+sensor <- snakemake@params[["sensor"]]
 timezone <- snakemake@params[["timezone"]]
 aware_multiplatform_tables <- str_split(snakemake@params[["aware_multiplatform_tables"]], ",")[[1]]
-unifiable_tables = snakemake@params[["unifiable_sensors"]]
 sensor_file <- snakemake@output[[1]]

 device_ids <- strsplit(readLines(participant, n=1), ",")[[1]]
@@ -58,30 +58,26 @@ end_datetime_utc = format(as.POSIXct(paste0(end_date, " 23:59:59"),format="%Y/%m
 
 dbEngine <- dbConnect(MySQL(), default.file = "./.env", group = group)
 
-# Get existent columns in table
-available_columns <- colnames(dbGetQuery(dbEngine, paste0("SELECT * FROM ", table, " LIMIT 1")))
-
-if("device_id" %in% available_columns){
-  if(is_multiplaform_participant(dbEngine, device_ids, platforms)){
-    sensor_data <- unify_raw_data(dbEngine, table, start_datetime_utc, end_datetime_utc, aware_multiplatform_tables, unifiable_tables, device_ids, platforms)
-  }else {
-    query <- paste0("SELECT * FROM ", table, " WHERE device_id IN ('", paste0(device_ids, collapse = "','"), "')")
-    if("timestamp" %in% available_columns && !(is.na(start_datetime_utc)) && !(is.na(end_datetime_utc)) && start_datetime_utc < end_datetime_utc)
-      query <- paste0(query, "AND timestamp BETWEEN 1000*UNIX_TIMESTAMP('", start_datetime_utc, "') AND 1000*UNIX_TIMESTAMP('", end_datetime_utc, "')")
-    sensor_data <- dbGetQuery(dbEngine, query)
-  }
-
-  if("timestamp" %in% available_columns)
-    sensor_data <- sensor_data %>% arrange(timestamp)
-
-  # Unify device_id
-  sensor_data <- sensor_data %>% mutate(device_id = unified_device_id)
-
-  # Droping duplicates on all columns except for _id or id
-  sensor_data <- sensor_data %>% distinct(!!!syms(setdiff(names(sensor_data), c("_id", "id"))))
-
-} else
-  stop(paste0("Table ", table, "does not have a device_id column (Aware ID) to link its data to a participant"))
+if(is_multiplaform_participant(dbEngine, device_ids, platforms)){
+  sensor_data <- unify_raw_data(dbEngine, table, sensor, start_datetime_utc, end_datetime_utc, aware_multiplatform_tables, device_ids, platforms)
+}else {
+  # table has two elements for conversation and activity recognition (they store data on a different table for ios and android)
+  if(length(table) > 1){
+    table <- table[[toupper(platforms[1])]]
+  }
+  query <- paste0("SELECT * FROM ", table, " WHERE device_id IN ('", paste0(device_ids, collapse = "','"), "')")
+  if(!(is.na(start_datetime_utc)) && !(is.na(end_datetime_utc)) && start_datetime_utc < end_datetime_utc)
+    query <- paste0(query, "AND timestamp BETWEEN 1000*UNIX_TIMESTAMP('", start_datetime_utc, "') AND 1000*UNIX_TIMESTAMP('", end_datetime_utc, "')")
+  sensor_data <- dbGetQuery(dbEngine, query)
+}
+
+sensor_data <- sensor_data %>% arrange(timestamp)
+
+# Unify device_id
+sensor_data <- sensor_data %>% mutate(device_id = unified_device_id)
+
+# Droping duplicates on all columns except for _id or id
+sensor_data <- sensor_data %>% distinct(!!!syms(setdiff(names(sensor_data), c("_id", "id"))))
 
 write_csv(sensor_data, sensor_file)
 dbDisconnect(dbEngine)
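Note (illustrative, not part of the commit): a sketch of the query this script now builds for a single-platform participant, with made-up device ids and dates:

    device_ids <- c("aaaaaaaa-1111", "bbbbbbbb-2222")   # hypothetical AWARE device ids
    table <- "screen"
    start_datetime_utc <- "2020-01-01 00:00:00"
    end_datetime_utc   <- "2020-06-30 23:59:59"

    query <- paste0("SELECT * FROM ", table, " WHERE device_id IN ('", paste0(device_ids, collapse = "','"), "')")
    if(!(is.na(start_datetime_utc)) && !(is.na(end_datetime_utc)) && start_datetime_utc < end_datetime_utc)
      query <- paste0(query, " AND timestamp BETWEEN 1000*UNIX_TIMESTAMP('", start_datetime_utc, "') AND 1000*UNIX_TIMESTAMP('", end_datetime_utc, "')")
    # AWARE stores timestamps in milliseconds, hence the 1000*UNIX_TIMESTAMP() scaling.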
@@ -4,11 +4,10 @@ source("src/data/unify_utils.R")
 sensor_data <- read.csv(snakemake@input[["sensor_data"]], stringsAsFactors = FALSE)
 participant_info <- snakemake@input[["participant_info"]]
 sensor <- snakemake@params[["sensor"]]
-unifiable_sensors = snakemake@params[["unifiable_sensors"]]
 
 platforms <- strsplit(readLines(participant_info, n=2)[[2]], ",")[[1]]
 platform <- ifelse(platforms[1] == "multiple" | (length(platforms) > 1 & "android" %in% platforms & "ios" %in% platforms), "android", platforms[1])
 
-sensor_data <- unify_data(sensor_data, sensor, platform, unifiable_sensors)
+sensor_data <- unify_data(sensor_data, sensor, platform)
 
 write.csv(sensor_data, snakemake@output[[1]], row.names = FALSE)
@@ -101,7 +101,7 @@ clean_ios_activity_column <- function(ios_gar){
   return(ios_gar)
 }
 
-unify_ios_gar <- function(ios_gar){
+unify_ios_activity_recognition <- function(ios_gar){
   # We only need to unify Google Activity Recognition data for iOS
   # discard rows where activities column is blank
   ios_gar <- ios_gar[-which(ios_gar$activities == ""), ]
@@ -138,7 +138,7 @@ unify_ios_conversation <- function(conversation){
 }
 
 # This function is used in download_dataset.R
-unify_raw_data <- function(dbEngine, table, start_datetime_utc, end_datetime_utc, aware_multiplatform_tables, unifiable_tables, device_ids, platforms){
+unify_raw_data <- function(dbEngine, sensor_table, sensor, start_datetime_utc, end_datetime_utc, aware_multiplatform_tables, device_ids, platforms){
   # If platforms is 'multiple', fetch each device_id's platform from aware_device, otherwise, use those given by the user
   if(length(platforms) == 1 && platforms == "multiple")
     devices_platforms <- dbGetQuery(dbEngine, paste0("SELECT device_id,brand FROM aware_device WHERE device_id IN ('", paste0(device_ids, collapse = "','"), "')")) %>%
@@ -147,8 +147,9 @@ unify_raw_data <- function(dbEngine, table, start_datetime_utc, end_datetime_utc
     devices_platforms <- data.frame(device_id = device_ids, platform = platforms)
 
   # Get existent tables in database
-  available_tables_in_db <- dbGetQuery(dbEngine, paste0("SELECT table_name FROM information_schema.tables WHERE table_type = 'base table' AND table_schema='", dbGetInfo(dbEngine)$dbname,"'")) %>% pull(table_name)
-
+  available_tables_in_db <- dbGetQuery(dbEngine, paste0("SELECT table_name FROM information_schema.tables WHERE table_schema='", dbGetInfo(dbEngine)$dbname,"'"))[[1]]
+  if(!any(sensor_table %in% available_tables_in_db))
+    stop(paste0("You requested data from these table(s) ", paste0(sensor_table, collapse=", "), " but they don't exist in your database ", dbGetInfo(dbEngine)$dbname))
   # Parse the table names for activity recognition and conversation plugins because they are different between android and ios
   ar_tables <- setNames(aware_multiplatform_tables[1:2], c("android", "ios"))
   conversation_tables <- setNames(aware_multiplatform_tables[3:4], c("android", "ios"))
@@ -160,17 +161,19 @@ unify_raw_data <- function(dbEngine, table, start_datetime_utc, end_datetime_utc
     platform <- row$platform
 
     # Handle special cases when tables for the same sensor have different names for Android and iOS (AR and conversation)
-    if(table %in% ar_tables)
+    if(length(sensor_table) == 1)
+      table <- sensor_table
+    else if(all(sensor_table == ar_tables))
       table <- ar_tables[[platform]]
-    else if(table %in% conversation_tables)
+    else if(all(sensor_table == conversation_tables))
       table <- conversation_tables[[platform]]
 
     if(table %in% available_tables_in_db){
       query <- paste0("SELECT * FROM ", table, " WHERE device_id IN ('", device_id, "')")
-      if("timestamp" %in% available_columns && !(is.na(start_datetime_utc)) && !(is.na(end_datetime_utc)) && start_datetime_utc < end_datetime_utc){
+      if(!(is.na(start_datetime_utc)) && !(is.na(end_datetime_utc)) && start_datetime_utc < end_datetime_utc){
         query <- paste0(query, "AND timestamp BETWEEN 1000*UNIX_TIMESTAMP('", start_datetime_utc, "') AND 1000*UNIX_TIMESTAMP('", end_datetime_utc, "')")
       }
-      sensor_data <- unify_data(dbGetQuery(dbEngine, query), table, platform, unifiable_tables)
+      sensor_data <- unify_data(dbGetQuery(dbEngine, query), sensor, platform)
       participants_sensordata <- append(participants_sensordata, list(sensor_data))
     }else{
       warning(paste0("Missing ", table, " table. We unified the data from ", paste0(devices_platforms$device_id, collapse = " and "), " but without records from this missing table for ", device_id))
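Note (illustrative): table resolution now starts from the whole sensor_table vector instead of a single name. A sketch of the lookup, assuming the four multi-platform tables arrive in android/ios order as parsed above (table names are example values, not necessarily this deployment's):

    aware_multiplatform_tables <- c("plugin_google_activity_recognition", "plugin_ios_activity_recognition",
                                    "plugin_studentlife_audio_android", "plugin_studentlife_audio")
    ar_tables <- setNames(aware_multiplatform_tables[1:2], c("android", "ios"))
    ar_tables[["ios"]]   # "plugin_ios_activity_recognition"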
@@ -182,25 +185,16 @@ unify_raw_data <- function(dbEngine, table, start_datetime_utc, end_datetime_utc
 }
 
 # This function is used in unify_ios_android.R and unify_raw_data function
-unify_data <- function(sensor_data, sensor, platform, unifiable_sensors){
-  if(sensor == unifiable_sensors$calls){
-    if(platform == "ios"){
-      sensor_data = unify_ios_calls(sensor_data)
-    }
-    # android calls remain unchanged
-  } else if(sensor == unifiable_sensors$battery){
-    if(platform == "ios"){
-      sensor_data = unify_ios_battery(sensor_data)
-    }
-    # android battery remains unchanged
-  } else if(sensor == unifiable_sensors$ios_activity_recognition){
-    sensor_data = unify_ios_gar(sensor_data)
-  } else if(sensor == unifiable_sensors$screen){
-    if(platform == "ios"){
-      sensor_data = unify_ios_screen(sensor_data)
-    }
-    # android screen remains unchanged
-  } else if(sensor == unifiable_sensors$ios_conversation){
+unify_data <- function(sensor_data, sensor, platform){
+  if(sensor == "phone_calls" & platform == "ios"){
+    sensor_data = unify_ios_calls(sensor_data)
+  } else if(sensor == "phone_battery" & platform == "ios"){
+    sensor_data = unify_ios_battery(sensor_data)
+  } else if(sensor == "phone_activity_recognition" & platform == "ios"){
+    sensor_data = unify_ios_activity_recognition(sensor_data)
+  } else if(sensor == "phone_screen" & platform == "ios"){
+    sensor_data = unify_ios_screen(sensor_data)
+  } else if(sensor == "phone_conversation" & platform == "ios"){
     sensor_data = unify_ios_conversation(sensor_data)
   }
   return(sensor_data)
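Note (illustrative): unify_data() now dispatches on hard-coded RAPIDS sensor keys plus the platform, instead of the old config-driven unifiable_sensors lookup, so Android data always passes through untouched. A usage sketch with a made-up data frame:

    calls <- data.frame(trace = "a1", call_type = 1, call_duration = 10)   # hypothetical rows
    unify_data(calls, sensor = "phone_calls", platform = "android")        # returned unchanged
    # With platform = "ios" the same call would be re-mapped by unify_ios_calls().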
@@ -3,7 +3,7 @@ library("dplyr")
 library("stringr")
 
 # Load Ian Barnett's code. Taken from https://scholar.harvard.edu/ibarnett/software/gpsmobility
-file.sources = list.files(c("src/features/locations/barnett/library"), pattern="*.R$", full.names=TRUE, ignore.case=TRUE)
+file.sources = list.files(c("src/features/phone_locations/barnett/library"), pattern="*.R$", full.names=TRUE, ignore.case=TRUE)
 sapply(file.sources,source,.GlobalEnv)
 
 create_empty_file <- function(requested_features){
@@ -52,10 +52,13 @@ barnett_features <- function(sensor_data_files, day_segment, params){
   if (nrow(location) > 1){
     # Filter by segment and skipping any non-daily segment
     location <- location %>% filter_data_by_segment(day_segment)
-    segment <- location %>% head(1) %>% pull(local_segment)
-    segment_data <- str_split(segment, "#")[[1]]
-    if(segment_data[[2]] != segment_data[[4]] || segment_data[[3]] != "00:00:00" || segment_data[[5]] != "23:59:59"){
-      warning(paste("Barnett's location features cannot be computed for day segmentes that are not daily (cover 00:00:00 to 23:59:59 of every day). Skipping for ", segment))
+
+    datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
+    datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59"
+    location <- location %>% mutate(is_daily = str_detect(local_segment, paste0(day_segment, "#", datetime_start_regex, ",", datetime_end_regex)))
+
+    if(!all(location$is_daily)){
+      message(paste("Barnett's location features cannot be computed for day segmentes that are not daily (cover 00:00:00 to 23:59:59 of every day). Skipping ", day_segment))
       location_features <- create_empty_file(requested_features)
     } else {
       # Count how many minutes of data we use to get location features
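Note (illustrative): instead of string-splitting one segment id, every row is now regex-checked against a full-day window. A sketch on a made-up segment label:

    library(stringr)
    day_segment <- "daily"
    datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
    datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59"
    str_detect("daily#2020-05-01 00:00:00,2020-05-01 23:59:59",
               paste0(day_segment, "#", datetime_start_regex, ",", datetime_end_regex))   # TRUE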
@@ -0,0 +1,46 @@
+library(dplyr)
+
+compute_wifi_feature <- function(data, feature, day_segment){
+  data <- data %>% filter_data_by_segment(day_segment)
+  if(feature %in% c("countscans", "uniquedevices")){
+    data <- data %>% group_by(local_segment)
+    data <- switch(feature,
+              "countscans" = data %>% summarise(!!paste("wifi_rapids", feature, sep = "_") := n()),
+              "uniquedevices" = data %>% summarise(!!paste("wifi_rapids", feature, sep = "_") := n_distinct(bssid)))
+    return(data)
+  } else if(feature == "countscansmostuniquedevice"){
+    # Get the most scanned device
+    mostuniquedevice <- data %>%
+      group_by(bssid) %>%
+      mutate(N=n()) %>%
+      ungroup() %>%
+      filter(N == max(N)) %>%
+      head(1) %>% # if there are multiple device with the same amount of scans pick the first one only
+      pull(bssid)
+    return(data %>%
+             filter(bssid == mostuniquedevice) %>%
+             group_by(local_segment) %>%
+             summarise(!!paste("wifi_rapids", feature, sep = "_") := n()) %>%
+             replace(is.na(.), 0))
+  }
+}
+
+rapids_features <- function(sensor_data_files, day_segment, provider){
+  wifi_data <- read.csv(sensor_data_files[["sensor_data"]], stringsAsFactors = FALSE)
+  requested_features <- provider[["FEATURES"]]
+  # Output dataframe
+  features = data.frame(local_segment = character(), stringsAsFactors = FALSE)
+
+  # The name of the features this function can compute
+  base_features_names <- c("countscans", "uniquedevices", "countscansmostuniquedevice")
+
+  # The subset of requested features this function can compute
+  features_to_compute <- intersect(base_features_names, requested_features)
+
+  for(feature_name in features_to_compute){
+    feature <- compute_wifi_feature(wifi_data, feature_name, day_segment)
+    features <- merge(features, feature, by="local_segment", all = TRUE)
+  }
+
+  return(features)
+}
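Note (illustrative): the summarise() calls above build the feature column name dynamically with rlang's !! and := operators. A toy run of that idiom, skipping filter_data_by_segment() (a RAPIDS helper):

    library(dplyr)
    scans <- data.frame(local_segment = c("s1", "s1", "s2"),   # made-up scans
                        bssid = c("aa:aa", "bb:bb", "aa:aa"))
    feature <- "countscans"
    scans %>% group_by(local_segment) %>%
      summarise(!!paste("wifi_rapids", feature, sep = "_") := n())
    #   local_segment wifi_rapids_countscans
    # 1            s1                      2
    # 2            s2                      1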
@@ -1,6 +1,8 @@
 source("renv/activate.R")
 library("dplyr")
 library("tidyr")
+library("tibble")
+options(scipen=999)
 
 # Using mostly indeixng instead of tidyr because is faster
 resampled_episodes <- read.csv(snakemake@input[[1]])
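Note (illustrative): options(scipen=999) keeps epoch timestamps in fixed notation when they are coerced to character for CSV output; without it, round millisecond values collapse into scientific notation:

    as.character(1600000000000)   # default options: "1.6e+12"
    options(scipen = 999)
    as.character(1600000000000)   # "1600000000000"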
@@ -74,7 +74,7 @@ def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_file
     sensor_features = pd.DataFrame(columns=["local_segment"])
     day_segments_labels = pd.read_csv(day_segments_file, header=0)
     if "FEATURES" not in provider:
-        raise ValueError("Provider config[{}][PROVIDERS][{}] is missing a FEATURES attribute in config.yaml".format(sensor_key.upper(), provider_key))
+        raise ValueError("Provider config[{}][PROVIDERS][{}] is missing a FEATURES attribute in config.yaml".format(sensor_key.upper(), provider_key.upper()))
 
     if provider["COMPUTE"] == True:
 