Migrate analysis example to new data stream
parent
fae0c2ac05
commit
2b6447105a
|
@ -15,6 +15,15 @@ if len(config["PIDS"]) == 0:
|
|||
|
||||
for provider in config["PHONE_DATA_YIELD"]["PROVIDERS"].keys():
|
||||
if config["PHONE_DATA_YIELD"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
|
||||
allowed_phone_sensors = get_phone_sensor_names()
|
||||
if not (set(config["PHONE_DATA_YIELD"]["SENSORS"]) <= set(allowed_phone_sensors)):
|
||||
raise ValueError('\nInvalid sensor(s) for PHONE_DATA_YIELD. config["PHONE_DATA_YIELD"]["SENSORS"] can have '
|
||||
'one or more of the following phone sensors: {}.\nInstead you provided "{}".\n'
|
||||
'Keep in mind that the sensors\' TABLE attribute must point to a valid database table'\
|
||||
.format(', '.join(allowed_phone_sensors),
|
||||
', '.join(set(config["PHONE_DATA_YIELD"]["SENSORS"]) - set(allowed_phone_sensors))))
|
||||
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=map(str.lower, config["PHONE_DATA_YIELD"]["SENSORS"])))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_yielded_timestamps.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv", pid=config["PIDS"]))
|
||||
|
@ -36,7 +45,6 @@ for provider in config["PHONE_CALLS"]["PROVIDERS"].keys():
|
|||
if config["PHONE_CALLS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_calls_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_calls_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_calls_with_datetime_unified.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_calls_features/phone_calls_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_CALLS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_calls.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
|
@ -76,9 +84,12 @@ for provider in config["PHONE_BATTERY"]["PROVIDERS"].keys():
|
|||
|
||||
for provider in config["PHONE_SCREEN"]["PROVIDERS"].keys():
|
||||
if config["PHONE_SCREEN"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
# if "PHONE_SCREEN" in config["PHONE_DATA_YIELD"]["SENSORS"]:# not used for now because we took episodepersensedminutes out of the list of supported features
|
||||
# files_to_compute.extend(expand("data/interim/{pid}/phone_yielded_timestamps.csv", pid=config["PIDS"]))
|
||||
# else:
|
||||
# raise ValueError("Error: Add PHONE_SCREEN (and as many PHONE_SENSORS as you have in your database) to [PHONE_DATA_YIELD][SENSORS] in config.yaml. This is necessary to compute phone_yielded_timestamps (time when the smartphone was sensing data)")
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_screen_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_screen_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_screen_with_datetime_unified.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_screen_episodes.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_screen_episodes_resampled.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_screen_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
|
||||
|
@ -142,27 +153,78 @@ for provider in config["PHONE_CONVERSATION"]["PROVIDERS"].keys():
|
|||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
# We can delete these if's as soon as we add feature PROVIDERS to any of these sensors
|
||||
if isinstance(config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"], dict):
|
||||
for provider in config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"].keys():
|
||||
if config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_crashes_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_crashes_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_crashes_with_datetime_with_categories.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_applications_crashes_features/phone_applications_crashes_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_applications_crashes.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
if isinstance(config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"], dict):
|
||||
for provider in config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"].keys():
|
||||
if config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_notifications_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_notifications_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_applications_notifications_with_datetime_with_categories.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_applications_notifications_features/phone_applications_notifications_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_APPLICATIONS_NOTIFICATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_applications_notifications.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
if isinstance(config["PHONE_KEYBOARD"]["PROVIDERS"], dict):
|
||||
for provider in config["PHONE_KEYBOARD"]["PROVIDERS"].keys():
|
||||
if config["PHONE_KEYBOARD"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_keyboard_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_keyboard_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_keyboard_features/phone_keyboard_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_KEYBOARD"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_keyboard.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
if isinstance(config["PHONE_LOG"]["PROVIDERS"], dict):
|
||||
for provider in config["PHONE_LOG"]["PROVIDERS"].keys():
|
||||
if config["PHONE_LOG"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_log_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_log_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_log_features/phone_log_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_LOG"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_log.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
|
||||
if config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
if config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] == "FUSED_RESAMPLED":
|
||||
if config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] in ["FUSED_RESAMPLED","ALL_RESAMPLED"]:
|
||||
if "PHONE_LOCATIONS" in config["PHONE_DATA_YIELD"]["SENSORS"]:
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_yielded_timestamps.csv", pid=config["PIDS"]))
|
||||
else:
|
||||
raise ValueError("Error: Add PHONE_LOCATIONS (and as many PHONE_SENSORS as you have) to [PHONE_DATA_YIELD][SENSORS] in config.yaml. This is necessary to compute phone_yielded_timestamps (time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)")
|
||||
raise ValueError("Error: Add PHONE_LOCATIONS (and as many PHONE_SENSORS as you have) to [PHONE_DATA_YIELD][SENSORS] in config.yaml. This is necessary to compute phone_yielded_timestamps (time when the smartphone was sensing data) which is used to resample fused location data (ALL_RESAMPLED and RESAMPLED_FUSED)")
|
||||
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_locations_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed_with_datetime_with_home.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_features/phone_locations_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_locations.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
for provider in config["FITBIT_DATA_YIELD"]["PROVIDERS"].keys():
|
||||
if config["FITBIT_DATA_YIELD"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_data_yield.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
for provider in config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"].keys():
|
||||
if config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_summary_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_summary_parsed.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_summary_parsed_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_summary_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/fitbit_heartrate_summary_features/fitbit_heartrate_summary_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_heartrate_summary.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
|
@ -171,8 +233,7 @@ for provider in config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"].keys():
|
|||
for provider in config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"].keys():
|
||||
if config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_parsed.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/fitbit_heartrate_intraday_features/fitbit_heartrate_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_heartrate_intraday.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
|
@ -181,18 +242,28 @@ for provider in config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"].keys():
|
|||
for provider in config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"].keys():
|
||||
if config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_parsed.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_summary_features/fitbit_sleep_summary_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_sleep_summary.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
for provider in config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"].keys():
|
||||
if config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_intraday_episodes.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_intraday_features/fitbit_sleep_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_sleep_intraday.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
for provider in config["FITBIT_STEPS_SUMMARY"]["PROVIDERS"].keys():
|
||||
if config["FITBIT_STEPS_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_summary_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_summary_parsed.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_summary_parsed_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_summary_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/fitbit_steps_summary_features/fitbit_steps_summary_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_STEPS_SUMMARY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_steps_summary.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
|
@ -201,13 +272,78 @@ for provider in config["FITBIT_STEPS_SUMMARY"]["PROVIDERS"].keys():
|
|||
for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys():
|
||||
if config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_intraday_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_intraday_parsed.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_intraday_parsed_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/fitbit_steps_intraday_features/fitbit_steps_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_steps_intraday.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
|
||||
for provider in config["EMPATICA_ACCELEROMETER"]["PROVIDERS"].keys():
|
||||
if config["EMPATICA_ACCELEROMETER"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_ACCELEROMETER"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_accelerometer.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
for provider in config["EMPATICA_HEARTRATE"]["PROVIDERS"].keys():
|
||||
if config["EMPATICA_HEARTRATE"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_heartrate_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_heartrate_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_HEARTRATE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_heartrate.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
|
||||
for provider in config["EMPATICA_TEMPERATURE"]["PROVIDERS"].keys():
|
||||
if config["EMPATICA_TEMPERATURE"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/empatica_temperature_features/empatica_temperature_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_TEMPERATURE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_temperature.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
for provider in config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"].keys():
|
||||
if config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_electrodermal_activity.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
for provider in config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"].keys():
|
||||
if config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_blood_volume_pulse.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
for provider in config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"].keys():
|
||||
if config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_inter_beat_interval.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
if isinstance(config["EMPATICA_TAGS"]["PROVIDERS"], dict):
|
||||
for provider in config["EMPATICA_TAGS"]["PROVIDERS"].keys():
|
||||
if config["EMPATICA_TAGS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/empatica_tags_features/empatica_tags_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_TAGS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_tags.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
# Visualization for Data Exploration
|
||||
if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]:
|
||||
files_to_compute.append("reports/data_exploration/histogram_phone_data_yield.html")
|
||||
|
|
|
@ -1,54 +1,62 @@
|
|||
# See https://www.rapids.science/setup/configuration/#database-credentials
|
||||
DATABASE_GROUP: &database_group
|
||||
MY_GROUP
|
||||
########################################################################################################################
|
||||
# GLOBAL CONFIGURATION #
|
||||
########################################################################################################################
|
||||
|
||||
# See https://www.rapids.science/setup/configuration/#timezone-of-your-study
|
||||
TIMEZONE: &timezone
|
||||
America/New_York
|
||||
|
||||
# See https://www.rapids.science/setup/configuration/#participant-files
|
||||
# See https://www.rapids.science/latest/setup/configuration/#participant-files
|
||||
PIDS: [example01, example02]
|
||||
|
||||
# See https://www.rapids.science/setup/configuration/#automatic-creation-of-participant-files
|
||||
# See https://www.rapids.science/latest/setup/configuration/#automatic-creation-of-participant-files
|
||||
CREATE_PARTICIPANT_FILES:
|
||||
SOURCE:
|
||||
TYPE: AWARE_DEVICE_TABLE #AWARE_DEVICE_TABLE or CSV_FILE
|
||||
DATABASE_GROUP: *database_group
|
||||
CSV_FILE_PATH: "data/external/example_participants.csv" # see docs for required format
|
||||
TIMEZONE: *timezone
|
||||
CSV_FILE_PATH: "data/external/example_participants.csv" # see docs for required format
|
||||
PHONE_SECTION:
|
||||
ADD: TRUE
|
||||
ADD: True
|
||||
DEVICE_ID_COLUMN: device_id # column name
|
||||
IGNORED_DEVICE_IDS: []
|
||||
FITBIT_SECTION:
|
||||
ADD: TRUE
|
||||
DEVICE_ID_COLUMN: device_id # column name
|
||||
ADD: True
|
||||
DEVICE_ID_COLUMN: fitbit_id # column name
|
||||
IGNORED_DEVICE_IDS: []
|
||||
EMPATICA_SECTION:
|
||||
ADD: False
|
||||
DEVICE_ID_COLUMN: empatica_id # column name
|
||||
IGNORED_DEVICE_IDS: []
|
||||
|
||||
# See https://www.rapids.science/setup/configuration/#time-segments
|
||||
# See https://www.rapids.science/latest/setup/configuration/#time-segments
|
||||
TIME_SEGMENTS: &time_segments
|
||||
TYPE: PERIODIC # FREQUENCY, PERIODIC, EVENT
|
||||
FILE: "example_profile/exampleworkflow_timesegments.csv"
|
||||
INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs
|
||||
|
||||
|
||||
# See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study
|
||||
TIMEZONE:
|
||||
TYPE: SINGLE
|
||||
SINGLE:
|
||||
TZCODE: America/New_York
|
||||
MULTIPLE:
|
||||
TZCODES_FILE: data/external/multiple_timezones_example.csv
|
||||
IF_MISSING_TZCODE: STOP
|
||||
DEFAULT_TZCODE: America/New_York
|
||||
FITBIT:
|
||||
ALLOW_MULTIPLE_TZ_PER_DEVICE: False
|
||||
INFER_FROM_SMARTPHONE_TZ: False
|
||||
|
||||
########################################################################################################################
|
||||
# PHONE #
|
||||
########################################################################################################################
|
||||
|
||||
# See https://www.rapids.science/setup/configuration/#device-data-source-configuration
|
||||
PHONE_DATA_CONFIGURATION:
|
||||
SOURCE:
|
||||
TYPE: DATABASE
|
||||
DATABASE_GROUP: *database_group
|
||||
DEVICE_ID_COLUMN: device_id # column name
|
||||
TIMEZONE:
|
||||
TYPE: SINGLE # SINGLE or MULTIPLE
|
||||
VALUE: *timezone # IF TYPE=SINGLE, see docs
|
||||
# See https://www.rapids.science/latest/setup/configuration/#device-data-source-configuration
|
||||
PHONE_DATA_STREAMS:
|
||||
USE: aware_csv
|
||||
|
||||
# AVAILABLE:
|
||||
aware_mysql:
|
||||
DATABASE_GROUP: MY_GROUP
|
||||
|
||||
aware_csv:
|
||||
FOLDER: data/external/example_workflow
|
||||
# Sensors ------
|
||||
|
||||
# https://www.rapids.science/latest/features/phone-accelerometer/
|
||||
PHONE_ACCELEROMETER:
|
||||
CONTAINER: accelerometer
|
||||
PROVIDERS:
|
||||
|
@ -67,10 +75,11 @@ PHONE_ACCELEROMETER:
|
|||
SRC_FOLDER: "panda" # inside src/features/phone_accelerometer
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-activity-recognition/
|
||||
PHONE_ACTIVITY_RECOGNITION:
|
||||
CONTAINER:
|
||||
ANDROID: plugin_google_activity_recognition
|
||||
IOS: plugin_ios_activity_recognition
|
||||
ANDROID: plugin_google_activity_recognition.csv
|
||||
IOS: plugin_ios_activity_recognition.csv
|
||||
EPISODE_THRESHOLD_BETWEEN_ROWS: 5 # minutes. Max time difference for two consecutive rows to be considered within the same battery episode.
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
|
@ -83,8 +92,19 @@ PHONE_ACTIVITY_RECOGNITION:
|
|||
SRC_FOLDER: "rapids" # inside src/features/phone_activity_recognition
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-applications-crashes/
|
||||
PHONE_APPLICATIONS_CRASHES:
|
||||
CONTAINER: applications_crashes
|
||||
APPLICATION_CATEGORIES:
|
||||
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store)
|
||||
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
|
||||
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
|
||||
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
|
||||
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-applications-foreground/
|
||||
PHONE_APPLICATIONS_FOREGROUND:
|
||||
CONTAINER: applications_foreground
|
||||
CONTAINER: applications_foreground.csv
|
||||
APPLICATION_CATEGORIES:
|
||||
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store)
|
||||
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
|
||||
|
@ -104,8 +124,19 @@ PHONE_APPLICATIONS_FOREGROUND:
|
|||
SRC_FOLDER: "rapids" # inside src/features/phone_applications_foreground
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-applications-notifications/
|
||||
PHONE_APPLICATIONS_NOTIFICATIONS:
|
||||
CONTAINER: applications_notifications
|
||||
APPLICATION_CATEGORIES:
|
||||
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store)
|
||||
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
|
||||
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
|
||||
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
|
||||
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-battery/
|
||||
PHONE_BATTERY:
|
||||
CONTAINER: battery
|
||||
CONTAINER: battery.csv
|
||||
EPISODE_THRESHOLD_BETWEEN_ROWS: 30 # minutes. Max time difference for two consecutive rows to be considered within the same battery episode.
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
|
@ -114,17 +145,36 @@ PHONE_BATTERY:
|
|||
SRC_FOLDER: "rapids" # inside src/features/phone_battery
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-bluetooth/
|
||||
PHONE_BLUETOOTH:
|
||||
CONTAINER: bluetooth
|
||||
CONTAINER: bluetooth.csv
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: True
|
||||
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_bluetooth
|
||||
SRC_LANGUAGE: "r"
|
||||
DORYAB:
|
||||
COMPUTE: False
|
||||
FEATURES:
|
||||
ALL:
|
||||
DEVICES: ["countscans", "uniquedevices", "meanscans", "stdscans"]
|
||||
SCANS_MOST_FREQUENT_DEVICE: ["withinsegments", "acrosssegments", "acrossdataset"]
|
||||
SCANS_LEAST_FREQUENT_DEVICE: ["withinsegments", "acrosssegments", "acrossdataset"]
|
||||
OWN:
|
||||
DEVICES: ["countscans", "uniquedevices", "meanscans", "stdscans"]
|
||||
SCANS_MOST_FREQUENT_DEVICE: ["withinsegments", "acrosssegments", "acrossdataset"]
|
||||
SCANS_LEAST_FREQUENT_DEVICE: ["withinsegments", "acrosssegments", "acrossdataset"]
|
||||
OTHERS:
|
||||
DEVICES: ["countscans", "uniquedevices", "meanscans", "stdscans"]
|
||||
SCANS_MOST_FREQUENT_DEVICE: ["withinsegments", "acrosssegments", "acrossdataset"]
|
||||
SCANS_LEAST_FREQUENT_DEVICE: ["withinsegments", "acrosssegments", "acrossdataset"]
|
||||
SRC_FOLDER: "doryab" # inside src/features/phone_bluetooth
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-calls/
|
||||
PHONE_CALLS:
|
||||
CONTAINER: calls
|
||||
CONTAINER: calls.csv
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: True
|
||||
|
@ -136,10 +186,11 @@ PHONE_CALLS:
|
|||
SRC_LANGUAGE: "r"
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_calls
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-conversation/
|
||||
PHONE_CONVERSATION:
|
||||
CONTAINER:
|
||||
ANDROID: plugin_studentlife_audio_android
|
||||
IOS: plugin_studentlife_audio
|
||||
ANDROID: plugin_studentlife_audio_android.csv
|
||||
IOS: plugin_studentlife_audio.csv
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: True
|
||||
|
@ -154,18 +205,25 @@ PHONE_CONVERSATION:
|
|||
SRC_FOLDER: "rapids" # inside src/features/phone_conversation
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-data-yield/
|
||||
PHONE_DATA_YIELD:
|
||||
SENSORS: [PHONE_ACCELEROMETER, PHONE_ACTIVITY_RECOGNITION, PHONE_APPLICATIONS_FOREGROUND, PHONE_BATTERY, PHONE_BLUETOOTH, PHONE_CALLS, PHONE_CONVERSATION, PHONE_LIGHT, PHONE_LOCATIONS, PHONE_MESSAGES, PHONE_SCREEN, PHONE_WIFI_CONNECTED, PHONE_WIFI_VISIBLE]
|
||||
SENSORS: [PHONE_ACTIVITY_RECOGNITION, PHONE_APPLICATIONS_FOREGROUND, PHONE_BATTERY, PHONE_BLUETOOTH, PHONE_CALLS, PHONE_CONVERSATION, PHONE_LIGHT, PHONE_LOCATIONS, PHONE_MESSAGES, PHONE_SCREEN, PHONE_WIFI_CONNECTED, PHONE_WIFI_VISIBLE]
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: True
|
||||
FEATURES: [ratiovalidyieldedminutes, ratiovalidyieldedhours]
|
||||
MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1 representing the number of minutes with at least
|
||||
MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1, minimum percentage of valid minutes in an hour to be considered valid.
|
||||
SRC_LANGUAGE: "r"
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_data_yield
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-keyboard/
|
||||
PHONE_KEYBOARD:
|
||||
CONTAINER: keyboard
|
||||
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-light/
|
||||
PHONE_LIGHT:
|
||||
CONTAINER: light
|
||||
CONTAINER: light.csv
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: True
|
||||
|
@ -173,21 +231,32 @@ PHONE_LIGHT:
|
|||
SRC_FOLDER: "rapids" # inside src/features/phone_light
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-locations/
|
||||
PHONE_LOCATIONS:
|
||||
CONTAINER: locations
|
||||
LOCATIONS_TO_USE: FUSED_RESAMPLED # ALL, GPS OR FUSED_RESAMPLED
|
||||
CONTAINER: locations.csv
|
||||
LOCATIONS_TO_USE: FUSED_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
|
||||
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
|
||||
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
|
||||
HOME_INFERENCE:
|
||||
DBSCAN_EPS: 10 # meters
|
||||
DBSCAN_MINSAMPLES: 5
|
||||
THRESHOLD_STATIC : 1 # km/h
|
||||
CLUSTERING_ALGORITHM: DBSCAN #DBSCAN,OPTICS
|
||||
|
||||
PROVIDERS:
|
||||
DORYAB:
|
||||
COMPUTE: True
|
||||
FEATURES: ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed","circadianmovement","numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy"]
|
||||
FEATURES: ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
|
||||
ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
DBSCAN_EPS: 10 # meters
|
||||
DBSCAN_MINSAMPLES: 5
|
||||
THRESHOLD_STATIC : 1 # km/h
|
||||
MAXIMUM_GAP_ALLOWED: 300
|
||||
MAXIMUM_ROW_GAP: 300
|
||||
MAXIMUM_ROW_DURATION: 60
|
||||
MINUTES_DATA_USED: False
|
||||
SAMPLING_FREQUENCY: 0
|
||||
CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET,TIME_SEGMENT
|
||||
CLUSTERING_ALGORITHM: DBSCAN #DBSCAN,OPTICS
|
||||
RADIUS_FOR_HOME: 100
|
||||
SRC_FOLDER: "doryab" # inside src/features/phone_locations
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
|
@ -195,13 +264,21 @@ PHONE_LOCATIONS:
|
|||
COMPUTE: False
|
||||
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
|
||||
ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
TIMEZONE: *timezone
|
||||
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
|
||||
MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates gruped by minute) were used to compute features
|
||||
SRC_FOLDER: "barnett" # inside src/features/phone_locations
|
||||
SRC_LANGUAGE: "r"
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-log/
|
||||
PHONE_LOG:
|
||||
CONTAINER:
|
||||
ANDROID: aware_log
|
||||
IOS: ios_aware_log
|
||||
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-messages/
|
||||
PHONE_MESSAGES:
|
||||
CONTAINER: messages
|
||||
CONTAINER: messages.csv
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: True
|
||||
|
@ -212,8 +289,9 @@ PHONE_MESSAGES:
|
|||
SRC_LANGUAGE: "r"
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_messages
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-screen/
|
||||
PHONE_SCREEN:
|
||||
CONTAINER: screen
|
||||
CONTAINER: screen.csv
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: True
|
||||
|
@ -225,8 +303,9 @@ PHONE_SCREEN:
|
|||
SRC_FOLDER: "rapids" # inside src/features/phone_screen
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-wifi-connected/
|
||||
PHONE_WIFI_CONNECTED:
|
||||
CONTAINER: "sensor_wifi"
|
||||
CONTAINER: sensor_wifi.csv
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: True
|
||||
|
@ -234,8 +313,9 @@ PHONE_WIFI_CONNECTED:
|
|||
SRC_FOLDER: "rapids" # inside src/features/phone_wifi_connected
|
||||
SRC_LANGUAGE: "r"
|
||||
|
||||
# See https://www.rapids.science/latest/features/phone-wifi-visible/
|
||||
PHONE_WIFI_VISIBLE:
|
||||
CONTAINER: "wifi"
|
||||
CONTAINER: wifi.csv
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: True
|
||||
|
@ -250,20 +330,43 @@ PHONE_WIFI_VISIBLE:
|
|||
########################################################################################################################
|
||||
|
||||
# See https://www.rapids.science/latest/setup/configuration/#device-data-source-configuration
|
||||
FITBIT_DATA_CONFIGURATION:
|
||||
SOURCE:
|
||||
TYPE: DATABASE # DATABASE or FILES (set each [FITBIT_SENSOR][CONTAINER] attribute with a table name or a file path accordingly)
|
||||
COLUMN_FORMAT: JSON # JSON or PLAIN_TEXT
|
||||
DATABASE_GROUP: *database_group
|
||||
DEVICE_ID_COLUMN: device_id # column name
|
||||
TIMEZONE:
|
||||
TYPE: SINGLE # Fitbit only supports SINGLE timezones
|
||||
VALUE: *timezone # see docs
|
||||
HIDDEN:
|
||||
SINGLE_FITBIT_TABLE: TRUE
|
||||
FITBIT_DATA_STREAMS:
|
||||
USE: fitbitjson_csv
|
||||
|
||||
# AVAILABLE:
|
||||
fitbitjson_mysql:
|
||||
DATABASE_GROUP: MY_GROUP
|
||||
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
|
||||
|
||||
fitbitparsed_mysql:
|
||||
DATABASE_GROUP: MY_GROUP
|
||||
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
|
||||
|
||||
fitbitjson_csv:
|
||||
FOLDER: data/external/example_workflow
|
||||
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
|
||||
|
||||
fitbitparsed_csv:
|
||||
FOLDER: data/external/fitbit_csv
|
||||
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
|
||||
|
||||
# Sensors ------
|
||||
|
||||
# See https://www.rapids.science/latest/features/fitbit-data-yield/
|
||||
FITBIT_DATA_YIELD:
|
||||
SENSOR: FITBIT_HEARTRATE_INTRADAY
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
FEATURES: [ratiovalidyieldedminutes, ratiovalidyieldedhours]
|
||||
MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1, minimum percentage of valid minutes in an hour to be considered valid.
|
||||
SRC_LANGUAGE: "r"
|
||||
SRC_FOLDER: "rapids" # inside src/features/fitbit_data_yield
|
||||
|
||||
|
||||
# See https://www.rapids.science/latest/features/fitbit-heartrate-summary/
|
||||
FITBIT_HEARTRATE_SUMMARY:
|
||||
CONTAINER: fitbit_data
|
||||
CONTAINER: fitbit_data.csv
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: True
|
||||
|
@ -271,8 +374,9 @@ FITBIT_HEARTRATE_SUMMARY:
|
|||
SRC_FOLDER: "rapids" # inside src/features/fitbit_heartrate_summary
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/fitbit-heartrate-intraday/
|
||||
FITBIT_HEARTRATE_INTRADAY:
|
||||
CONTAINER: fitbit_data
|
||||
CONTAINER: fitbit_data.csv
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: True
|
||||
|
@ -280,8 +384,9 @@ FITBIT_HEARTRATE_INTRADAY:
|
|||
SRC_FOLDER: "rapids" # inside src/features/fitbit_heartrate_intraday
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/fitbit-sleep-summary/
|
||||
FITBIT_SLEEP_SUMMARY:
|
||||
CONTAINER: fitbit_data
|
||||
CONTAINER: fitbit_data.csv
|
||||
SLEEP_EPISODE_TIMESTAMP: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
|
@ -291,8 +396,45 @@ FITBIT_SLEEP_SUMMARY:
|
|||
SRC_FOLDER: "rapids" # inside src/features/fitbit_sleep_summary
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/fitbit-sleep-intraday/
|
||||
FITBIT_SLEEP_INTRADAY:
|
||||
CONTAINER: sleep_intraday
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
FEATURES:
|
||||
LEVELS_AND_TYPES_COMBINING_ALL: True
|
||||
LEVELS_AND_TYPES: [countepisode, sumduration, maxduration, minduration, avgduration, medianduration, stdduration]
|
||||
RATIOS_TYPE: [count, duration]
|
||||
RATIOS_SCOPE: [ACROSS_LEVELS, ACROSS_TYPES, WITHIN_LEVELS, WITHIN_TYPES]
|
||||
ROUTINE: [starttimefirstmainsleep, endtimelastmainsleep, starttimefirstnap, endtimelastnap]
|
||||
SLEEP_LEVELS:
|
||||
CLASSIC: [awake, restless, asleep]
|
||||
STAGES: [wake, deep, light, rem]
|
||||
UNIFIED: [awake, asleep]
|
||||
SLEEP_TYPES: [main, nap]
|
||||
INCLUDE_SLEEP_LATER_THAN: 0 # a number ranged from 0 (midnight) to 1439 (23:59)
|
||||
REFERENCE_TIME: MIDNIGHT # chosen from "MIDNIGHT" and "START_OF_THE_SEGMENT"
|
||||
SRC_FOLDER: "rapids" # inside src/features/fitbit_sleep_intraday
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
PRICE:
|
||||
COMPUTE: False
|
||||
FEATURES: [avgduration, avgratioduration, avgstarttimeofepisodemain, avgendtimeofepisodemain, avgmidpointofepisodemain, "stdstarttimeofepisodemain", "stdendtimeofepisodemain", "stdmidpointofepisodemain", socialjetlag, meanssdstarttimeofepisodemain, meanssdendtimeofepisodemain, meanssdmidpointofepisodemain, medianssdstarttimeofepisodemain, medianssdendtimeofepisodemain, medianssdmidpointofepisodemain]
|
||||
SLEEP_LEVELS:
|
||||
CLASSIC: [awake, restless, asleep]
|
||||
STAGES: [wake, deep, light, rem]
|
||||
UNIFIED: [awake, asleep]
|
||||
DAY_TYPES: [WEEKEND, WEEK, ALL]
|
||||
GROUP_EPISODES_WITHIN: # by default: today's 6pm to tomorrow's noon
|
||||
START_TIME: 1080 # number of minutes after the midnight (18:00) 18*60
|
||||
LENGTH: 1080 # in minutes (18 hours) 18*60
|
||||
SRC_FOLDER: "price" # inside src/features/fitbit_sleep_intraday
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/fitbit-steps-summary/
|
||||
FITBIT_STEPS_SUMMARY:
|
||||
CONTAINER: fitbit_data
|
||||
CONTAINER: fitbit_data.csv
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: True
|
||||
|
@ -300,8 +442,9 @@ FITBIT_STEPS_SUMMARY:
|
|||
SRC_FOLDER: "rapids" # inside src/features/fitbit_steps_summary
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/fitbit-steps-intraday/
|
||||
FITBIT_STEPS_INTRADAY:
|
||||
CONTAINER: fitbit_data
|
||||
CONTAINER: fitbit_data.csv
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: True
|
||||
|
@ -314,27 +457,113 @@ FITBIT_STEPS_INTRADAY:
|
|||
SRC_FOLDER: "rapids" # inside src/features/fitbit_steps_intraday
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
########################################################################################################################
|
||||
# EMPATICA #
|
||||
########################################################################################################################
|
||||
|
||||
EMPATICA_DATA_STREAMS:
|
||||
USE: empatica_zip
|
||||
|
||||
# AVAILABLE:
|
||||
empatica_zip:
|
||||
FOLDER: data/external/empatica
|
||||
|
||||
# Sensors ------
|
||||
|
||||
# See https://www.rapids.science/latest/features/empatica-accelerometer/
|
||||
EMPATICA_ACCELEROMETER:
|
||||
CONTAINER: ACC
|
||||
PROVIDERS:
|
||||
DBDP:
|
||||
COMPUTE: False
|
||||
FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
|
||||
SRC_FOLDER: "dbdp" # inside src/features/empatica_accelerometer
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/empatica-heartrate/
|
||||
EMPATICA_HEARTRATE:
|
||||
CONTAINER: HR
|
||||
PROVIDERS:
|
||||
DBDP:
|
||||
COMPUTE: False
|
||||
FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr"]
|
||||
SRC_FOLDER: "dbdp" # inside src/features/empatica_heartrate
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/empatica-temperature/
|
||||
EMPATICA_TEMPERATURE:
|
||||
CONTAINER: TEMP
|
||||
PROVIDERS:
|
||||
DBDP:
|
||||
COMPUTE: False
|
||||
FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
|
||||
SRC_FOLDER: "dbdp" # inside src/features/empatica_heartrate
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/empatica-electrodermal-activity/
|
||||
EMPATICA_ELECTRODERMAL_ACTIVITY:
|
||||
CONTAINER: EDA
|
||||
PROVIDERS:
|
||||
DBDP:
|
||||
COMPUTE: False
|
||||
FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"]
|
||||
SRC_FOLDER: "dbdp" # inside src/features/empatica_electrodermal_activity
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/empatica-blood-volume-pulse/
|
||||
EMPATICA_BLOOD_VOLUME_PULSE:
|
||||
CONTAINER: BVP
|
||||
PROVIDERS:
|
||||
DBDP:
|
||||
COMPUTE: False
|
||||
FEATURES: ["maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp", "diffminmodebvp", "entropybvp"]
|
||||
SRC_FOLDER: "dbdp" # inside src/features/empatica_blood_volume_pulse
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/empatica-inter-beat-interval/
|
||||
EMPATICA_INTER_BEAT_INTERVAL:
|
||||
CONTAINER: IBI
|
||||
PROVIDERS:
|
||||
DBDP:
|
||||
COMPUTE: False
|
||||
FEATURES: ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi", "diffminmodeibi", "entropyibi"]
|
||||
SRC_FOLDER: "dbdp" # inside src/features/inter_beat_interval
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/empatica-tags/
|
||||
EMPATICA_TAGS:
|
||||
CONTAINER: TAGS
|
||||
PROVIDERS: # None implemented yet
|
||||
|
||||
|
||||
########################################################################################################################
|
||||
# PLOTS #
|
||||
########################################################################################################################
|
||||
|
||||
# Data quality ------
|
||||
|
||||
# See https://www.rapids.science/latest/visualizations/data-quality-visualizations/#1-histograms-of-phone-data-yield
|
||||
HISTOGRAM_PHONE_DATA_YIELD:
|
||||
PLOT: True
|
||||
|
||||
HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT:
|
||||
PLOT: True
|
||||
|
||||
HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT:
|
||||
PLOT: True
|
||||
SENSORS: [PHONE_ACTIVITY_RECOGNITION, PHONE_APPLICATIONS_FOREGROUND, PHONE_BATTERY, PHONE_BLUETOOTH, PHONE_CALLS, PHONE_CONVERSATION, PHONE_LIGHT, PHONE_LOCATIONS, PHONE_MESSAGES, PHONE_SCREEN, PHONE_WIFI_CONNECTED, PHONE_WIFI_VISIBLE]
|
||||
|
||||
# See https://www.rapids.science/latest/visualizations/data-quality-visualizations/#2-heatmaps-of-overall-data-yield
|
||||
HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT:
|
||||
PLOT: True
|
||||
|
||||
# See https://www.rapids.science/latest/visualizations/data-quality-visualizations/#3-heatmap-of-recorded-phone-sensors
|
||||
HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT:
|
||||
PLOT: True
|
||||
|
||||
# See https://www.rapids.science/latest/visualizations/data-quality-visualizations/#4-heatmap-of-sensor-row-count
|
||||
HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT:
|
||||
PLOT: False
|
||||
SENSORS: [PHONE_ACTIVITY_RECOGNITION, PHONE_APPLICATIONS_FOREGROUND, PHONE_BATTERY, PHONE_BLUETOOTH, PHONE_CALLS, PHONE_CONVERSATION, PHONE_LIGHT, PHONE_LOCATIONS, PHONE_MESSAGES, PHONE_SCREEN, PHONE_WIFI_CONNECTED, PHONE_WIFI_VISIBLE]
|
||||
|
||||
# Features ------
|
||||
|
||||
# See https://www.rapids.science/latest/visualizations/feature-visualizations/#1-heatmap-correlation-matrix
|
||||
HEATMAP_FEATURE_CORRELATION_MATRIX:
|
||||
PLOT: TRUE
|
||||
PLOT: False
|
||||
MIN_ROWS_RATIO: 0.5
|
||||
CORR_THRESHOLD: 0.1
|
||||
CORR_METHOD: "pearson" # choose from {"pearson", "kendall", "spearman"}
|
||||
|
@ -349,18 +578,14 @@ PARAMS_FOR_ANALYSIS:
|
|||
CATEGORICAL_OPERATORS: [mostcommon]
|
||||
|
||||
DEMOGRAPHIC:
|
||||
CONTAINER: participant_info
|
||||
FOLDER: data/external/example_workflow
|
||||
CONTAINER: participant_info.csv
|
||||
FEATURES: [age, gender, inpatientdays]
|
||||
CATEGORICAL_FEATURES: [gender]
|
||||
SOURCE:
|
||||
DATABASE_GROUP: *database_group
|
||||
TIMEZONE: *timezone
|
||||
|
||||
TARGET:
|
||||
CONTAINER: participant_target
|
||||
SOURCE:
|
||||
DATABASE_GROUP: *database_group
|
||||
TIMEZONE: *timezone
|
||||
FOLDER: data/external/example_workflow
|
||||
CONTAINER: participant_target.csv
|
||||
|
||||
# Cleaning Parameters
|
||||
COLS_NAN_THRESHOLD: 0.3
|
||||
|
|
|
@ -1,9 +1,7 @@
|
|||
rule download_demographic_data:
|
||||
input:
|
||||
participant_file = "data/external/participant_files/{pid}.yaml"
|
||||
params:
|
||||
source = config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["SOURCE"],
|
||||
table = config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["CONTAINER"],
|
||||
participant_file = "data/external/participant_files/{pid}.yaml",
|
||||
data = config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["FOLDER"] + "/" + config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["CONTAINER"]
|
||||
output:
|
||||
"data/raw/{pid}/participant_info_raw.csv"
|
||||
script:
|
||||
|
@ -22,10 +20,8 @@ rule demographic_features:
|
|||
|
||||
rule download_target_data:
|
||||
input:
|
||||
participant_file = "data/external/participant_files/{pid}.yaml"
|
||||
params:
|
||||
source = config["PARAMS_FOR_ANALYSIS"]["TARGET"]["SOURCE"],
|
||||
table = config["PARAMS_FOR_ANALYSIS"]["TARGET"]["CONTAINER"],
|
||||
participant_file = "data/external/participant_files/{pid}.yaml",
|
||||
data = config["PARAMS_FOR_ANALYSIS"]["TARGET"]["FOLDER"] + "/" + config["PARAMS_FOR_ANALYSIS"]["TARGET"]["CONTAINER"]
|
||||
output:
|
||||
"data/raw/{pid}/participant_target_raw.csv"
|
||||
script:
|
||||
|
@ -34,15 +30,19 @@ rule download_target_data:
|
|||
rule target_readable_datetime:
|
||||
input:
|
||||
sensor_input = "data/raw/{pid}/participant_target_raw.csv",
|
||||
time_segments = "data/interim/time_segments/{pid}_time_segments.csv"
|
||||
time_segments = "data/interim/time_segments/{pid}_time_segments.csv",
|
||||
pid_file = "data/external/participant_files/{pid}.yaml",
|
||||
tzcodes_file = input_tzcodes_file,
|
||||
params:
|
||||
fixed_timezone = config["PARAMS_FOR_ANALYSIS"]["TARGET"]["SOURCE"]["TIMEZONE"],
|
||||
device_type = "fitbit",
|
||||
timezone_parameters = config["TIMEZONE"],
|
||||
pid = "{pid}",
|
||||
time_segments_type = config["TIME_SEGMENTS"]["TYPE"],
|
||||
include_past_periodic_segments = config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
|
||||
output:
|
||||
"data/raw/{pid}/participant_target_with_datetime.csv"
|
||||
script:
|
||||
"../src/data/readable_datetime.R"
|
||||
"../src/data/datetime/readable_datetime.R"
|
||||
|
||||
rule parse_targets:
|
||||
input:
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
source("renv/activate.R")
|
||||
library(RMariaDB)
|
||||
library("dplyr", warn.conflicts = F)
|
||||
library(readr)
|
||||
library(stringr)
|
||||
|
@ -7,16 +6,13 @@ library(yaml)
|
|||
|
||||
|
||||
participant_file <- snakemake@input[["participant_file"]]
|
||||
source <- snakemake@params[["source"]]
|
||||
table <- snakemake@params[["table"]]
|
||||
|
||||
sensor_file <- snakemake@output[[1]]
|
||||
|
||||
participant <- read_yaml(participant_file)
|
||||
record_id <- participant$PHONE$LABEL
|
||||
|
||||
dbEngine <- dbConnect(MariaDB(), default.file = "./.env", group = source$DATABASE_GROUP)
|
||||
query <- paste0("SELECT * FROM ", table, " WHERE record_id = '", record_id, "'")
|
||||
sensor_data <- dbGetQuery(dbEngine, query)
|
||||
dbDisconnect(dbEngine)
|
||||
demographic_data = read.csv(snakemake@input[["data"]])
|
||||
demographic_data = demographic_data[demographic_data$record_id == record_id, ]
|
||||
|
||||
write_csv(sensor_data, sensor_file)
|
||||
write_csv(demographic_data, sensor_file)
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
source("renv/activate.R")
|
||||
library(RMariaDB)
|
||||
library("dplyr", warn.conflicts = F)
|
||||
library(readr)
|
||||
library(stringr)
|
||||
|
@ -8,19 +7,17 @@ library(lubridate)
|
|||
|
||||
|
||||
participant_file <- snakemake@input[["participant_file"]]
|
||||
source <- snakemake@params[["source"]]
|
||||
table <- snakemake@params[["table"]]
|
||||
sensor_file <- snakemake@output[[1]]
|
||||
|
||||
participant <- read_yaml(participant_file)
|
||||
record_id <- participant$PHONE$LABEL
|
||||
|
||||
dbEngine <- dbConnect(MariaDB(), default.file = "./.env", group = source$DATABASE_GROUP)
|
||||
query <- paste0("SELECT * FROM ", table, " WHERE record_id = '", record_id, "'")
|
||||
sensor_data <- dbGetQuery(dbEngine, query)
|
||||
dbDisconnect(dbEngine)
|
||||
target_data <- read.csv(snakemake@input[["data"]])
|
||||
target_data <- target_data[target_data$record_id == record_id, ]
|
||||
|
||||
# generate timestamp based on local_date
|
||||
sensor_data$timestamp <- as.numeric(ymd_hms(paste(sensor_data$local_date, "00:00:00"), tz=source$TIMEZONE, quiet=TRUE)) * 1000
|
||||
target_data$local_date_time <- paste(target_data$local_date, "00:00:00")
|
||||
#target_data <- target_data %>% rename(local_date_time = local_date)
|
||||
|
||||
write_csv(sensor_data, sensor_file)
|
||||
target_data$timestamp <- 0
|
||||
|
||||
write_csv(target_data, sensor_file)
|
||||
|
|
Loading…
Reference in New Issue