2019-10-24 18:11:24 +02:00
|
|
|
configfile: "config.yaml"
|
2020-05-02 01:46:04 +02:00
|
|
|
include: "rules/renv.snakefile"
|
2019-10-24 18:11:24 +02:00
|
|
|
include: "rules/preprocessing.snakefile"
|
2019-10-24 22:27:43 +02:00
|
|
|
include: "rules/features.snakefile"
|
2020-03-09 18:32:14 +01:00
|
|
|
include: "rules/models.snakefile"
|
2019-10-25 17:12:55 +02:00
|
|
|
include: "rules/reports.snakefile"
|
2020-03-17 22:26:30 +01:00
|
|
|
include: "rules/mystudy.snakefile" # You can add snakefiles with rules tailored to your project
|
2019-10-24 18:11:24 +02:00
|
|
|
|
2020-05-16 00:45:45 +02:00
|
|
|
# Build two parallel lists that pair every model with each of its scalers:
# models[i] goes together with scalers[i]. A model name is repeated once per
# scaler configured for it, so both lists always have the same length.
# "rule all" consumes them pairwise through expand(..., zip, model=..., scaler=...).
models, scalers = [], []
for model_name in config["PARAMS_FOR_ANALYSIS"]["MODEL_NAMES"]:
    scalers_for_model = config["PARAMS_FOR_ANALYSIS"]["MODEL_SCALER"][model_name]
    models.extend([model_name] * len(scalers_for_model))
    scalers.extend(scalers_for_model)
|
|
|
|
|
2019-10-24 18:11:24 +02:00
|
|
|
# Shorthand for the analysis section of the config; most model targets read from it.
_analysis_params = config["PARAMS_FOR_ANALYSIS"]

# Values for the five wildcards that form the cleaning-threshold directory name
# "{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}".
# The key order matches the keyword order used by every expand() call below, so the
# order in which expand() enumerates combinations is unaffected by the unpacking.
_cleaning_thresholds = dict(
    rows_nan_threshold = _analysis_params["ROWS_NAN_THRESHOLD"],
    cols_nan_threshold = _analysis_params["COLS_NAN_THRESHOLD"],
    days_before_threshold = _analysis_params["PARTICIPANT_DAYS_BEFORE_THRESHOLD"],
    days_after_threshold = _analysis_params["PARTICIPANT_DAYS_AFTER_THRESHOLD"],
    cols_var_threshold = _analysis_params["COLS_VAR_THRESHOLD"],
)

# Aggregate target: requesting this rule requests every file listed under "input".
# NOTE(review): several paths contain a literal "|"; any shell command that receives
# them must quote the path -- confirm the rules producing these files do so.
rule all:
    input:
        # ------------------------------------------------------------------
        # My study (example of targets created specifically for one study)
        # ------------------------------------------------------------------
        expand("data/interim/{pid}/days_to_analyse_{days_before_surgery}_{days_in_hospital}_{days_after_discharge}.csv",
            pid = config["PIDS"],
            days_before_surgery = _analysis_params["DAYS_TO_ANALYSE"]["DAYS_BEFORE_SURGERY"],
            days_after_discharge = _analysis_params["DAYS_TO_ANALYSE"]["DAYS_AFTER_DISCHARGE"],
            days_in_hospital = _analysis_params["DAYS_TO_ANALYSE"]["DAYS_IN_HOSPITAL"]),
        expand("data/processed/{pid}/targets_{summarised}.csv",
            pid = config["PIDS"],
            summarised = _analysis_params["SUMMARISED"]),
        expand("data/processed/{pid}/demographic_features.csv",
            pid = config["PIDS"]),

        # ------------------------------------------------------------------
        # Feature extraction
        # ------------------------------------------------------------------
        # Raw downloads and datetime-annotated tables
        expand("data/raw/{pid}/{sensor}_raw.csv",
            pid = config["PIDS"],
            sensor = config["SENSORS"]),
        expand("data/raw/{pid}/{sensor}_raw.csv",
            pid = config["PIDS"],
            sensor = config["FITBIT_TABLE"]),
        expand("data/raw/{pid}/{sensor}_with_datetime.csv",
            pid = config["PIDS"],
            sensor = config["SENSORS"]),
        # Intermediate per-participant tables
        expand("data/processed/{pid}/battery_deltas.csv",
            pid = config["PIDS"]),
        expand("data/interim/{pid}/applications_foreground_with_datetime_with_genre.csv",
            pid = config["PIDS"]),
        expand("data/processed/{pid}/screen_deltas.csv",
            pid = config["PIDS"]),
        expand("data/processed/{pid}/plugin_google_activity_recognition_deltas.csv",
            pid = config["PIDS"]),
        expand("data/interim/{pid}/phone_valid_sensed_days.csv",
            pid = config["PIDS"]),
        expand("data/interim/{pid}/phone_sensed_bins.csv",
            pid = config["PIDS"]),
        # Per-sensor features, one file per configured day segment
        expand("data/processed/{pid}/sms_{sms_type}_{day_segment}.csv",
            pid = config["PIDS"],
            sms_type = config["SMS"]["TYPES"],
            day_segment = config["SMS"]["DAY_SEGMENTS"]),
        expand("data/processed/{pid}/call_{call_type}_{segment}.csv",
            pid = config["PIDS"],
            call_type = config["CALLS"]["TYPES"],
            segment = config["CALLS"]["DAY_SEGMENTS"]),
        expand("data/processed/{pid}/location_barnett_{segment}.csv",
            pid = config["PIDS"],
            segment = config["BARNETT_LOCATION"]["DAY_SEGMENTS"]),
        expand("data/processed/{pid}/bluetooth_{segment}.csv",
            pid = config["PIDS"],
            segment = config["BLUETOOTH"]["DAY_SEGMENTS"]),
        expand("data/processed/{pid}/activity_recognition_{segment}.csv",
            pid = config["PIDS"],
            segment = config["ACTIVITY_RECOGNITION"]["DAY_SEGMENTS"]),
        expand("data/processed/{pid}/battery_{day_segment}.csv",
            pid = config["PIDS"],
            day_segment = config["BATTERY"]["DAY_SEGMENTS"]),
        expand("data/processed/{pid}/screen_{day_segment}.csv",
            pid = config["PIDS"],
            day_segment = config["SCREEN"]["DAY_SEGMENTS"]),
        expand("data/processed/{pid}/light_{day_segment}.csv",
            pid = config["PIDS"],
            day_segment = config["LIGHT"]["DAY_SEGMENTS"]),
        expand("data/processed/{pid}/accelerometer_{day_segment}.csv",
            pid = config["PIDS"],
            day_segment = config["ACCELEROMETER"]["DAY_SEGMENTS"]),
        expand("data/processed/{pid}/applications_foreground_{day_segment}.csv",
            pid = config["PIDS"],
            day_segment = config["APPLICATIONS_FOREGROUND"]["DAY_SEGMENTS"]),
        # Fitbit tables and features
        expand("data/raw/{pid}/fitbit_{fitbit_sensor}_{fitbit_data_type}_with_datetime.csv",
            pid = config["PIDS"],
            fitbit_sensor = config["FITBIT_SENSORS"],
            fitbit_data_type = config["FITBIT_DATA_TYPE"]),
        expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv",
            pid = config["PIDS"],
            day_segment = config["HEARTRATE"]["DAY_SEGMENTS"]),
        expand("data/processed/{pid}/fitbit_step_{day_segment}.csv",
            pid = config["PIDS"],
            day_segment = config["STEP"]["DAY_SEGMENTS"]),
        expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv",
            pid = config["PIDS"],
            day_segment = config["SLEEP"]["DAY_SEGMENTS"]),
        expand("data/processed/{pid}/wifi_{segment}.csv",
            pid = config["PIDS"],
            segment = config["WIFI"]["DAY_SEGMENTS"]),

        # ------------------------------------------------------------------
        # Models
        # ------------------------------------------------------------------
        # Original (uncleaned) data for individual and population models
        expand("data/processed/{pid}/data_for_individual_model/{source}_{day_segment}_original.csv",
            pid = config["PIDS"],
            source = _analysis_params["SOURCES"],
            day_segment = _analysis_params["DAY_SEGMENTS"]),
        expand("data/processed/data_for_population_model/{source}_{day_segment}_original.csv",
            source = _analysis_params["SOURCES"],
            day_segment = _analysis_params["DAY_SEGMENTS"]),
        # Cleaned data, stored under a directory named after the cleaning thresholds
        expand("data/processed/{pid}/data_for_individual_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_clean.csv",
            pid = config["PIDS"],
            **_cleaning_thresholds,
            source = _analysis_params["SOURCES"],
            day_segment = _analysis_params["DAY_SEGMENTS"]),
        expand("data/processed/data_for_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_clean.csv",
            **_cleaning_thresholds,
            source = _analysis_params["SOURCES"],
            day_segment = _analysis_params["DAY_SEGMENTS"]),
        # Population-level demographics and targets
        expand("data/processed/data_for_population_model/demographic_features.csv"),
        expand("data/processed/data_for_population_model/targets_{summarised}.csv",
            summarised = _analysis_params["SUMMARISED"]),
        # Population-level NaN-cell ratios and summarised features
        expand("data/processed/data_for_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_nancellsratio.csv",
            **_cleaning_thresholds,
            source = _analysis_params["SOURCES"],
            day_segment = _analysis_params["DAY_SEGMENTS"]),
        expand("data/processed/data_for_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_{summarised}.csv",
            **_cleaning_thresholds,
            source = _analysis_params["SOURCES"],
            day_segment = _analysis_params["DAY_SEGMENTS"],
            summarised = _analysis_params["SUMMARISED"]),
        # Baseline results per cross-validation method
        expand("data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_{summarised}_{cv_method}_baseline.csv",
            **_cleaning_thresholds,
            cv_method = _analysis_params["CV_METHODS"],
            source = _analysis_params["SOURCES"],
            day_segment = _analysis_params["DAY_SEGMENTS"],
            summarised = _analysis_params["SUMMARISED"]),
        # Model results. The inner expand() fills every wildcard except {model} and
        # {scaler} (escaped with double braces); the outer expand() then fills those
        # two pairwise (zip) from the parallel "models" and "scalers" lists.
        expand(
            expand("data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{{model}}/{cv_method}/{source}_{day_segment}_{summarised}_{{scaler}}/{result_component}.csv",
                **_cleaning_thresholds,
                cv_method = _analysis_params["CV_METHODS"],
                source = _analysis_params["SOURCES"],
                day_segment = _analysis_params["DAY_SEGMENTS"],
                summarised = _analysis_params["SUMMARISED"],
                result_component = _analysis_params["RESULT_COMPONENTS"]),
            zip,
            model = models,
            scaler = scalers),
        # Merged population-model results, with the same two-stage expansion
        expand(
            expand("data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{{model}}/{cv_method}/{source}_{day_segment}_{summarised}_{{scaler}}/merged_population_model_results.csv",
                **_cleaning_thresholds,
                cv_method = _analysis_params["CV_METHODS"],
                source = _analysis_params["SOURCES"],
                day_segment = _analysis_params["DAY_SEGMENTS"],
                summarised = _analysis_params["SUMMARISED"]),
            zip,
            model = models,
            scaler = scalers),

        # ------------------------------------------------------------------
        # Visualizations
        # ------------------------------------------------------------------
        expand("reports/figures/{pid}/{sensor}_heatmap_rows.html",
            pid = config["PIDS"],
            sensor = config["SENSORS"]),
        expand("reports/figures/{pid}/compliance_heatmap.html",
            pid = config["PIDS"]),
        expand("reports/figures/{pid}/battery_consumption_rates_barchart.html",
            pid = config["PIDS"]),
        expand("reports/compliance/{pid}/compliance_report.html",
            pid = config["PIDS"]),
        expand("reports/figures/overall_compliance_heatmap.html"),
|
2019-10-24 18:11:24 +02:00
|
|
|
|
2019-12-18 06:28:23 +01:00
|
|
|
# Delete the contents of the data and report output directories: raw, interim and
# processed data, report figures, zipped reports and compliance reports.
# The adjacent string literals are concatenated into one "&&"-chained command,
# so each removal only runs if the previous one succeeded.
rule clean:
    shell:
        "rm -rf data/raw/* && "
        "rm -rf data/interim/* && "
        "rm -rf data/processed/* && "
        "rm -rf reports/figures/* && "
        "rm -rf reports/*.zip && "
        "rm -rf reports/compliance/*"
|