Add a merge_population_model_results rule and script, and request its output
in rule all.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. The indentation below is reconstructed, so the
context lines may only match with `git apply --ignore-whitespace`. Comments
were added to the new rule and the new script, so the `index` blob hashes no
longer describe the post-image.

diff --git a/Snakefile b/Snakefile
index 2778cb0b..633ca3a2 100644
--- a/Snakefile
+++ b/Snakefile
@@ -150,6 +150,20 @@ rule all:
             zip,
             model = models,
             scaler = scalers),
+        expand(
+            expand("data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{{model}}/{cv_method}/{source}_{day_segment}_{summarised}_{{scaler}}/merged_population_model_results.csv",
+                rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
+                cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"],
+                days_before_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_BEFORE_THRESHOLD"],
+                days_after_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_AFTER_THRESHOLD"],
+                cols_var_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_VAR_THRESHOLD"],
+                cv_method = config["PARAMS_FOR_ANALYSIS"]["CV_METHODS"],
+                source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
+                day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"],
+                summarised = config["PARAMS_FOR_ANALYSIS"]["SUMMARISED"]),
+            zip,
+            model = models,
+            scaler = scalers),
 
         # Vizualisations
         expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
diff --git a/rules/models.snakefile b/rules/models.snakefile
index d0185b03..d360b8b1 100644
--- a/rules/models.snakefile
+++ b/rules/models.snakefile
@@ -148,3 +148,15 @@ rule modeling:
         "data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{model}/{cv_method}/{source}_{day_segment}_{summarised}_{scaler}/notes.log"
     script:
         "../src/models/modeling.py"
+
+# Combine one model's overall_results.csv with the matching nancellsratio and
+# baseline CSVs into merged_population_model_results.csv.
+rule merge_population_model_results:
+    input:
+        overall_results = "data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{model}/{cv_method}/{source}_{day_segment}_{summarised}_{scaler}/overall_results.csv",
+        nan_cells_ratio = "data/processed/data_for_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_nancellsratio.csv",
+        baseline = "data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_{summarised}_{cv_method}_baseline.csv"
+    output:
+        "data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{model}/{cv_method}/{source}_{day_segment}_{summarised}_{scaler}/merged_population_model_results.csv"
+    script:
+        "../src/models/merge_population_model_results.py"
diff --git a/src/models/merge_population_model_results.py b/src/models/merge_population_model_results.py
new file mode 100644
index 00000000..d81cc596
--- /dev/null
+++ b/src/models/merge_population_model_results.py
@@ -0,0 +1,27 @@
+"""Merge one population model's results with its NaN-cell ratio and baselines.
+
+Run by Snakemake through the ``script`` directive of the
+``merge_population_model_results`` rule, which provides the ``snakemake``
+object used below for the input and output paths.
+"""
+
+import pandas as pd
+
+overall_results = pd.read_csv(snakemake.input["overall_results"])
+nan_cells_ratio = pd.read_csv(snakemake.input["nan_cells_ratio"])
+# Use "method" as the index so each baseline method is a row label.
+baseline = pd.read_csv(snakemake.input["baseline"], index_col=["method"])
+
+# Insert the NaN-cell ratio as the fourth column (position 3).
+overall_results.insert(3, "nan_cells_ratio", nan_cells_ratio["nan_cells_ratio"])
+
+# Flatten the baseline table into one row whose columns are named
+# "b_{method}_{column}".
+baseline = baseline.stack().to_frame().T
+baseline.columns = ['{}_{}'.format(*col) for col in baseline.columns]
+baseline = baseline.add_prefix('b_')
+# Column-wise concat aligns on the row index; the flattened baseline has the
+# single index 0. NOTE(review): presumably overall_results is one row - confirm.
+results = pd.concat([overall_results, baseline], axis=1)
+
+results.to_csv(snakemake.output[0], index=False)