Add the rule to merge population model results

2020-05-15 18:49:14 -04:00 · 2020-05-15 18:49:14 -04:00 · 34ffe4abaf
parent 8df8a5c2b3
commit 34ffe4abaf
3 changed files with 40 additions and 0 deletions
--- a/14
+++ b/14
@ -150,6 +150,20 @@ rule all:
            zip, 
            model = models,
            scaler = scalers),
+        expand(
+            expand("data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{{model}}/{cv_method}/{source}_{day_segment}_{summarised}_{{scaler}}/merged_population_model_results.csv",
+                rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
+                cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"],
+                days_before_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_BEFORE_THRESHOLD"],
+                days_after_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_AFTER_THRESHOLD"],
+                cols_var_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_VAR_THRESHOLD"],
+                cv_method = config["PARAMS_FOR_ANALYSIS"]["CV_METHODS"],
+                source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
+                day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"],
+                summarised = config["PARAMS_FOR_ANALYSIS"]["SUMMARISED"]), 
+            zip, 
+            model = models,
+            scaler = scalers),

        # Vizualisations
        expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
--- a/rules/models.snakefile
+++ b/rules/models.snakefile
@ -148,3 +148,13 @@ rule modeling:
        "data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{model}/{cv_method}/{source}_{day_segment}_{summarised}_{scaler}/notes.log"
    script:
        "../src/models/modeling.py"
+
+rule merge_population_model_results:  # joins one model run's results with its NaN-cell ratio and baseline (see the script)
+    input:  # nan_cells_ratio and baseline paths carry no {model}/{scaler}, so runs differing only in those wildcards share them
+        overall_results = "data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{model}/{cv_method}/{source}_{day_segment}_{summarised}_{scaler}/overall_results.csv",
+        nan_cells_ratio = "data/processed/data_for_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_nancellsratio.csv",
+        baseline =  "data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_{summarised}_{cv_method}_baseline.csv"
+    output:  # written to the same directory as the overall_results.csv input
+        "data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{model}/{cv_method}/{source}_{day_segment}_{summarised}_{scaler}/merged_population_model_results.csv"
+    script:  # path is resolved relative to this snakefile's directory
+        "../src/models/merge_population_model_results.py"
--- a/src/models/merge_population_model_results.py
+++ b/src/models/merge_population_model_results.py
@ -0,0 +1,16 @@
+import pandas as pd
+
+overall_results = pd.read_csv(snakemake.input["overall_results"])
+nan_cells_ratio = pd.read_csv(snakemake.input["nan_cells_ratio"])
+baseline = pd.read_csv(snakemake.input["baseline"], index_col=["method"])
+
+# add nan cells ratio
+overall_results.insert(3, "nan_cells_ratio", nan_cells_ratio["nan_cells_ratio"])
+
+# add baseline
+baseline = baseline.stack().to_frame().T
+baseline.columns = ['{}_{}'.format(*col) for col in baseline.columns]
+baseline = baseline.add_prefix('b_')
+results = pd.concat([overall_results, baseline], axis=1)
+
+results.to_csv(snakemake.output[0], index=False)