Add the rule to merge population model results
parent
8df8a5c2b3
commit
34ffe4abaf
14
Snakefile
14
Snakefile
|
@ -150,6 +150,20 @@ rule all:
|
||||||
zip,
|
zip,
|
||||||
model = models,
|
model = models,
|
||||||
scaler = scalers),
|
scaler = scalers),
|
||||||
|
expand(
|
||||||
|
expand("data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{{model}}/{cv_method}/{source}_{day_segment}_{summarised}_{{scaler}}/merged_population_model_results.csv",
|
||||||
|
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
|
||||||
|
cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"],
|
||||||
|
days_before_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_BEFORE_THRESHOLD"],
|
||||||
|
days_after_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_AFTER_THRESHOLD"],
|
||||||
|
cols_var_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_VAR_THRESHOLD"],
|
||||||
|
cv_method = config["PARAMS_FOR_ANALYSIS"]["CV_METHODS"],
|
||||||
|
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
||||||
|
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"],
|
||||||
|
summarised = config["PARAMS_FOR_ANALYSIS"]["SUMMARISED"]),
|
||||||
|
zip,
|
||||||
|
model = models,
|
||||||
|
scaler = scalers),
|
||||||
|
|
||||||
# Visualisations
|
# Visualisations
|
||||||
expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
|
expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
|
||||||
|
|
|
@ -148,3 +148,13 @@ rule modeling:
|
||||||
"data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{model}/{cv_method}/{source}_{day_segment}_{summarised}_{scaler}/notes.log"
|
"data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{model}/{cv_method}/{source}_{day_segment}_{summarised}_{scaler}/notes.log"
|
||||||
script:
|
script:
|
||||||
"../src/models/modeling.py"
|
"../src/models/modeling.py"
|
||||||
|
|
||||||
|
# Runs merge_population_model_results.py on three CSV inputs to produce one
# merged_population_model_results.csv per combination of threshold, model,
# cv_method, source, day_segment, summarised and scaler wildcards.
rule merge_population_model_results:
    input:
        # Located in the same directory as this rule's output.
        overall_results = "data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{model}/{cv_method}/{source}_{day_segment}_{summarised}_{scaler}/overall_results.csv",
        # Read from data_for_population_model; the path uses only the threshold,
        # source and day_segment wildcards (no model, cv_method, summarised or scaler).
        nan_cells_ratio = "data/processed/data_for_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_nancellsratio.csv",
        # The baseline path does not contain the model or scaler wildcards, so
        # the same baseline file is shared by every model/scaler combination.
        baseline = "data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_{summarised}_{cv_method}_baseline.csv"
    output:
        "data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{model}/{cv_method}/{source}_{day_segment}_{summarised}_{scaler}/merged_population_model_results.csv"
    script:
        # NOTE(review): the relative path is presumably resolved against the
        # file that defines this rule -- confirm against the Snakemake docs.
        "../src/models/merge_population_model_results.py"
|
||||||
|
|
|
@ -0,0 +1,16 @@
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# Snakemake script body: `snakemake` is provided by Snakemake at runtime and
# exposes the rule's named inputs and its output path.
rule_inputs = snakemake.input

model_scores = pd.read_csv(rule_inputs["overall_results"])
nan_ratio_table = pd.read_csv(rule_inputs["nan_cells_ratio"])
baseline_table = pd.read_csv(rule_inputs["baseline"], index_col=["method"])

# Put the NaN-cells ratio in as the fourth column of the model results.
model_scores.insert(
    loc=3,
    column="nan_cells_ratio",
    value=nan_ratio_table["nan_cells_ratio"],
)

# Collapse the baseline table (indexed by "method") into a single row whose
# columns are named "b_<method>_<column>".
flat_baseline = baseline_table.stack().to_frame().T
flat_baseline.columns = [
    f"b_{method}_{metric}" for method, metric in flat_baseline.columns
]

# Join the two frames column-wise (aligned on the row index) and write them out.
merged = pd.concat([model_scores, flat_baseline], axis=1)
merged.to_csv(snakemake.output[0], index=False)
|
Loading…
Reference in New Issue