From a620def209e9f043f852097d0318cb45bde74467 Mon Sep 17 00:00:00 2001 From: Primoz Date: Wed, 24 Aug 2022 13:42:39 +0000 Subject: [PATCH] Generate standardized model input files (NOTE: commented unstandardized sections!) --- Snakefile | 6 ++++-- rules/models.smk | 29 +++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/Snakefile b/Snakefile index a16e3569..83882ac4 100644 --- a/Snakefile +++ b/Snakefile @@ -545,8 +545,10 @@ if config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["COMPUTE"]: # Targets (labels) if config["PARAMS_FOR_ANALYSIS"]["TARGET"]["COMPUTE"]: - files_to_compute.extend(expand("data/processed/models/individual_model/{pid}/input.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/processed/models/population_model/input.csv")) + # files_to_compute.extend(expand("data/processed/models/individual_model/{pid}/input.csv", pid=config["PIDS"])) + # files_to_compute.extend(expand("data/processed/models/population_model/input.csv")) + files_to_compute.extend(expand("data/processed/models/individual_model/{pid}/z_input.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/models/population_model/z_input.csv")) #files_to_compute.extend(expand("data/processed/models/individual_model/{pid}/output_{cv_method}/baselines.csv", pid=config["PIDS"], cv_method=config["PARAMS_FOR_ANALYSIS"]["CV_METHODS"])) # Put the for loop over STANDARDIZATION providers if all are COMPUTE == True diff --git a/rules/models.smk b/rules/models.smk index b027ca2c..34a7a937 100644 --- a/rules/models.smk +++ b/rules/models.smk @@ -30,22 +30,43 @@ rule baseline_features: rule select_target: input: - cleaned_sensor_features = "data/processed/features/{pid}/all_sensor_features_cleaned_rapids_R.csv" + cleaned_sensor_features = "data/processed/features/{pid}/z_all_sensor_features_cleaned_rapids_R.csv" params: target_variable = config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"] output: - "data/processed/models/individual_model/{pid}/input.csv" + "data/processed/models/individual_model/{pid}/z_input.csv" script: "../src/models/select_targets.py" rule merge_features_and_targets_for_population_model: input: - cleaned_sensor_features = "data/processed/features/all_participants/all_sensor_features_cleaned_rapids_R.csv", + cleaned_sensor_features = "data/processed/features/all_participants/z_all_sensor_features_cleaned_rapids_R.csv", demographic_features = expand("data/processed/features/{pid}/baseline_features.csv", pid=config["PIDS"]), params: target_variable=config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"] output: - "data/processed/models/population_model/input.csv" + "data/processed/models/population_model/z_input.csv" script: "../src/models/merge_features_and_targets_for_population_model.py" +# rule select_target: +# input: +# cleaned_sensor_features = "data/processed/features/{pid}/all_sensor_features_cleaned_rapids_R.csv" +# params: +# target_variable = config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"] +# output: +# "data/processed/models/individual_model/{pid}/input.csv" +# script: +# "../src/models/select_targets.py" + +# rule merge_features_and_targets_for_population_model: +# input: +# cleaned_sensor_features = "data/processed/features/all_participants/all_sensor_features_cleaned_rapids_R.csv", +# demographic_features = expand("data/processed/features/{pid}/baseline_features.csv", pid=config["PIDS"]), +# params: +# target_variable=config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"] +# output: +# "data/processed/models/population_model/input.csv" +# script: +# "../src/models/merge_features_and_targets_for_population_model.py" +