Generate standardized model input files (NOTE: the unstandardized sections are commented out!)

sociality-task
Primoz 2022-08-24 13:42:39 +00:00
parent c498ecb742
commit a620def209
2 changed files with 29 additions and 6 deletions

View File

@@ -545,8 +545,10 @@ if config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["COMPUTE"]:
# Targets (labels) # Targets (labels)
if config["PARAMS_FOR_ANALYSIS"]["TARGET"]["COMPUTE"]: if config["PARAMS_FOR_ANALYSIS"]["TARGET"]["COMPUTE"]:
files_to_compute.extend(expand("data/processed/models/individual_model/{pid}/input.csv", pid=config["PIDS"])) # files_to_compute.extend(expand("data/processed/models/individual_model/{pid}/input.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/models/population_model/input.csv")) # files_to_compute.extend(expand("data/processed/models/population_model/input.csv"))
files_to_compute.extend(expand("data/processed/models/individual_model/{pid}/z_input.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/models/population_model/z_input.csv"))
#files_to_compute.extend(expand("data/processed/models/individual_model/{pid}/output_{cv_method}/baselines.csv", pid=config["PIDS"], cv_method=config["PARAMS_FOR_ANALYSIS"]["CV_METHODS"])) #files_to_compute.extend(expand("data/processed/models/individual_model/{pid}/output_{cv_method}/baselines.csv", pid=config["PIDS"], cv_method=config["PARAMS_FOR_ANALYSIS"]["CV_METHODS"]))
# Put the for loop over STANDARDIZATION providers if all are COMPUTE == True # Put the for loop over STANDARDIZATION providers if all are COMPUTE == True

View File

@@ -30,22 +30,43 @@ rule baseline_features:
rule select_target: rule select_target:
input: input:
cleaned_sensor_features = "data/processed/features/{pid}/all_sensor_features_cleaned_rapids_R.csv" cleaned_sensor_features = "data/processed/features/{pid}/z_all_sensor_features_cleaned_rapids_R.csv"
params: params:
target_variable = config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"] target_variable = config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"]
output: output:
"data/processed/models/individual_model/{pid}/input.csv" "data/processed/models/individual_model/{pid}/z_input.csv"
script: script:
"../src/models/select_targets.py" "../src/models/select_targets.py"
rule merge_features_and_targets_for_population_model: rule merge_features_and_targets_for_population_model:
input: input:
cleaned_sensor_features = "data/processed/features/all_participants/all_sensor_features_cleaned_rapids_R.csv", cleaned_sensor_features = "data/processed/features/all_participants/z_all_sensor_features_cleaned_rapids_R.csv",
demographic_features = expand("data/processed/features/{pid}/baseline_features.csv", pid=config["PIDS"]), demographic_features = expand("data/processed/features/{pid}/baseline_features.csv", pid=config["PIDS"]),
params: params:
target_variable=config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"] target_variable=config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"]
output: output:
"data/processed/models/population_model/input.csv" "data/processed/models/population_model/z_input.csv"
script: script:
"../src/models/merge_features_and_targets_for_population_model.py" "../src/models/merge_features_and_targets_for_population_model.py"
# rule select_target:
# input:
# cleaned_sensor_features = "data/processed/features/{pid}/all_sensor_features_cleaned_rapids_R.csv"
# params:
# target_variable = config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"]
# output:
# "data/processed/models/individual_model/{pid}/input.csv"
# script:
# "../src/models/select_targets.py"
# rule merge_features_and_targets_for_population_model:
# input:
# cleaned_sensor_features = "data/processed/features/all_participants/all_sensor_features_cleaned_rapids_R.csv",
# demographic_features = expand("data/processed/features/{pid}/baseline_features.csv", pid=config["PIDS"]),
# params:
# target_variable=config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"]
# output:
# "data/processed/models/population_model/input.csv"
# script:
# "../src/models/merge_features_and_targets_for_population_model.py"