rapids/rules/models.smk

73 lines
2.9 KiB
Python

# Runs merge_baseline_data.py over every baseline container named in the config.
# One input path is built per CONTAINER entry, each located under the configured
# BASELINE FOLDER; the script is expected to write one combined CSV.
rule merge_baseline_data:
    input:
        data = expand(
            config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FOLDER"] + "/{container}",
            container = config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["CONTAINER"]
        )
    output:
        "data/raw/baseline_merged.csv"
    script:
        "../src/data/merge_baseline_data.py"
# Produces the per-participant raw baseline CSV for the {pid} wildcard.
# Inputs are that participant's YAML file and the merged baseline CSV;
# the work itself is done by download_baseline_data.py.
rule download_baseline_data:
    input:
        participant_file = "data/external/participant_files/{pid}.yaml",
        data = "data/raw/baseline_merged.csv"
    output:
        "data/raw/{pid}/participant_baseline_raw.csv"
    script:
        "../src/data/download_baseline_data.py"
# Runs baseline_features.py on one participant's raw baseline CSV.
# Two files are declared as outputs: an interim questionnaire CSV and the
# processed baseline feature CSV.
rule baseline_features:
    input:
        "data/raw/{pid}/participant_baseline_raw.csv"
    params:
        # The participant id wildcard is forwarded to the script as a param.
        pid = "{pid}",
        features = config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FEATURES"],
        # Path of the question list, located inside the baseline folder.
        question_filename = (
            config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FOLDER"]
            + "/"
            + config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["QUESTION_LIST"]
        )
    output:
        interim = "data/interim/{pid}/baseline_questionnaires.csv",
        features = "data/processed/features/{pid}/baseline_features.csv"
    script:
        "../src/data/baseline_features.py"
# Builds the individual-model input CSV for one participant ({pid}).
# select_targets.py receives the participant's cleaned sensor features and
# the target label taken from the config.
rule select_target:
    input:
        cleaned_sensor_features = "data/processed/features/{pid}/z_all_sensor_features_cleaned_straw_py.csv"
    params:
        target_variable = config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"]
    output:
        "data/processed/models/individual_model/{pid}/z_input.csv"
    script:
        "../src/models/select_targets.py"
# Builds the population-model input CSV.
# Inputs are the all-participants cleaned sensor feature file plus one
# baseline feature file per participant id listed in config["PIDS"].
rule merge_features_and_targets_for_population_model:
    input:
        cleaned_sensor_features = "data/processed/features/all_participants/z_all_sensor_features_cleaned_straw_py.csv",
        demographic_features = expand(
            "data/processed/features/{pid}/baseline_features.csv",
            pid = config["PIDS"]
        )
    params:
        target_variable = config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"]
    output:
        "data/processed/models/population_model/z_input.csv"
    script:
        "../src/models/merge_features_and_targets_for_population_model.py"
# rule select_target:
# input:
# cleaned_sensor_features = "data/processed/features/{pid}/all_sensor_features_cleaned_straw_py.csv"
# params:
# target_variable = config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"]
# output:
# "data/processed/models/individual_model/{pid}/input.csv"
# script:
# "../src/models/select_targets.py"
# rule merge_features_and_targets_for_population_model:
# input:
# cleaned_sensor_features = "data/processed/features/all_participants/all_sensor_features_cleaned_straw_py.csv",
# demographic_features = expand("data/processed/features/{pid}/baseline_features.csv", pid=config["PIDS"]),
# params:
# target_variable=config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"]
# output:
# "data/processed/models/population_model/input.csv"
# script:
# "../src/models/merge_features_and_targets_for_population_model.py"