2022-02-04 18:21:42 +01:00
|
|
|
# Merge the baseline container files into a single CSV.
# The input list is built from the configured baseline FOLDER joined with each
# entry of the configured CONTAINER value; the merging itself is delegated to
# the referenced script.
rule merge_baseline_data:
    input:
        data=expand(
            config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FOLDER"] + "/{container}",
            container=config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["CONTAINER"],
        ),
    output:
        "data/raw/baseline_merged.csv",
    script:
        "../src/data/merge_baseline_data.py"
|
2022-02-04 17:37:00 +01:00
|
|
|
|
2022-02-04 18:37:57 +01:00
|
|
|
# Produce the per-participant raw baseline CSV.
# Takes the participant's YAML file and the merged baseline CSV as inputs and
# hands them to the referenced script; {pid} is the participant wildcard.
rule download_baseline_data:
    input:
        participant_file="data/external/participant_files/{pid}.yaml",
        data="data/raw/baseline_merged.csv",
    output:
        "data/raw/{pid}/participant_baseline_raw.csv",
    script:
        "../src/data/download_baseline_data.py"
|
2022-02-23 11:09:33 +01:00
|
|
|
|
|
|
|
# Derive baseline features for one participant from their raw baseline CSV.
# Writes an interim questionnaire file and the processed feature file; the
# computation itself lives in the referenced script.
rule baseline_features:
    input:
        "data/raw/{pid}/participant_baseline_raw.csv",
    params:
        pid="{pid}",
        features=config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FEATURES"],
        # Path of the question list inside the baseline folder.
        # NOTE(review): passed as a param, so changes to that file are
        # presumably not tracked by the workflow — confirm this is intended.
        question_filename=(
            config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FOLDER"]
            + "/"
            + config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["QUESTION_LIST"]
        ),
    output:
        interim="data/interim/{pid}/baseline_questionnaires.csv",
        features="data/processed/features/{pid}/baseline_features.csv",
    script:
        "../src/data/baseline_features.py"
|
2022-04-06 17:47:03 +02:00
|
|
|
|
2022-04-06 18:16:49 +02:00
|
|
|
# Build the individual-model input file for one participant.
# Reads the participant's cleaned sensor features and passes the configured
# target label to the referenced script.
rule select_target:
    input:
        cleaned_sensor_features="data/processed/features/{pid}/all_sensor_features_cleaned_rapids.csv",
    params:
        target_variable=config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"],
    output:
        "data/processed/models/individual_model/{pid}/input.csv",
    script:
        "../src/models/select_targets.py"
|
2022-04-08 15:42:04 +02:00
|
|
|
|
|
|
|
# Build the population-model input file.
# Inputs are the all-participants cleaned sensor features plus one baseline
# feature file per participant id listed in config["PIDS"]; the configured
# target label is passed to the referenced script.
rule merge_features_and_targets_for_population_model:
    input:
        cleaned_sensor_features="data/processed/features/all_participants/all_sensor_features_cleaned_rapids.csv",
        demographic_features=expand(
            "data/processed/features/{pid}/baseline_features.csv",
            pid=config["PIDS"],
        ),
    params:
        target_variable=config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"],
    output:
        "data/processed/models/population_model/input.csv",
    script:
        "../src/models/merge_features_and_targets_for_population_model.py"
|
2022-04-12 14:23:58 +02:00
|
|
|
|