Add a rule to merge sensor and baseline features.

And select target as before.
labels
junos 2022-04-08 15:42:04 +02:00
parent b1f356c3f7
commit f5688f6154
3 changed files with 32 additions and 1 deletions

View File

@ -645,7 +645,7 @@ PARAMS_FOR_ANALYSIS:
results-survey413767_final.csv # Belgium 2
]
QUESTION_LIST: survey637813+question_text.csv
FEATURES: [age, gender, startlanguage, demand, control, demand_control_ratio]
FEATURES: [age, gender, startlanguage, limesurvey_demand, limesurvey_control, limesurvey_demand_control_ratio, limesurvey_demand_control_ratio_quartile]
CATEGORICAL_FEATURES: [gender]
TARGET:

View File

@ -37,3 +37,14 @@ rule select_target:
"data/processed/models/individual_model/{pid}/input.csv"
script:
"../src/models/select_targets.py"
rule merge_features_and_targets_for_population_model:
input:
cleaned_sensor_features = "data/processed/features/all_participants/all_sensor_features_cleaned_rapids.csv",
demographic_features = expand("data/processed/features/{pid}/baseline_features.csv", pid=config["PIDS"]),
params:
target_variable=config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"]
output:
"data/processed/models/population_model/input.csv"
script:
"../src/models/merge_features_and_targets_for_population_model.py"

View File

@ -0,0 +1,20 @@
import pandas as pd
from helper import retain_target_column
sensor_features = pd.read_csv(snakemake.input["cleaned_sensor_features"])
all_baseline_features = pd.DataFrame()
for baseline_features_path in snakemake.input["demographic_features"]:
pid = baseline_features_path.split("/")[3]
baseline_features = pd.read_csv(baseline_features_path)
baseline_features = baseline_features.assign(pid=pid)
all_baseline_features = pd.concat([all_baseline_features, baseline_features], axis=0)
# merge sensor features and baseline features
features = sensor_features.merge(all_baseline_features, on="pid", how="left")
target_variable_name = snakemake.params["target_variable"]
model_input = retain_target_column(features, target_variable_name)
model_input.to_csv(snakemake.output[0], index=False)