parent
b1f356c3f7
commit
f5688f6154
|
@ -645,7 +645,7 @@ PARAMS_FOR_ANALYSIS:
|
|||
results-survey413767_final.csv # Belgium 2
|
||||
]
|
||||
QUESTION_LIST: survey637813+question_text.csv
|
||||
FEATURES: [age, gender, startlanguage, demand, control, demand_control_ratio]
|
||||
FEATURES: [age, gender, startlanguage, limesurvey_demand, limesurvey_control, limesurvey_demand_control_ratio, limesurvey_demand_control_ratio_quartile]
|
||||
CATEGORICAL_FEATURES: [gender]
|
||||
|
||||
TARGET:
|
||||
|
|
|
@ -37,3 +37,14 @@ rule select_target:
|
|||
"data/processed/models/individual_model/{pid}/input.csv"
|
||||
script:
|
||||
"../src/models/select_targets.py"
|
||||
|
||||
rule merge_features_and_targets_for_population_model:
|
||||
input:
|
||||
cleaned_sensor_features = "data/processed/features/all_participants/all_sensor_features_cleaned_rapids.csv",
|
||||
demographic_features = expand("data/processed/features/{pid}/baseline_features.csv", pid=config["PIDS"]),
|
||||
params:
|
||||
target_variable=config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"]
|
||||
output:
|
||||
"data/processed/models/population_model/input.csv"
|
||||
script:
|
||||
"../src/models/merge_features_and_targets_for_population_model.py"
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
import pandas as pd
|
||||
|
||||
from helper import retain_target_column
|
||||
|
||||
sensor_features = pd.read_csv(snakemake.input["cleaned_sensor_features"])
|
||||
|
||||
all_baseline_features = pd.DataFrame()
|
||||
for baseline_features_path in snakemake.input["demographic_features"]:
|
||||
pid = baseline_features_path.split("/")[3]
|
||||
baseline_features = pd.read_csv(baseline_features_path)
|
||||
baseline_features = baseline_features.assign(pid=pid)
|
||||
all_baseline_features = pd.concat([all_baseline_features, baseline_features], axis=0)
|
||||
|
||||
# merge sensor features and baseline features
|
||||
features = sensor_features.merge(all_baseline_features, on="pid", how="left")
|
||||
|
||||
target_variable_name = snakemake.params["target_variable"]
|
||||
model_input = retain_target_column(features, target_variable_name)
|
||||
|
||||
model_input.to_csv(snakemake.output[0], index=False)
|
Loading…
Reference in New Issue