parent
b1f356c3f7
commit
f5688f6154
|
@ -645,7 +645,7 @@ PARAMS_FOR_ANALYSIS:
|
||||||
results-survey413767_final.csv # Belgium 2
|
results-survey413767_final.csv # Belgium 2
|
||||||
]
|
]
|
||||||
QUESTION_LIST: survey637813+question_text.csv
|
QUESTION_LIST: survey637813+question_text.csv
|
||||||
FEATURES: [age, gender, startlanguage, demand, control, demand_control_ratio]
|
FEATURES: [age, gender, startlanguage, limesurvey_demand, limesurvey_control, limesurvey_demand_control_ratio, limesurvey_demand_control_ratio_quartile]
|
||||||
CATEGORICAL_FEATURES: [gender]
|
CATEGORICAL_FEATURES: [gender]
|
||||||
|
|
||||||
TARGET:
|
TARGET:
|
||||||
|
|
|
@ -37,3 +37,14 @@ rule select_target:
|
||||||
"data/processed/models/individual_model/{pid}/input.csv"
|
"data/processed/models/individual_model/{pid}/input.csv"
|
||||||
script:
|
script:
|
||||||
"../src/models/select_targets.py"
|
"../src/models/select_targets.py"
|
||||||
|
|
||||||
|
# Combine the cleaned sensor features of all participants with each
# participant's baseline features and hand them, together with the configured
# target label, to the merge script that writes the population-model input.
rule merge_features_and_targets_for_population_model:
    input:
        # One table holding the cleaned sensor features of all participants.
        cleaned_sensor_features = "data/processed/features/all_participants/all_sensor_features_cleaned_rapids.csv",
        # One baseline-features file per participant id listed in config["PIDS"].
        demographic_features = expand("data/processed/features/{pid}/baseline_features.csv", pid=config["PIDS"]),
    params:
        # Target label taken from the analysis section of the config.
        target_variable=config["PARAMS_FOR_ANALYSIS"]["TARGET"]["LABEL"]
    output:
        "data/processed/models/population_model/input.csv"
    script:
        "../src/models/merge_features_and_targets_for_population_model.py"
|
||||||
|
|
|
@ -0,0 +1,20 @@
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from helper import retain_target_column
|
||||||
|
|
||||||
|
# Build the population-model input table: sensor features joined with the
# per-participant baseline features, then passed through retain_target_column.
# Runs as a Snakemake script, so `snakemake` is injected by the workflow.

sensor_features = pd.read_csv(snakemake.input["cleaned_sensor_features"])

# Read every participant's baseline features and tag each table with its pid.
# Frames are collected in a list and concatenated once: calling pd.concat
# inside the loop re-copies all previous rows on every iteration and relies on
# concatenating with an initially empty DataFrame.
baseline_frames = []
for baseline_features_path in snakemake.input["demographic_features"]:
    # The pid is taken from the fourth "/"-separated path component, so the
    # paths must keep the layout <dir>/<dir>/<dir>/<pid>/<file>.
    pid = baseline_features_path.split("/")[3]
    baseline_features = pd.read_csv(baseline_features_path)
    baseline_frames.append(baseline_features.assign(pid=pid))

if baseline_frames:
    all_baseline_features = pd.concat(baseline_frames, axis=0)
else:
    # pd.concat raises on an empty list; keep the empty frame that the
    # previous loop-based accumulation produced for this case.
    all_baseline_features = pd.DataFrame()

# merge sensor features and baseline features
# Left join on "pid": every sensor-feature row is kept, baseline columns are
# NaN for participants without a baseline file.
features = sensor_features.merge(all_baseline_features, on="pid", how="left")

target_variable_name = snakemake.params["target_variable"]
# NOTE(review): retain_target_column comes from the project's helper module;
# presumably it keeps only the configured target among the target columns —
# confirm against helper.py.
model_input = retain_target_column(features, target_variable_name)

model_input.to_csv(snakemake.output[0], index=False)
|
Loading…
Reference in New Issue