21 lines
808 B
Python
21 lines
808 B
Python
|
import pandas as pd

from helper import retain_target_column

# Build the model input table: join the cleaned sensor features with the
# per-participant baseline features and hand the result to
# `retain_target_column` before writing it out as CSV.
# NOTE(review): `snakemake` is not defined in this file; presumably it is
# injected by Snakemake when this runs via a `script:` directive -- confirm.

sensor_features = pd.read_csv(snakemake.input["cleaned_sensor_features"])

# Read every baseline file and tag its rows with the participant id.
baseline_frames = []
for baseline_features_path in snakemake.input["demographic_features"]:
    # The pid is the fourth "/"-separated component of the input path, so
    # this depends on the directory layout of the input files.
    pid = baseline_features_path.split("/")[3]
    baseline_frames.append(pd.read_csv(baseline_features_path).assign(pid=pid))

# Concatenate once instead of growing a DataFrame inside the loop: repeated
# concat recopies all accumulated rows on each iteration and starts from an
# empty frame, which recent pandas versions warn about. `pd.concat` raises on
# an empty list, so fall back to an empty frame when there are no inputs.
all_baseline_features = (
    pd.concat(baseline_frames, axis=0) if baseline_frames else pd.DataFrame()
)

# merge sensor features and baseline features
# Left join: every sensor row is kept; rows whose pid has no baseline file
# get missing values in the baseline columns.
features = sensor_features.merge(all_baseline_features, on="pid", how="left")

target_variable_name = snakemake.params["target_variable"]
# NOTE(review): `retain_target_column` lives in the project-local `helper`
# module; judging by its name it selects the configured target column --
# verify against the helper.
model_input = retain_target_column(features, target_variable_name)

model_input.to_csv(snakemake.output[0], index=False)
|