rapids/src/models/merge_features_and_targets_...

25 lines
926 B
Python

import pandas as pd
from helper import retain_target_column
# Merge the sensor features with every participant's demographic (baseline)
# features on "pid", then write the model input table to the rule's first
# output. `snakemake` is not defined in this file; it is expected to be
# injected by Snakemake when this runs as a rule script.
sensor_features = pd.read_csv(snakemake.input["cleaned_sensor_features"])

# Collect one frame per participant and concatenate once after the loop:
# calling pd.concat inside the loop re-copies all previously accumulated rows
# on every iteration.
baseline_frames = []
for baseline_features_path in snakemake.input["demographic_features"]:
    # The participant id is taken from the fourth "/"-separated component of
    # the path. NOTE(review): this relies on the configured directory layout
    # and on "/" separators -- confirm against the Snakefile.
    pid = baseline_features_path.split("/")[3]
    baseline_frames.append(pd.read_csv(baseline_features_path).assign(pid=pid))

# pd.concat raises on an empty list, so fall back to an empty frame to keep
# the behaviour of the original accumulator when no baseline files are given.
all_baseline_features = (
    pd.concat(baseline_frames, axis=0) if baseline_frames else pd.DataFrame()
)

# merge sensor features and baseline features
if not sensor_features.empty:
    # Left join keeps every sensor row even if a participant has no baseline row.
    features = sensor_features.merge(all_baseline_features, on="pid", how="left")
    target_variable_name = snakemake.params["target_variable"]
    # retain_target_column comes from the project's helper module; presumably
    # it keeps only the requested target among the candidate target columns --
    # verify against helper.py.
    model_input = retain_target_column(features, target_variable_name)
    model_input.to_csv(snakemake.output[0], index=False)
else:
    # No sensor data: write the empty table through unchanged so the rule
    # still produces its declared output file.
    sensor_features.to_csv(snakemake.output[0], index=False)