rapids/src/data/baseline_features.py

24 lines
840 B
Python

import pandas as pd
# Snakemake script body: builds a one-row table of baseline (demographic)
# features for a participant and writes it to the rule's first output as CSV.
# The `snakemake` object is not defined in this file; it is expected to be
# injected by the workflow runner when the script is executed.

pid = snakemake.params["pid"]  # read from the rule params; not used below
requested_features = snakemake.params["features"]

# Start from an empty frame so the output always carries every requested
# column, even when no participant data is available.
baseline_features = pd.DataFrame(columns=requested_features)

# `date_of_birth` is parsed as a datetime so it can be subtracted from "now".
participant_info = pd.read_csv(
    snakemake.input[0],
    parse_dates=["date_of_birth"],
)

has_participant_row = not participant_info.empty

if has_participant_row and "age" in requested_features:
    # Age in years at script run time, taken from the first input row.
    # NOTE(review): 365.25245 matches neither the Julian (365.25) nor the
    # Gregorian (365.2425) mean year length -- confirm the intended constant.
    time_since_birth = pd.Timestamp("now") - participant_info.loc[0, "date_of_birth"]
    baseline_features.loc[0, "age"] = time_since_birth.days / 365.25245

if has_participant_row:
    # These features are copied verbatim from the first input row.
    for column in ("gender", "startlanguage"):
        if column in requested_features:
            baseline_features.loc[0, column] = participant_info.loc[0, column]

# With empty input this writes a header-only CSV.
baseline_features.to_csv(snakemake.output[0], index=False, encoding="utf-8")