rapids/src/data/merge_baseline_data.py

30 lines
664 B
Python
Raw Normal View History

2022-02-04 18:21:42 +01:00
import pandas as pd
# Column headers as they appear in the baseline CSV files (Dutch), mapped to
# the English column names the rest of this script works with.
VARIABLES_TO_TRANSLATE = {
    "Gebruikersnaam": "username",
    "Geslacht": "gender",
    "Geboortedatum": "date_of_birth",
}
# Paths of the baseline CSV files, taken from the "data" entry of the
# Snakemake rule's input (the `snakemake` object is injected at runtime).
filenames = snakemake.input["data"]

# Stack all files row-wise.
#   * join="inner" keeps only the columns present in every file.
#   * ignore_index=True gives the result a fresh 0..n-1 index directly,
#     instead of resetting the index and dropping the helper "index" column
#     afterwards (which would also discard a genuine data column that
#     happened to be called "index").
# The Dutch headers are then translated to their English names.
baseline = pd.concat(
    [pd.read_csv(fn) for fn in filenames],
    join="inner",
    ignore_index=True,
).rename(columns=VARIABLES_TO_TRANSLATE)

# One reference instant for the whole table, so every age is measured
# against the same moment.
now = pd.Timestamp("now")

baseline = baseline.assign(
    date_of_birth=lambda x: pd.to_datetime(x.date_of_birth),
    # Evaluated after the entry above, so date_of_birth is already datetime
    # here. Age is whole elapsed days divided by a year length in days.
    # NOTE(review): 365.25245 is kept unchanged to preserve existing output;
    # confirm this is the intended year length.
    age=lambda x: (now - x.date_of_birth).dt.days / 365.25245,
)

# Written with the default settings, i.e. the row index is included as the
# first (unnamed) column of the output file.
baseline.to_csv(snakemake.output[0])