30 lines
664 B
Python
30 lines
664 B
Python
|
import pandas as pd
|
||
|
|
||
|
# Maps the Dutch column headers of the raw exports to the English names
# used further down in this script.
VARIABLES_TO_TRANSLATE = {
    "Gebruikersnaam": "username",
    "Geslacht": "gender",
    "Geboortedatum": "date_of_birth",
}

# `snakemake` is not defined in this file; it is expected to be injected by
# Snakemake when the script is run through a rule's `script:` directive.
filenames = snakemake.input["data"]

# Read every input CSV and stack the rows into one frame.
# join="inner" keeps only the columns present in all inputs;
# ignore_index=True renumbers the rows 0..n-1 directly, instead of the
# reset_index()/drop("index") round trip, which would discard a genuine
# data column that happens to be called "index".
baseline = pd.concat(
    [pd.read_csv(fn) for fn in filenames],
    join="inner",
    ignore_index=True,
)

# Translate the column names; columns missing from the mapping are kept as-is.
baseline = baseline.rename(columns=VARIABLES_TO_TRANSLATE)

# Reference moment for the age computation (naive timestamp, taken once so
# every row is measured against the same instant).
now = pd.Timestamp("now")

# `assign` evaluates keyword arguments in order, so `age` sees the
# already-parsed `date_of_birth` column.
# NOTE(review): 365.25245 is an unusual days-per-year constant (365.25 is the
# common choice) -- kept unchanged, confirm it is intentional.
baseline = baseline.assign(
    date_of_birth=lambda x: pd.to_datetime(x.date_of_birth),
    age=lambda x: (now - x.date_of_birth).dt.days / 365.25245,
)

# Write the combined table (row index included) to the rule's first output.
baseline.to_csv(snakemake.output[0])
|