"""Snakemake script: merge the input CSV files into one CSV with English column names."""

import pandas as pd


# Dutch column headers of the input data mapped to the English names used
# for the same columns in the output.
VARIABLES_TO_TRANSLATE = {
    "Gebruikersnaam": "username",
    "Geslacht": "gender",
    "Geboortedatum": "date_of_birth",
}


# Input CSV files, supplied by Snakemake under the "data" input key.
filenames = snakemake.input["data"]

# Read every input file into its own DataFrame, parsing the "Geboortedatum"
# column as dates.
# `infer_datetime_format` is deliberately not passed: it is deprecated since
# pandas 2.0, where a strict version of that inference is the default.
baseline_dfs = [
    pd.read_csv(fn, parse_dates=["Geboortedatum"], cache_dates=True)
    for fn in filenames
]


# Stack the per-file frames into one table. join="inner" keeps only the
# columns present in every frame; ignore_index=True gives the result a fresh
# 0..n-1 row index directly, which (unlike reset_index() followed by dropping
# the "index" column) also works when the data itself has an "index" column.
baseline = pd.concat(baseline_dfs, join="inner", ignore_index=True)

# Translate the column headers in place. Headers that are not keys of the
# mapping are left unchanged by rename().
baseline.rename(columns=VARIABLES_TO_TRANSLATE, inplace=True)


# Write the combined table to the first Snakemake output file as UTF-8 CSV,
# leaving out the row index.
baseline.to_csv(snakemake.output[0], index=False, encoding="utf-8")