"""Merge baseline CSV exports, translate column names and derive an age column.

Intended to be run through Snakemake's ``script:`` directive, which provides
the ``snakemake`` object: ``snakemake.input["data"]`` lists the CSV files to
read and ``snakemake.output[0]`` is the path of the merged CSV to write.
"""
import pandas as pd

# Dutch source column name -> English column name used downstream.
VARIABLES_TO_TRANSLATE = {
    "Gebruikersnaam": "username",
    "Geslacht": "gender",
    "Geboortedatum": "date_of_birth",
}

# Divisor used to turn a number of days into years. The value is kept exactly
# as it was in the original script.
# NOTE(review): this is neither the Julian year (365.25) nor the mean
# Gregorian year (365.2425) -- confirm that this constant is intended.
DAYS_PER_YEAR = 365.25245


def load_baseline(filenames):
    """Read every baseline CSV and return one frame with translated columns.

    Args:
        filenames: Iterable of paths (or file-like objects) accepted by
            ``pandas.read_csv``. Each file must contain a ``Geboortedatum``
            column, which is parsed as a date.

    Returns:
        A ``DataFrame`` holding the rows of all inputs in order, restricted to
        the columns common to every input (``join="inner"``), with a fresh
        ``0..n-1`` index and the columns renamed per ``VARIABLES_TO_TRANSLATE``.

    Raises:
        ValueError: If ``filenames`` is empty (raised by ``pandas.concat``).
    """
    frames = [
        pd.read_csv(fn, parse_dates=["Geboortedatum"], cache_dates=True)
        for fn in filenames
    ]
    # ignore_index=True renumbers the rows directly. The previous
    # reset_index().drop(columns="index") round trip failed whenever the data
    # itself already contained a column called "index".
    merged = pd.concat(frames, join="inner", ignore_index=True)
    return merged.rename(columns=VARIABLES_TO_TRANSLATE)


def add_age(baseline, now=None):
    """Return a copy of ``baseline`` with an ``age`` column in years.

    Args:
        baseline: Frame with a datetime ``date_of_birth`` column.
        now: Reference timestamp; defaults to ``pd.Timestamp("now")``.

    Returns:
        A new ``DataFrame`` with ``age`` computed as the whole number of days
        between ``date_of_birth`` and ``now`` divided by ``DAYS_PER_YEAR``.
    """
    if now is None:
        now = pd.Timestamp("now")
    elapsed_days = (now - baseline["date_of_birth"]).dt.days
    return baseline.assign(age=elapsed_days / DAYS_PER_YEAR)


def main(smk):
    """Build the merged baseline table and write it to ``smk.output[0]``.

    Args:
        smk: The Snakemake job object; ``smk.input["data"]`` and
            ``smk.output[0]`` are the only attributes used.
    """
    baseline = add_age(load_baseline(smk.input["data"]))
    baseline.to_csv(smk.output[0], index=False, encoding="utf-8")


if __name__ == "__main__":
    # `snakemake` is not defined in this file; it is expected to be injected
    # by Snakemake when the file is run via the `script:` directive.
    main(snakemake)  # noqa: F821