parent
16e608db74
commit
bf9c764c97
|
@ -405,7 +405,7 @@ for provider in config["ALL_CLEANING_OVERALL"]["PROVIDERS"].keys():
|
|||
|
||||
# Demographic features
|
||||
files_to_compute.extend(expand("data/raw/baseline_merged.csv"))
|
||||
#files_to_compute.extend(expand("data/raw/{pid}/participant_baseline_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/participant_baseline_raw.csv", pid=config["PIDS"]))
|
||||
|
||||
rule all:
|
||||
input:
|
||||
|
|
|
@ -6,3 +6,11 @@ rule merge_baseline_data:
|
|||
script:
|
||||
"../src/data/merge_baseline_data.py"
|
||||
|
||||
rule download_baseline_data:
    """Write one participant's baseline CSV from the merged baseline table.

    Inputs are the participant's YAML file and the merged baseline CSV;
    the per-participant output is produced by the referenced script.
    """
    input:
        participant_file = "data/external/participant_files/{pid}.yaml",
        data = "data/raw/baseline_merged.csv"
    output:
        "data/raw/{pid}/participant_baseline_raw.csv"
    script:
        "../src/data/download_baseline_data.py"
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
import pandas as pd
|
||||
import yaml
|
||||
|
||||
# Read the merged baseline table passed in by the Snakemake rule.
merged_baseline = pd.read_csv(snakemake.input["data"])

# The participant file supplies the label used to select rows.
with open(snakemake.input["participant_file"], "r") as participant_stream:
    participant_config = yaml.safe_load(participant_stream)
phone_label = participant_config["PHONE"]["LABEL"]

# Keep only the rows whose "username" equals the participant's phone label
# and write them out without the index column.
is_participant_row = merged_baseline["username"] == phone_label
participant_baseline = merged_baseline[is_participant_row]
participant_baseline.to_csv(
    snakemake.output[0],
    index=False,
    encoding="utf-8",
)
|
|
@ -11,7 +11,11 @@ filenames = snakemake.input["data"]
|
|||
baseline_dfs = []
|
||||
|
||||
for fn in filenames:
|
||||
baseline_dfs.append(pd.read_csv(fn))
|
||||
baseline_dfs.append(pd.read_csv(fn,
|
||||
parse_dates=["Geboortedatum"],
|
||||
infer_datetime_format=True,
|
||||
cache_dates=True,
|
||||
))
|
||||
|
||||
baseline = (
|
||||
pd.concat(baseline_dfs, join="inner")
|
||||
|
@ -22,8 +26,9 @@ baseline = (
|
|||
baseline.rename(columns=VARIABLES_TO_TRANSLATE, copy=False, inplace=True)
|
||||
now = pd.Timestamp("now")
|
||||
baseline = baseline.assign(
|
||||
date_of_birth=lambda x: pd.to_datetime(x.date_of_birth),
|
||||
age=lambda x: (now - x.date_of_birth).dt.days / 365.25245,
|
||||
)
|
||||
|
||||
baseline.to_csv(snakemake.output[0])
|
||||
baseline.to_csv(snakemake.output[0],
|
||||
index=False,
|
||||
encoding="utf-8",)
|
||||
|
|
Loading…
Reference in New Issue