parent 16e608db74
commit bf9c764c97
@@ -405,7 +405,7 @@ for provider in config["ALL_CLEANING_OVERALL"]["PROVIDERS"].keys():
 
 # Demographic features
 files_to_compute.extend(expand("data/raw/baseline_merged.csv"))
-#files_to_compute.extend(expand("data/raw/{pid}/participant_baseline_raw.csv", pid=config["PIDS"]))
+files_to_compute.extend(expand("data/raw/{pid}/participant_baseline_raw.csv", pid=config["PIDS"]))
 
 rule all:
     input:
@@ -6,3 +6,11 @@ rule merge_baseline_data:
     script:
         "../src/data/merge_baseline_data.py"
 
+rule download_baseline_data:
+    input:
+        participant_file = "data/external/participant_files/{pid}.yaml",
+        data = "data/raw/baseline_merged.csv"
+    output:
+        "data/raw/{pid}/participant_baseline_raw.csv"
+    script:
+        "../src/data/download_baseline_data.py"
@@ -0,0 +1,14 @@
+import pandas as pd
+import yaml
+
+filename = snakemake.input["data"]
+baseline = pd.read_csv(filename)
+
+with open(snakemake.input["participant_file"], "r") as file:
+    participant = yaml.safe_load(file)
+
+username = participant["PHONE"]["LABEL"]
+
+baseline[baseline["username"] == username].to_csv(snakemake.output[0],
+                                                   index=False,
+                                                   encoding="utf-8",)
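For reference, the new download_baseline_data.py script expects the participant YAML to carry the participant's label under PHONE → LABEL, matching the "username" column of the merged baseline CSV. Below is a minimal, self-contained sketch of that filtering step; the YAML content and CSV rows are illustrative placeholders, not values from this repository.

# Minimal sketch of the filter used in download_baseline_data.py.
# The participant YAML and CSV contents below are made-up placeholders.
import io

import pandas as pd
import yaml

participant_yaml = """
PHONE:
  LABEL: p01
"""
participant = yaml.safe_load(participant_yaml)
username = participant["PHONE"]["LABEL"]

baseline = pd.read_csv(io.StringIO(
    "username,Geboortedatum\n"
    "p01,1990-01-01\n"
    "p02,1985-06-15\n"
))

# Keep only the rows belonging to this participant, as the script does.
print(baseline[baseline["username"] == username])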
@@ -11,7 +11,11 @@ filenames = snakemake.input["data"]
 baseline_dfs = []
 
 for fn in filenames:
-    baseline_dfs.append(pd.read_csv(fn))
+    baseline_dfs.append(pd.read_csv(fn,
+                                    parse_dates=["Geboortedatum"],
+                                    infer_datetime_format=True,
+                                    cache_dates=True,
+                                    ))
 
 baseline = (
     pd.concat(baseline_dfs, join="inner")
@@ -22,8 +26,9 @@ baseline = (
 baseline.rename(columns=VARIABLES_TO_TRANSLATE, copy=False, inplace=True)
 now = pd.Timestamp("now")
 baseline = baseline.assign(
-    date_of_birth=lambda x: pd.to_datetime(x.date_of_birth),
     age=lambda x: (now - x.date_of_birth).dt.days / 365.25245,
 )
 
-baseline.to_csv(snakemake.output[0])
+baseline.to_csv(snakemake.output[0],
+                index=False,
+                encoding="utf-8",)
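The dropped date_of_birth line in merge_baseline_data.py and the new parse_dates=["Geboortedatum"] argument go together: once read_csv parses the column, it is already datetime64 and the age can be computed directly. A small sketch of that behaviour follows, with placeholder data and assuming the VARIABLES_TO_TRANSLATE mapping renames Geboortedatum to date_of_birth.

# Sketch (placeholder data) of why the explicit pd.to_datetime became redundant:
# parse_dates already returns datetime64 values for the parsed column.
import io

import pandas as pd

df = pd.read_csv(
    io.StringIO("Geboortedatum\n1990-01-01\n1985-06-15\n"),
    parse_dates=["Geboortedatum"],
)
df = df.rename(columns={"Geboortedatum": "date_of_birth"})  # mimics VARIABLES_TO_TRANSLATE

now = pd.Timestamp("now")
df = df.assign(age=lambda x: (now - x.date_of_birth).dt.days / 365.25245)
print(df.dtypes)  # date_of_birth is datetime64[ns] without any extra conversion
print(df)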