First merge baseline datasets.
parent 204f6f50b0
commit 16e608db74
@@ -404,7 +404,8 @@ for provider in config["ALL_CLEANING_OVERALL"]["PROVIDERS"].keys():
         files_to_compute.extend(expand("data/processed/features/all_participants/all_sensor_features_cleaned_" + provider.lower() +".csv"))
 
 # Demographic features
-files_to_compute.extend(expand("data/raw/{pid}/participant_baseline_raw.csv", pid=config["PIDS"]))
+files_to_compute.extend(expand("data/raw/baseline_merged.csv"))
+#files_to_compute.extend(expand("data/raw/{pid}/participant_baseline_raw.csv", pid=config["PIDS"]))
 
 rule all:
     input:
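Note: files_to_compute is the list that rule all consumes as its input further down the Snakefile, so adding data/raw/baseline_merged.csv here is what makes Snakemake schedule the new merge rule. A minimal sketch of that pattern (paths taken from this hunk, the surrounding structure assumed):

files_to_compute = []
files_to_compute.extend(expand("data/raw/baseline_merged.csv"))  # new merged-baseline target

rule all:
    input:
        files_to_compute  # Snakemake builds every path collected in this list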
@@ -628,7 +628,7 @@ ALL_CLEANING_OVERALL:
 ########################################################################################################################
 
 PARAMS_FOR_ANALYSIS:
-  DEMOGRAPHIC:
+  BASELINE:
     FOLDER: data/external/baseline
     CONTAINER: [results-survey637813_final.csv, # Slovenia
                 results-survey358134_final.csv, # Belgium 1
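Note: renaming DEMOGRAPHIC to BASELINE means any code still reading the old key fails immediately. A rough sketch of how the renamed section is looked up from the loaded config dict (keys copied from this hunk):

folder = config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FOLDER"]          # "data/external/baseline"
containers = config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["CONTAINER"]   # per-country survey CSVs
# config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"] would now raise a KeyError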
@@ -1,9 +1,8 @@
-rule download_demographic_data:
+rule merge_baseline_data:
     input:
-        participant_file = "data/external/participant_files/{pid}.yaml",
-        data = expand(config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["FOLDER"] + "/{container}", container=config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["CONTAINER"])
+        data = expand(config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FOLDER"] + "/{container}", container=config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["CONTAINER"])
     output:
-        "data/raw/{pid}/participant_baseline_raw.csv"
+        "data/raw/baseline_merged.csv"
     script:
-        "../src/data/download_demographic_data.py"
+        "../src/data/merge_baseline_data.py"
 
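Note: expand fills the {container} wildcard with every entry of the CONTAINER list, so the rule's data input resolves to one path per survey export. A rough illustration, using the two container names visible in the config hunk above:

from snakemake.io import expand

containers = ["results-survey637813_final.csv", "results-survey358134_final.csv"]
paths = expand("data/external/baseline/{container}", container=containers)
# -> ["data/external/baseline/results-survey637813_final.csv",
#     "data/external/baseline/results-survey358134_final.csv"]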
@@ -1,14 +0,0 @@
-import pandas as pd
-
-filenames = snakemake.input["data"]
-
-baseline_dfs = []
-
-for fn in filenames:
-    baseline_dfs.append(pd.read_csv(fn))
-
-baseline = (
-    pd.concat(baseline_dfs, join="inner")
-    .reset_index()
-    .drop(columns="index")
-)
@@ -0,0 +1,29 @@
+import pandas as pd
+
+VARIABLES_TO_TRANSLATE = {
+    "Gebruikersnaam": "username",
+    "Geslacht": "gender",
+    "Geboortedatum": "date_of_birth",
+}
+
+filenames = snakemake.input["data"]
+
+baseline_dfs = []
+
+for fn in filenames:
+    baseline_dfs.append(pd.read_csv(fn))
+
+baseline = (
+    pd.concat(baseline_dfs, join="inner")
+    .reset_index()
+    .drop(columns="index")
+)
+
+baseline.rename(columns=VARIABLES_TO_TRANSLATE, copy=False, inplace=True)
+now = pd.Timestamp("now")
+baseline = baseline.assign(
+    date_of_birth=lambda x: pd.to_datetime(x.date_of_birth),
+    age=lambda x: (now - x.date_of_birth).dt.days / 365.25245,
+)
+
+baseline.to_csv(snakemake.output[0])
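Note on join="inner": pd.concat keeps only the columns shared by every survey export, so per-country questionnaires that differ by a few extra columns can still be stacked safely. A toy sketch of that behavior (invented values, not the real survey fields):

import pandas as pd

slovenia = pd.DataFrame({"Gebruikersnaam": ["p01"], "Geslacht": ["M"], "only_in_one_survey": [1]})
belgium = pd.DataFrame({"Gebruikersnaam": ["p02"], "Geslacht": ["F"]})

merged = pd.concat([slovenia, belgium], join="inner").reset_index(drop=True)
# merged keeps only the shared columns: Gebruikersnaam, Geslacht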