From 204f6f50b0256a0976e60315922f20cc1ce640d2 Mon Sep 17 00:00:00 2001
From: junos
Date: Fri, 4 Feb 2022 18:06:02 +0100
Subject: [PATCH] Read the relevant files.

---
Review notes (free-form commentary, not part of the commit):
* Line breaks and indentation were restored from a whitespace-collapsed copy;
  the indentation of context lines is a best guess -- TODO confirm against
  the working tree before applying.
* The payload of src/data/download_demographic_data.py was edited in review,
  so the blob hash on its `index` line is stale.

 Snakefile                             |  3 +++
 config.yaml                           |  2 +-
 rules/models.smk                      |  2 +-
 src/data/download_demographic_data.py | 14 ++++++++++++++
 4 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index a42f8b6d..70a44b22 100644
--- a/Snakefile
+++ b/Snakefile
@@ -403,6 +403,9 @@ for provider in config["ALL_CLEANING_OVERALL"]["PROVIDERS"].keys():
     if config["ALL_CLEANING_OVERALL"]["PROVIDERS"][provider]["COMPUTE"]:
         files_to_compute.extend(expand("data/processed/features/all_participants/all_sensor_features_cleaned_" + provider.lower() +".csv"))
 
+# Demographic features
+files_to_compute.extend(expand("data/raw/{pid}/participant_baseline_raw.csv", pid=config["PIDS"]))
+
 rule all:
     input:
         files_to_compute
diff --git a/config.yaml b/config.yaml
index 169ed5be..559396f9 100644
--- a/config.yaml
+++ b/config.yaml
@@ -629,7 +629,7 @@ ALL_CLEANING_OVERALL:
 
 PARAMS_FOR_ANALYSIS:
   DEMOGRAPHIC:
-    FOLDER: E:/STRAWbaseline
+    FOLDER: data/external/baseline
     CONTAINER: [results-survey637813_final.csv, # Slovenia
                 results-survey358134_final.csv, # Belgium 1
                 results-survey413767_final.csv # Belgium 2
diff --git a/rules/models.smk b/rules/models.smk
index 51212c0b..3dc2ce61 100644
--- a/rules/models.smk
+++ b/rules/models.smk
@@ -1,7 +1,7 @@
 rule download_demographic_data:
     input:
         participant_file = "data/external/participant_files/{pid}.yaml",
-        data = config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["FOLDER"] + "/" + config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["CONTAINER"]
+        data = expand(config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["FOLDER"] + "/{container}", container=config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["CONTAINER"])
     output:
         "data/raw/{pid}/participant_baseline_raw.csv"
     script:
diff --git a/src/data/download_demographic_data.py b/src/data/download_demographic_data.py
index e69de29b..af7eb0b5 100644
--- a/src/data/download_demographic_data.py
+++ b/src/data/download_demographic_data.py
@@ -0,0 +1,14 @@
+import pandas as pd
+
+# `snakemake` is injected at run time by Snakemake's `script:` directive.
+filenames = snakemake.input["data"]
+
+baseline_dfs = [pd.read_csv(fn) for fn in filenames]
+
+# join="inner" keeps only the columns present in every file.
+# ignore_index=True renumbers the rows directly; a reset_index() followed by
+# drop(columns="index") would go wrong when the data itself contains a
+# column named "index".
+baseline = pd.concat(baseline_dfs, join="inner", ignore_index=True)
+
+# TODO(review): the rule's declared output is not written by this script yet.