Read the relevant files.

labels
junos 2022-02-04 18:06:02 +01:00
parent 685ed6a546
commit 204f6f50b0
4 changed files with 19 additions and 2 deletions

View File

@ -403,6 +403,9 @@ for provider in config["ALL_CLEANING_OVERALL"]["PROVIDERS"].keys():
if config["ALL_CLEANING_OVERALL"]["PROVIDERS"][provider]["COMPUTE"]:
    files_to_compute.extend(expand("data/processed/features/all_participants/all_sensor_features_cleaned_" + provider.lower() + ".csv"))
# Demographic features
files_to_compute.extend(expand("data/raw/{pid}/participant_baseline_raw.csv", pid=config["PIDS"]))
rule all:
    input:
        files_to_compute

View File

@ -629,7 +629,7 @@ ALL_CLEANING_OVERALL:
PARAMS_FOR_ANALYSIS:
  DEMOGRAPHIC:
    FOLDER: data/external/baseline
    CONTAINER: [results-survey637813_final.csv, # Slovenia
                results-survey358134_final.csv, # Belgium 1
                results-survey413767_final.csv # Belgium 2

View File

@ -1,7 +1,7 @@
rule download_demographic_data:
    input:
        participant_file = "data/external/participant_files/{pid}.yaml",
        data = expand(config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["FOLDER"] + "/{container}", container=config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["CONTAINER"])
    output:
        "data/raw/{pid}/participant_baseline_raw.csv"
    script:

View File

@ -0,0 +1,14 @@
import pandas as pd

# Snakemake script body: merge the per-survey baseline CSV exports
# (one file per survey wave — see the DEMOGRAPHIC.CONTAINER config list)
# into a single `baseline` DataFrame.
filenames = snakemake.input["data"]

baseline_dfs = [pd.read_csv(fn) for fn in filenames]

# join="inner" keeps only the columns present in every survey file;
# ignore_index=True renumbers rows 0..n-1 (equivalent to the clumsier
# reset_index().drop(columns="index")).
baseline = pd.concat(baseline_dfs, join="inner", ignore_index=True)