From 9a74e74d084e3bc49316414ee63c4f2bb77d2542 Mon Sep 17 00:00:00 2001 From: junos Date: Wed, 23 Feb 2022 18:15:26 +0100 Subject: [PATCH] Add the baseline features rule to the Snakefile. Correct the age calculation to work on a single value instead of a dataframe. --- Snakefile | 3 ++- src/data/baseline_features.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Snakefile b/Snakefile index 01af47dd..9e0efdcc 100644 --- a/Snakefile +++ b/Snakefile @@ -403,9 +403,10 @@ for provider in config["ALL_CLEANING_OVERALL"]["PROVIDERS"].keys(): if config["ALL_CLEANING_OVERALL"]["PROVIDERS"][provider]["COMPUTE"]: files_to_compute.extend(expand("data/processed/features/all_participants/all_sensor_features_cleaned_" + provider.lower() +".csv")) -# Demographic features +# Baseline features files_to_compute.extend(expand("data/raw/baseline_merged.csv")) files_to_compute.extend(expand("data/raw/{pid}/participant_baseline_raw.csv", pid=config["PIDS"])) +files_to_compute.extend(expand("data/processed/features/{pid}/baseline_features.csv", pid=config["PIDS"])) rule all: input: diff --git a/src/data/baseline_features.py b/src/data/baseline_features.py index 60671911..599dab4c 100644 --- a/src/data/baseline_features.py +++ b/src/data/baseline_features.py @@ -10,7 +10,7 @@ if not participant_info.empty: now = pd.Timestamp("now") baseline_features.loc[0, "age"] = ( now - participant_info.loc[0, "date_of_birth"] - ).dt.days / 365.25245 + ).days / 365.25245 if "gender" in requested_features: baseline_features.loc[0, "gender"] = participant_info.loc[0, "gender"] if "startlanguage" in requested_features: