Add the baseline features rule to the Snakefile.

Correct the age calculation to operate on a single value instead of a dataframe (use `.days` on the resulting time difference rather than the `.dt.days` accessor).
labels
junos 2022-02-23 18:15:26 +01:00
parent 07da6be398
commit 9a74e74d08
2 changed files with 3 additions and 2 deletions

View File

@@ -403,9 +403,10 @@ for provider in config["ALL_CLEANING_OVERALL"]["PROVIDERS"].keys():
if config["ALL_CLEANING_OVERALL"]["PROVIDERS"][provider]["COMPUTE"]: if config["ALL_CLEANING_OVERALL"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/processed/features/all_participants/all_sensor_features_cleaned_" + provider.lower() +".csv")) files_to_compute.extend(expand("data/processed/features/all_participants/all_sensor_features_cleaned_" + provider.lower() +".csv"))
# Demographic features # Baseline features
files_to_compute.extend(expand("data/raw/baseline_merged.csv")) files_to_compute.extend(expand("data/raw/baseline_merged.csv"))
files_to_compute.extend(expand("data/raw/{pid}/participant_baseline_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/participant_baseline_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/baseline_features.csv", pid=config["PIDS"]))
rule all: rule all:
input: input:

View File

@@ -10,7 +10,7 @@ if not participant_info.empty:
now = pd.Timestamp("now") now = pd.Timestamp("now")
baseline_features.loc[0, "age"] = ( baseline_features.loc[0, "age"] = (
now - participant_info.loc[0, "date_of_birth"] now - participant_info.loc[0, "date_of_birth"]
).dt.days / 365.25245 ).days / 365.25245
if "gender" in requested_features: if "gender" in requested_features:
baseline_features.loc[0, "gender"] = participant_info.loc[0, "gender"] baseline_features.loc[0, "gender"] = participant_info.loc[0, "gender"]
if "startlanguage" in requested_features: if "startlanguage" in requested_features: