From 99c2fab8f9ab9cf2ed40a952019f994299b78c05 Mon Sep 17 00:00:00 2001 From: Primoz Date: Wed, 16 Nov 2022 09:50:18 +0000 Subject: [PATCH] Fix a bug in the making of the individual model (when there is no target in the participants columns). --- config.yaml | 2 +- src/features/all_cleaning_individual/straw/main.py | 5 ++++- src/models/helper.py | 9 +++++---- src/models/select_targets.py | 5 ++++- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/config.yaml b/config.yaml index 0f108cbc..4bbf9c4d 100644 --- a/config.yaml +++ b/config.yaml @@ -732,7 +732,7 @@ PARAMS_FOR_ANALYSIS: TARGET: COMPUTE: True - LABEL: PANAS_negative_affect_mean + LABEL: appraisal_stressfulness_event_mean ALL_LABELS: [appraisal_stressfulness_event_mean, appraisal_threat_mean, appraisal_challenge_mean] # PANAS_positive_affect_mean, PANAS_negative_affect_mean, JCQ_job_demand_mean, JCQ_job_control_mean, JCQ_supervisor_support_mean, # JCQ_coworker_support_mean, appraisal_stressfulness_period_mean, appraisal_stressfulness_event_mean diff --git a/src/features/all_cleaning_individual/straw/main.py b/src/features/all_cleaning_individual/straw/main.py index 31a51367..a35808d9 100644 --- a/src/features/all_cleaning_individual/straw/main.py +++ b/src/features/all_cleaning_individual/straw/main.py @@ -27,7 +27,10 @@ def straw_cleaning(sensor_data_files, provider): # (1) FILTER_OUT THE ROWS THAT DO NOT HAVE THE TARGET COLUMN AVAILABLE if config['PARAMS_FOR_ANALYSIS']['TARGET']['COMPUTE']: target = config['PARAMS_FOR_ANALYSIS']['TARGET']['LABEL'] # get target label from config - features = features[features['phone_esm_straw_' + target].notna()].reset_index(drop=True) + if 'phone_esm_straw_' + target in features: + features = features[features['phone_esm_straw_' + target].notna()].reset_index(drop=True) + else: + return features # (2.1) QUALITY CHECK (DATA YIELD COLUMN) deletes the rows where E4 or phone data is low quality phone_data_yield_unit = 
provider["PHONE_DATA_YIELD_FEATURE"].split("_")[3].lower() diff --git a/src/models/helper.py b/src/models/helper.py index 61f9f666..3b90f52d 100644 --- a/src/models/helper.py +++ b/src/models/helper.py @@ -1,5 +1,6 @@ import pandas as pd - +import sys +import warnings def retain_target_column(df_input: pd.DataFrame, target_variable_name: str): column_names = df_input.columns @@ -8,9 +9,9 @@ def retain_target_column(df_input: pd.DataFrame, target_variable_name: str): esm_names = column_names[esm_names_index] target_variable_index = esm_names.str.contains(target_variable_name) if all(~target_variable_index): - raise ValueError("The requested target (", target_variable_name, - ")cannot be found in the dataset.", - "Please check the names of phone_esm_ columns in z_all_sensor_features_cleaned_straw_py.csv") + warnings.warn(f"The requested target ({target_variable_name}) cannot be found in the dataset. Please check the names of phone_esm_ columns in z_all_sensor_features_cleaned_straw_py.csv") + return False + sensor_features_plus_target = df_input.drop(esm_names, axis=1) sensor_features_plus_target["target"] = df_input[esm_names[target_variable_index]] # We will only keep one column related to phone_esm and that will be our target variable. diff --git a/src/models/select_targets.py b/src/models/select_targets.py index 196cdcd1..6c29aed7 100644 --- a/src/models/select_targets.py +++ b/src/models/select_targets.py @@ -7,4 +7,7 @@ target_variable_name = snakemake.params["target_variable"] model_input = retain_target_column(cleaned_sensor_features, target_variable_name) -model_input.to_csv(snakemake.output[0], index=False) +if model_input is False:  # retain_target_column() returns False when the target is missing; `not <DataFrame>` would raise ValueError + pd.DataFrame().to_csv(snakemake.output[0]) +else: + model_input.to_csv(snakemake.output[0], index=False)