Fix a bug in building the individual model (when the target is missing from a participant's columns).
parent
286de93bfd
commit
99c2fab8f9
|
@ -732,7 +732,7 @@ PARAMS_FOR_ANALYSIS:
|
||||||
|
|
||||||
TARGET:
|
TARGET:
|
||||||
COMPUTE: True
|
COMPUTE: True
|
||||||
LABEL: PANAS_negative_affect_mean
|
LABEL: appraisal_stressfulness_event_mean
|
||||||
ALL_LABELS: [appraisal_stressfulness_event_mean, appraisal_threat_mean, appraisal_challenge_mean]
|
ALL_LABELS: [appraisal_stressfulness_event_mean, appraisal_threat_mean, appraisal_challenge_mean]
|
||||||
# PANAS_positive_affect_mean, PANAS_negative_affect_mean, JCQ_job_demand_mean, JCQ_job_control_mean, JCQ_supervisor_support_mean,
|
# PANAS_positive_affect_mean, PANAS_negative_affect_mean, JCQ_job_demand_mean, JCQ_job_control_mean, JCQ_supervisor_support_mean,
|
||||||
# JCQ_coworker_support_mean, appraisal_stressfulness_period_mean, appraisal_stressfulness_event_mean
|
# JCQ_coworker_support_mean, appraisal_stressfulness_period_mean, appraisal_stressfulness_event_mean
|
||||||
|
|
|
@ -27,7 +27,10 @@ def straw_cleaning(sensor_data_files, provider):
|
||||||
# (1) FILTER_OUT THE ROWS THAT DO NOT HAVE THE TARGET COLUMN AVAILABLE
|
# (1) FILTER_OUT THE ROWS THAT DO NOT HAVE THE TARGET COLUMN AVAILABLE
|
||||||
if config['PARAMS_FOR_ANALYSIS']['TARGET']['COMPUTE']:
|
if config['PARAMS_FOR_ANALYSIS']['TARGET']['COMPUTE']:
|
||||||
target = config['PARAMS_FOR_ANALYSIS']['TARGET']['LABEL'] # get target label from config
|
target = config['PARAMS_FOR_ANALYSIS']['TARGET']['LABEL'] # get target label from config
|
||||||
features = features[features['phone_esm_straw_' + target].notna()].reset_index(drop=True)
|
if 'phone_esm_straw_' + target in features:
|
||||||
|
features = features[features['phone_esm_straw_' + target].notna()].reset_index(drop=True)
|
||||||
|
else:
|
||||||
|
return features
|
||||||
|
|
||||||
# (2.1) QUALITY CHECK (DATA YIELD COLUMN) deletes the rows where E4 or phone data is low quality
|
# (2.1) QUALITY CHECK (DATA YIELD COLUMN) deletes the rows where E4 or phone data is low quality
|
||||||
phone_data_yield_unit = provider["PHONE_DATA_YIELD_FEATURE"].split("_")[3].lower()
|
phone_data_yield_unit = provider["PHONE_DATA_YIELD_FEATURE"].split("_")[3].lower()
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import sys
|
||||||
|
import warnings
|
||||||
|
|
||||||
def retain_target_column(df_input: pd.DataFrame, target_variable_name: str):
|
def retain_target_column(df_input: pd.DataFrame, target_variable_name: str):
|
||||||
column_names = df_input.columns
|
column_names = df_input.columns
|
||||||
|
@ -8,9 +9,9 @@ def retain_target_column(df_input: pd.DataFrame, target_variable_name: str):
|
||||||
esm_names = column_names[esm_names_index]
|
esm_names = column_names[esm_names_index]
|
||||||
target_variable_index = esm_names.str.contains(target_variable_name)
|
target_variable_index = esm_names.str.contains(target_variable_name)
|
||||||
if all(~target_variable_index):
|
if all(~target_variable_index):
|
||||||
raise ValueError("The requested target (", target_variable_name,
|
warnings.warn(f"The requested target (, {target_variable_name} ,)cannot be found in the dataset. Please check the names of phone_esm_ columns in z_all_sensor_features_cleaned_straw_py.csv")
|
||||||
")cannot be found in the dataset.",
|
return False
|
||||||
"Please check the names of phone_esm_ columns in z_all_sensor_features_cleaned_straw_py.csv")
|
|
||||||
sensor_features_plus_target = df_input.drop(esm_names, axis=1)
|
sensor_features_plus_target = df_input.drop(esm_names, axis=1)
|
||||||
sensor_features_plus_target["target"] = df_input[esm_names[target_variable_index]]
|
sensor_features_plus_target["target"] = df_input[esm_names[target_variable_index]]
|
||||||
# We will only keep one column related to phone_esm and that will be our target variable.
|
# We will only keep one column related to phone_esm and that will be our target variable.
|
||||||
|
|
|
@ -7,4 +7,7 @@ target_variable_name = snakemake.params["target_variable"]
|
||||||
|
|
||||||
model_input = retain_target_column(cleaned_sensor_features, target_variable_name)
|
model_input = retain_target_column(cleaned_sensor_features, target_variable_name)
|
||||||
|
|
||||||
model_input.to_csv(snakemake.output[0], index=False)
|
if not model_input:
|
||||||
|
pd.DataFrame().to_csv(snakemake.output[0])
|
||||||
|
else:
|
||||||
|
model_input.to_csv(snakemake.output[0], index=False)
|
||||||
|
|
Loading…
Reference in New Issue