import pandas as pd


def retain_target_column(
    df_input: pd.DataFrame, target_variable_name: str
) -> pd.DataFrame:
    """Drop all ``phone_esm_straw`` columns except one, which is kept as ``target``.

    Every column whose name starts with ``"phone_esm_straw"`` is removed from
    the returned frame. The single such column whose name contains
    ``target_variable_name`` is appended back as the last column, renamed to
    ``"target"``. ``df_input`` itself is not modified.

    Args:
        df_input: Data frame holding the sensor features together with the
            ``phone_esm_straw*`` columns.
        target_variable_name: Substring identifying the wanted target column
            among the ``phone_esm_straw*`` columns. It is passed to
            ``str.contains`` and is therefore interpreted as a regular
            expression (pandas default).

    Returns:
        A new data frame with the non-ESM columns followed by a ``"target"``
        column.

    Raises:
        ValueError: If no ``phone_esm_straw*`` column matches
            ``target_variable_name``, or if more than one does (in which case
            a single target column cannot be chosen).
    """
    column_names = df_input.columns

    # Find all columns coming from phone_esm, since these are not features
    # for our purposes and we will drop them. ``na=False`` keeps the mask
    # strictly boolean even if some column labels are not strings.
    is_esm_column = column_names.str.startswith("phone_esm_straw", na=False)
    esm_names = column_names[is_esm_column]

    matching_names = esm_names[esm_names.str.contains(target_variable_name)]

    if len(matching_names) == 0:
        raise ValueError(
            f"The requested target ({target_variable_name}) cannot be found "
            "in the dataset. Please check the names of phone_esm_ columns in "
            "z_all_sensor_features_cleaned_straw_py.csv"
        )
    if len(matching_names) > 1:
        # Several columns cannot be stored in the single "target" column;
        # fail with an explicit message instead of an opaque pandas error.
        raise ValueError(
            f"The requested target ({target_variable_name}) matches multiple "
            f"phone_esm_ columns: {', '.join(map(str, matching_names))}. "
            "Please use a more specific target name."
        )

    # We will only keep one column related to phone_esm and that will be our
    # target variable. Add it back to the very end of the data frame and
    # rename it to target.
    sensor_features_plus_target = df_input.drop(columns=esm_names)
    sensor_features_plus_target["target"] = df_input[matching_names[0]]
    return sensor_features_plus_target