2022-04-08 15:36:32 +02:00
import pandas as pd
2022-11-16 10:50:18 +01:00
import sys
import warnings
2022-04-08 15:36:32 +02:00
def retain_target_column(df_input: pd.DataFrame, target_variable_name: str):
    """Drop all ``phone_esm_straw*`` columns except one, kept as ``target``.

    Columns whose names start with ``phone_esm_straw`` are not treated as
    features and are removed. The single such column whose name contains
    ``target_variable_name`` is appended back at the end of the frame under
    the name ``"target"``.

    Args:
        df_input: Data frame holding the feature columns and the
            ``phone_esm_straw*`` columns. It is not modified.
        target_variable_name: Substring identifying the target column among
            the ``phone_esm_straw*`` columns. It is passed to
            ``str.contains`` with pandas' defaults, so it is interpreted as
            a regular expression.

    Returns:
        A new data frame without the ``phone_esm_straw*`` columns and with a
        trailing ``"target"`` column, or ``False`` (after emitting a
        warning) when no column matches ``target_variable_name``.

    Raises:
        ValueError: If more than one ``phone_esm_straw*`` column matches
            ``target_variable_name``, since a single target cannot be
            chosen unambiguously.
    """
    column_names = df_input.columns

    # Find all columns coming from phone_esm, since these are not features
    # for our purposes and we will drop them. na=False makes non-string
    # column labels count as "not an ESM column" instead of yielding NaN in
    # the mask.
    esm_mask = column_names.str.startswith("phone_esm_straw", na=False)
    esm_names = column_names[esm_mask]

    # Collect the ESM columns that match the requested target name.
    target_hits = esm_names.str.contains(target_variable_name)
    target_columns = [name for name, hit in zip(esm_names, target_hits) if hit]

    if not target_columns:
        warnings.warn(
            f"The requested target ({target_variable_name}) cannot be found in the dataset. "
            "Please check the names of phone_esm_ columns in "
            "z_all_sensor_features_cleaned_straw_py.csv",
            stacklevel=2,
        )
        return False

    if len(target_columns) > 1:
        # Assigning several columns to the single "target" column cannot
        # work; fail with a message that names the candidates.
        raise ValueError(
            f"The requested target ({target_variable_name}) matches more than one "
            f"phone_esm_ column: {target_columns}. Please use a more specific name."
        )

    # We will only keep one column related to phone_esm and that will be our
    # target variable. Add it back to the very end of the data frame and
    # rename it to target.
    sensor_features_plus_target = df_input.drop(columns=esm_names)
    sensor_features_plus_target["target"] = df_input[target_columns[0]]
    return sensor_features_plus_target