import pandas as pd

# Build the model input table: read the cleaned sensor features, keep the
# requested ESM target column(s), drop every other phone_esm_straw column,
# and write the result as CSV.
#
# NOTE(review): `snakemake` is not defined in this file; it is presumably
# injected by Snakemake's `script:` directive -- confirm against the rule.
# Reads:  snakemake.input["cleaned_sensor_features"], snakemake.params["target_variable"]
# Writes: snakemake.output[0]
# Raises: ValueError if no phone_esm_straw column matches the target.

cleaned_sensor_features = pd.read_csv(snakemake.input["cleaned_sensor_features"])
target_variable = snakemake.params["target_variable"]

# Columns coming from phone_esm are not features for our purposes, so all of
# them are candidates for removal.
column_names = cleaned_sensor_features.columns
esm_names = column_names[column_names.str.startswith("phone_esm_straw")]

# Match by containment rather than equality. Note that str.contains interprets
# the pattern as a regular expression by default, and that every ESM column
# whose name matches is kept (not necessarily exactly one).
is_target = esm_names.str.contains(target_variable)
if not is_target.any():
    # Build a single message string; passing several positional arguments to
    # ValueError would render the message as a tuple.
    raise ValueError(
        f"The requested target ({target_variable}) cannot be found in the dataset. "
        "Please check the names of phone_esm_ columns in "
        "all_sensor_features_cleaned_rapids.csv"
    )

# Drop the non-target ESM columns; sensor features and the target remain.
model_input = cleaned_sensor_features.drop(esm_names[~is_target], axis=1)
model_input.to_csv(snakemake.output[0], index=False)