Disable (SOME) feature cleaning for ESM data: phone_esm columns are now kept by the NA-threshold and zero-variance column drops.

models
junos 2022-04-13 16:01:31 +02:00
parent 144f0d0dcf
commit 53ec52a954
1 changed file with 2 additions and 2 deletions

View File

@@ -44,11 +44,11 @@ rapids_cleaning <- function(sensor_data_files, provider){
# Drop columns with a percentage of NA values above cols_nan_threshold
if(nrow(clean_features))
clean_features <- clean_features %>% select_if(~ sum(is.na(.)) / length(.) <= cols_nan_threshold )
clean_features <- clean_features %>% select(where(~ sum(is.na(.)) / length(.) <= cols_nan_threshold ), starts_with("phone_esm"))
# Drop columns with zero variance
if(drop_zero_variance_columns)
clean_features <- clean_features %>% select_if(grepl("pid|local_segment|local_segment_label|local_segment_start_datetime|local_segment_end_datetime",names(.)) | sapply(., n_distinct, na.rm = T) > 1)
clean_features <- clean_features %>% select_if(grepl("pid|local_segment|local_segment_label|local_segment_start_datetime|local_segment_end_datetime|phone_esm",names(.)) | sapply(., n_distinct, na.rm = T) > 1)
# Drop highly correlated features
if(as.logical(drop_highly_correlated_features$COMPUTE)){