diff --git a/src/data/baseline_features.py b/src/data/baseline_features.py index 0c0d1221..01b37931 100644 --- a/src/data/baseline_features.py +++ b/src/data/baseline_features.py @@ -95,7 +95,7 @@ if not participant_info.empty: - limesurvey_demand.loc[rows_demand_reverse, "score_original"] ) baseline_interim = pd.concat([baseline_interim, limesurvey_demand], axis=0, ignore_index=True) - if "demand" in requested_features: + if "limesurvey_demand" in requested_features: baseline_features.loc[0, "limesurvey_demand"] = limesurvey_demand[ "score" ].sum() @@ -136,9 +136,12 @@ if not participant_info.empty: ].sum() if "limesurvey_demand_control_ratio" in requested_features: - limesurvey_demand_control_ratio = ( - limesurvey_demand["score"].sum() / limesurvey_control["score"].sum() - ) + if limesurvey_control["score"].sum(): + limesurvey_demand_control_ratio = ( + limesurvey_demand["score"].sum() / limesurvey_control["score"].sum() + ) + else: + limesurvey_demand_control_ratio = 0 if ( JCQ_NORMS[participant_info.loc[0, "gender"]][0] <= limesurvey_demand_control_ratio diff --git a/src/features/all_cleaning_individual/rapids/main.R b/src/features/all_cleaning_individual/rapids/main.R index 6221c428..1d1636c2 100644 --- a/src/features/all_cleaning_individual/rapids/main.R +++ b/src/features/all_cleaning_individual/rapids/main.R @@ -44,11 +44,11 @@ rapids_cleaning <- function(sensor_data_files, provider){ # Drop columns with a percentage of NA values above cols_nan_threshold if(nrow(clean_features)) - clean_features <- clean_features %>% select_if(~ sum(is.na(.)) / length(.) <= cols_nan_threshold ) + clean_features <- clean_features %>% select(where(~ sum(is.na(.)) / length(.) <= cols_nan_threshold ), starts_with("phone_esm")) # Drop columns with zero variance if(drop_zero_variance_columns) - clean_features <- clean_features %>% select_if(grepl("pid|local_segment|local_segment_label|local_segment_start_datetime|local_segment_end_datetime",names(.)) | sapply(., n_distinct, na.rm = T) > 1) + clean_features <- clean_features %>% select_if(grepl("pid|local_segment|local_segment_label|local_segment_start_datetime|local_segment_end_datetime|phone_esm",names(.)) | sapply(., n_distinct, na.rm = T) > 1) # Drop highly correlated features if(as.logical(drop_highly_correlated_features$COMPUTE)){