From 18d220d6c0eb8126769299901c1441265457c277 Mon Sep 17 00:00:00 2001
From: Meng Li <34143965+Meng6@users.noreply.github.com>
Date: Thu, 6 Aug 2020 17:26:37 -0400
Subject: [PATCH] Fix data cleaning bug: days threshold

filter_participant_without_enough_days() used add_count() on day_idx, so
"n" was the number of rows sharing one day_idx value rather than the
number of rows in a whole period. Derive day_type from the sign of
day_idx (-1 = before surgery, 0 = day_idx of zero, 1 = after discharge)
and count rows per day_type instead, so count_before / count_after are
period totals when compared with days_before_threshold and
days_after_threshold.

Both branches count by day_type itself. Counting by the expression
`day_type < 0` in the branch without a pid column would merge the
day_type 0 and 1 rows into one bucket (inflating count_after) and would
leave an extra logical column named `day_type < 0` in the result.

The temporary day_type column is dropped before returning.
---
 src/models/clean_features_for_model.R | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/models/clean_features_for_model.R b/src/models/clean_features_for_model.R
index ae4fade9..a11a86b8 100644
--- a/src/models/clean_features_for_model.R
+++ b/src/models/clean_features_for_model.R
@@ -3,22 +3,23 @@ library(tidyr)
 library(dplyr)
 
 filter_participant_without_enough_days <- function(clean_features, days_before_threshold, days_after_threshold){
+    clean_features$day_type <- ifelse(clean_features$day_idx < 0, -1, ifelse(clean_features$day_idx > 0, 1, 0))
     if("pid" %in% colnames(clean_features)){
         clean_features <- clean_features %>%
             group_by(pid) %>%
-            add_count(pid, day_idx) # this adds a new column "n"
+            add_count(pid, day_type) # this adds a new column "n"
     } else {
-        clean_features <- clean_features %>% add_count(day_idx)
+        clean_features <- clean_features %>% add_count(day_type)
     }
 
     # Only keep participants with enough days before surgery and after discharge
     clean_features <- clean_features %>%
-        mutate(count_before = ifelse(day_idx < 0, n, NA), # before surgery
-               count_after = ifelse(day_idx > 0, n, NA)) %>% # after discharge
+        mutate(count_before = ifelse(day_type == -1, n, NA), # before surgery
+               count_after = ifelse(day_type == 1, n, NA)) %>% # after discharge
         fill(count_before, .direction = "downup") %>%
         fill(count_after, .direction = "downup") %>%
         filter(count_before >= days_before_threshold & count_after >= days_after_threshold) %>%
-        select(-n, -count_before, -count_after) %>%
+        select(-n, -count_before, -count_after, -day_type) %>%
         ungroup()
 
     return(clean_features)