Contextual imputation on time(first/last) features.

notes
Primoz 2022-09-28 12:37:51 +00:00
parent 8af4ef11dc
commit d6adda30cf
4 changed files with 16 additions and 6 deletions

View File

@ -21,9 +21,3 @@ def calculate_empatica_data_yield(features):
features.drop(empatica_data_yield_cols, axis=1, inplace=True) # In case of if the advanced operations will later not be needed (e.g., weighted average) features.drop(empatica_data_yield_cols, axis=1, inplace=True) # In case of if the advanced operations will later not be needed (e.g., weighted average)
return features return features

View File

@ -88,6 +88,14 @@ rapids_features <- function(sensor_data_files, time_segment, provider){
features <- call_features_of_type(calls_of_type, features_type, call_type, time_segment, requested_features) features <- call_features_of_type(calls_of_type, features_type, call_type, time_segment, requested_features)
call_features <- merge(call_features, features, all=TRUE) call_features <- merge(call_features, features, all=TRUE)
} }
# Impute missing first/last call times with a large sentinel value. This has to
# run before the blanket zero-fill below, which would otherwise turn them into 0.
time_cols <- call_features %>%
  select(contains("timefirstcall") | contains("timelastcall")) %>%
  colnames()
call_features <- mutate_at(call_features, time_cols, ~replace(., is.na(.), 1000000))
# Fill NA values with 0 # Fill NA values with 0
call_features <- call_features %>% mutate_all(~replace(., is.na(.), 0)) call_features <- call_features %>% mutate_all(~replace(., is.na(.), 0))

View File

@ -65,6 +65,13 @@ rapids_features <- function(sensor_data_files, time_segment, provider){
features <- message_features_of_type(messages_of_type, message_type, time_segment, requested_features) features <- message_features_of_type(messages_of_type, message_type, time_segment, requested_features)
messages_features <- merge(messages_features, features, all=TRUE) messages_features <- merge(messages_features, features, all=TRUE)
} }
# Impute missing first/last message times with a large sentinel value. This
# has to run before the blanket zero-fill below, which would otherwise turn a
# missing time into 0.
# Fix: operate on messages_features. The previous code read from and wrote to
# call_features, so messages_features was never imputed.
# NOTE(review): contains() matches substrings, so "timefirstmessage" still
# covers the former "timefirstmessages" spelling; upstream RAPIDS appears to
# name these features without the trailing "s" -- confirm against the config.
time_cols <- messages_features %>%
  select(contains("timefirstmessage") | contains("timelastmessage")) %>%
  colnames()
messages_features <- messages_features %>%
  mutate_at(time_cols, ~replace(., is.na(.), 1000000))
# Fill NA values with 0 # Fill NA values with 0
messages_features <- messages_features %>% mutate_all(~replace(., is.na(.), 0)) messages_features <- messages_features %>% mutate_all(~replace(., is.na(.), 0))

View File

@ -63,6 +63,7 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se
if not screen_features.empty: if not screen_features.empty:
screen_features.fillna(value={feature_name: 0 for feature_name in screen_features.columns if not feature_name.startswith(("stdduration", "firstuseafter"))}, inplace=True) screen_features.fillna(value={feature_name: 0 for feature_name in screen_features.columns if not feature_name.startswith(("stdduration", "firstuseafter"))}, inplace=True)
# Impute missing "first use after" times with a large sentinel value. Columns are selected by prefix (as in the fillna above) instead of the exact key "firstuseafter", which raises KeyError when only suffixed columns exist. NOTE(review): confirm the actual column names.
screen_features.fillna(value={feature_name: 1000000 for feature_name in screen_features.columns if feature_name.startswith("firstuseafter")}, inplace=True)
screen_features = screen_features.reset_index() screen_features = screen_features.reset_index()
return screen_features return screen_features