Contextual imputation on time(first/last) features.
parent
8af4ef11dc
commit
d6adda30cf
|
@ -21,9 +21,3 @@ def calculate_empatica_data_yield(features):
|
||||||
features.drop(empatica_data_yield_cols, axis=1, inplace=True) # In case of if the advanced operations will later not be needed (e.g., weighted average)
|
features.drop(empatica_data_yield_cols, axis=1, inplace=True) # In case of if the advanced operations will later not be needed (e.g., weighted average)
|
||||||
|
|
||||||
return features
|
return features
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -88,6 +88,14 @@ rapids_features <- function(sensor_data_files, time_segment, provider){
|
||||||
features <- call_features_of_type(calls_of_type, features_type, call_type, time_segment, requested_features)
|
features <- call_features_of_type(calls_of_type, features_type, call_type, time_segment, requested_features)
|
||||||
call_features <- merge(call_features, features, all=TRUE)
|
call_features <- merge(call_features, features, all=TRUE)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Fill selected columns with a high number
|
||||||
|
time_cols <- select(call_features, contains("timefirstcall") | contains("timelastcall")) %>%
|
||||||
|
colnames(.)
|
||||||
|
|
||||||
|
call_features <- call_features %>%
|
||||||
|
mutate_at(., time_cols, ~replace(., is.na(.), 1000000))
|
||||||
|
|
||||||
# Fill NA values with 0
|
# Fill NA values with 0
|
||||||
call_features <- call_features %>% mutate_all(~replace(., is.na(.), 0))
|
call_features <- call_features %>% mutate_all(~replace(., is.na(.), 0))
|
||||||
|
|
||||||
|
|
|
@ -65,6 +65,13 @@ rapids_features <- function(sensor_data_files, time_segment, provider){
|
||||||
features <- message_features_of_type(messages_of_type, message_type, time_segment, requested_features)
|
features <- message_features_of_type(messages_of_type, message_type, time_segment, requested_features)
|
||||||
messages_features <- merge(messages_features, features, all=TRUE)
|
messages_features <- merge(messages_features, features, all=TRUE)
|
||||||
}
|
}
|
||||||
|
# Fill selected columns with a high number
|
||||||
|
time_cols <- select(call_features, contains("timefirstmessages") | contains("timelastmessages")) %>%
|
||||||
|
colnames(.)
|
||||||
|
|
||||||
|
call_features <- call_features %>%
|
||||||
|
mutate_at(., time_cols, ~replace(., is.na(.), 1000000))
|
||||||
|
|
||||||
# Fill NA values with 0
|
# Fill NA values with 0
|
||||||
messages_features <- messages_features %>% mutate_all(~replace(., is.na(.), 0))
|
messages_features <- messages_features %>% mutate_all(~replace(., is.na(.), 0))
|
||||||
|
|
||||||
|
|
|
@ -63,6 +63,7 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se
|
||||||
|
|
||||||
if not screen_features.empty:
|
if not screen_features.empty:
|
||||||
screen_features.fillna(value={feature_name: 0 for feature_name in screen_features.columns if not feature_name.startswith(("stdduration", "firstuseafter"))}, inplace=True)
|
screen_features.fillna(value={feature_name: 0 for feature_name in screen_features.columns if not feature_name.startswith(("stdduration", "firstuseafter"))}, inplace=True)
|
||||||
|
screen_features["firstuseafter"] = screen_features["firstuseafter"].fillna(1000000)
|
||||||
screen_features = screen_features.reset_index()
|
screen_features = screen_features.reset_index()
|
||||||
|
|
||||||
return screen_features
|
return screen_features
|
||||||
|
|
Loading…
Reference in New Issue