Contextual imputation on time(first/last) features.

2022-09-28 12:37:51 +00:00 · 2022-09-28 12:37:51 +00:00 · d6adda30cf
parent 8af4ef11dc
commit d6adda30cf
4 changed files with 16 additions and 6 deletions
--- a/src/features/empatica_data_yield.py
+++ b/src/features/empatica_data_yield.py
@ -21,9 +21,3 @@ def calculate_empatica_data_yield(features):
    features.drop(empatica_data_yield_cols, axis=1, inplace=True) # In case of if the advanced operations will later not be needed (e.g., weighted average)
    return features
--- a/src/features/phone_calls/rapids/main.R
+++ b/src/features/phone_calls/rapids/main.R
@ -88,6 +88,14 @@ rapids_features <- function(sensor_data_files, time_segment, provider){
        features <- call_features_of_type(calls_of_type, features_type, call_type, time_segment, requested_features)
        call_features <- merge(call_features, features, all=TRUE)
    }
    # Fill selected columns with a high number.
    # The selected columns are those whose name contains "timefirstcall" or
    # "timelastcall"; their NA values are replaced with 1000000.
    # NOTE(review): 1000000 is presumably a sentinel for "no call in this
    # segment" -- confirm with the downstream consumers of these features.
    # This step has to run before the zero-fill below, because that step
    # replaces every remaining NA in the data frame with 0.
    time_cols <- select(call_features, contains("timefirstcall") |  contains("timelastcall")) %>% 
        colnames(.)
    call_features <- call_features %>% 
        mutate_at(., time_cols, ~replace(., is.na(.), 1000000))
    # Fill NA values with 0 (applies to all columns not handled above)
    call_features <- call_features %>% mutate_all(~replace(., is.na(.), 0))
--- a/src/features/phone_messages/rapids/main.R
+++ b/src/features/phone_messages/rapids/main.R
@ -65,6 +65,13 @@ rapids_features <- function(sensor_data_files, time_segment, provider){
        features <- message_features_of_type(messages_of_type, message_type, time_segment, requested_features)
        messages_features <- merge(messages_features, features, all=TRUE)
    }
    # Fill selected columns with a high number.
    # The selected columns are those whose name contains "timefirstmessages" or
    # "timelastmessages"; their NA values are replaced with 1000000.
    # NOTE(review): 1000000 is presumably a sentinel for "no message in this
    # segment" -- confirm with the downstream consumers of these features.
    # This operates on messages_features (the data frame built by the merge
    # above), not call_features, which belongs to the phone_calls provider.
    # It has to run before the zero-fill below, because that step replaces
    # every remaining NA in the data frame with 0.
    time_cols <- select(messages_features, contains("timefirstmessages") | contains("timelastmessages")) %>%
        colnames(.)
    messages_features <- messages_features %>%
        mutate_at(., time_cols, ~replace(., is.na(.), 1000000))
    # Fill NA values with 0 (applies to all columns not handled above)
    messages_features <- messages_features %>% mutate_all(~replace(., is.na(.), 0))
--- a/src/features/phone_screen/rapids/main.py
+++ b/src/features/phone_screen/rapids/main.py
@ -63,6 +63,7 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se
        if not screen_features.empty:
            # Zero-fill every feature except those whose name starts with
            # "stdduration" or "firstuseafter"; those are handled separately.
            screen_features.fillna(value={feature_name: 0 for feature_name in screen_features.columns if not feature_name.startswith(("stdduration", "firstuseafter"))}, inplace=True)
            # The line above treats "firstuseafter" as a column-name prefix, so
            # fill every column carrying that prefix rather than indexing a
            # column named exactly "firstuseafter" (which raises KeyError when
            # only suffixed columns exist).
            # NOTE(review): 1000000 is presumably a sentinel for "no first use
            # in this segment" -- confirm with downstream consumers.
            firstuse_cols = [col for col in screen_features.columns if col.startswith("firstuseafter")]
            if firstuse_cols:
                screen_features[firstuse_cols] = screen_features[firstuse_cols].fillna(1000000)
            screen_features = screen_features.reset_index()
    return screen_features
`@ -21,9 +21,3 @@ def calculate_empatica_data_yield(features):`
	`features.drop(empatica_data_yield_cols, axis=1, inplace=True) # In case of if the advanced operations will later not be needed (e.g., weighted average)`	`features.drop(empatica_data_yield_cols, axis=1, inplace=True) # In case of if the advanced operations will later not be needed (e.g., weighted average)`

	`return features`	`return features`