Impute selected phone features with 0.

Affected sensors: Wi-Fi visible, screen, and light.
notes
Primoz 2022-09-16 10:58:57 +00:00
parent 7d85f75d21
commit e3b78c8a85
3 changed files with 7 additions and 3 deletions

View File

@ -29,7 +29,7 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se
if "medianlux" in features_to_compute: if "medianlux" in features_to_compute:
light_features["medianlux"] = light_data.groupby(["local_segment"])["double_light_lux"].median() light_features["medianlux"] = light_data.groupby(["local_segment"])["double_light_lux"].median()
if "stdlux" in features_to_compute: if "stdlux" in features_to_compute:
light_features["stdlux"] = light_data.groupby(["local_segment"])["double_light_lux"].std() light_features["stdlux"] = light_data.groupby(["local_segment"])["double_light_lux"].std().fillna(0)
light_features = light_features.reset_index() light_features = light_features.reset_index()

View File

@ -15,7 +15,7 @@ def getEpisodeDurationFeatures(screen_data, time_segment, episode, features, ref
if "avgduration" in features: if "avgduration" in features:
duration_helper = pd.concat([duration_helper, screen_data_episode.groupby(["local_segment"])[["duration"]].mean().rename(columns = {"duration":"avgduration" + episode})], axis = 1) duration_helper = pd.concat([duration_helper, screen_data_episode.groupby(["local_segment"])[["duration"]].mean().rename(columns = {"duration":"avgduration" + episode})], axis = 1)
if "stdduration" in features: if "stdduration" in features:
duration_helper = pd.concat([duration_helper, screen_data_episode.groupby(["local_segment"])[["duration"]].std().rename(columns = {"duration":"stdduration" + episode})], axis = 1) duration_helper = pd.concat([duration_helper, screen_data_episode.groupby(["local_segment"])[["duration"]].std().fillna(0).rename(columns = {"duration":"stdduration" + episode})], axis = 1)
if "firstuseafter" + "{0:0=2d}".format(reference_hour_first_use) in features: if "firstuseafter" + "{0:0=2d}".format(reference_hour_first_use) in features:
screen_data_episode_after_hour = screen_data_episode.copy() screen_data_episode_after_hour = screen_data_episode.copy()
screen_data_episode_after_hour["hour"] = pd.to_datetime(screen_data_episode["local_start_date_time"]).dt.hour screen_data_episode_after_hour["hour"] = pd.to_datetime(screen_data_episode["local_start_date_time"]).dt.hour

View File

@ -18,12 +18,15 @@ compute_wifi_feature <- function(data, feature, time_segment){
filter(N == max(N)) %>% filter(N == max(N)) %>%
head(1) %>% # if there are multiple device with the same amount of scans pick the first one only head(1) %>% # if there are multiple device with the same amount of scans pick the first one only
pull(bssid) pull(bssid)
data <- data %>% filter_data_by_segment(time_segment) data <- data %>% filter_data_by_segment(time_segment)
return(data %>% return(data %>%
filter(bssid == mostuniquedevice) %>% filter(bssid == mostuniquedevice) %>%
group_by(local_segment) %>% group_by(local_segment) %>%
summarise(!!feature := n()) %>% summarise(!!feature := n()) %>%
replace(is.na(.), 0)) mutate_all(~replace(., is.na(.), 0))
)
} }
} }
@ -44,5 +47,6 @@ rapids_features <- function(sensor_data_files, time_segment, provider){
features <- merge(features, feature, by="local_segment", all = TRUE) features <- merge(features, feature, by="local_segment", all = TRUE)
} }
features <- features %>% mutate_all(~replace(., is.na(.), 0))
return(features) return(features)
} }