From b795d1a0223444edfbe721eabd480299a92e26f2 Mon Sep 17 00:00:00 2001
From: Meng Li <34143965+Meng6@users.noreply.github.com>
Date: Wed, 24 Jun 2020 15:46:27 -0400
Subject: [PATCH] Refactor wifi features

---
 src/features/wifi/wifi_base.R | 82 ++++++++++++++++++++++++++++++++++
 src/features/wifi_features.R  | 44 ++++-------------
 2 files changed, 93 insertions(+), 33 deletions(-)
 create mode 100644 src/features/wifi/wifi_base.R

diff --git a/src/features/wifi/wifi_base.R b/src/features/wifi/wifi_base.R
new file mode 100644
index 00000000..7e8eff6c
--- /dev/null
+++ b/src/features/wifi/wifi_base.R
@@ -0,0 +1,82 @@
+library(dplyr)
+
+# Keep only the rows of `day_segment` and group the result by local_date.
+#
+# data:        data frame with `local_date` and `local_day_segment` columns.
+# day_segment: "morning", "afternoon", "evening" or "night" select that
+#              segment; any other value keeps every row.
+# Returns `data` grouped by `local_date`.
+filter_by_day_segment <- function(data, day_segment) {
+  if (day_segment %in% c("morning", "afternoon", "evening", "night")) {
+    data <- data %>% filter(local_day_segment == day_segment)
+  }
+
+  data %>% group_by(local_date)
+}
+
+# Compute a single wifi feature for every local_date.
+#
+# data:        data frame of wifi scans; the `bssid`, `local_date` and
+#              `local_day_segment` columns are read.
+# feature:     "countscans", "uniquedevices" or "countscansmostuniquedevice".
+# day_segment: forwarded to filter_by_day_segment() and embedded in the
+#              output column name, wifi_<day_segment>_<feature>.
+# Returns a data frame with `local_date` and the feature column; stops on an
+# unknown feature name.
+compute_wifi_feature <- function(data, feature, day_segment) {
+  feature_column <- paste("wifi", day_segment, feature, sep = "_")
+
+  if (feature == "countscans") {
+    data %>%
+      filter_by_day_segment(day_segment) %>%
+      summarise(!!feature_column := n())
+  } else if (feature == "uniquedevices") {
+    data %>%
+      filter_by_day_segment(day_segment) %>%
+      summarise(!!feature_column := n_distinct(bssid))
+  } else if (feature == "countscansmostuniquedevice") {
+    # The most scanned device is picked over the whole dataset, before the
+    # day segment filter. If several devices tie for the highest number of
+    # scans only the first one is kept; keeping all of them would report
+    # the sum of their scans as if it belonged to a single device.
+    most_scanned_device <- data %>%
+      group_by(bssid) %>%
+      mutate(N = n()) %>%
+      ungroup() %>%
+      filter(N == max(N)) %>%
+      head(1) %>%
+      pull(bssid)
+
+    data %>%
+      filter(bssid %in% most_scanned_device) %>%
+      filter_by_day_segment(day_segment) %>%
+      summarise(!!feature_column := n())
+  } else {
+    stop("Unknown wifi feature: ", feature, call. = FALSE)
+  }
+}
+
+# Compute the requested features that this file implements.
+#
+# wifi_data:          data frame of wifi scans, see compute_wifi_feature().
+# day_segment:        forwarded to compute_wifi_feature().
+# requested_features: character vector of feature names; names missing from
+#                     base_features_names are skipped.
+# Returns a data frame with `local_date` plus one column per computed feature.
+base_wifi_features <- function(wifi_data, day_segment, requested_features) {
+  # Output dataframe
+  features <- data.frame(local_date = character(), stringsAsFactors = FALSE)
+
+  # The name of the features this function can compute
+  base_features_names <- c("countscans", "uniquedevices", "countscansmostuniquedevice")
+
+  # The subset of requested features this function can compute
+  features_to_compute <- intersect(base_features_names, requested_features)
+
+  for (feature_name in features_to_compute) {
+    feature <- compute_wifi_feature(wifi_data, feature_name, day_segment)
+    features <- merge(features, feature, by = "local_date", all = TRUE)
+  }
+
+  features
+}
diff --git a/src/features/wifi_features.R b/src/features/wifi_features.R
index 7c6e5697..fb3964b0 100644
--- a/src/features/wifi_features.R
+++ b/src/features/wifi_features.R
@@ -1,41 +1,19 @@
 source("renv/activate.R")
-
+source("src/features/wifi/wifi_base.R")
 library(dplyr)
 
-filter_by_day_segment <- function(data, day_segment) {
-  if(day_segment %in% c("morning", "afternoon", "evening", "night"))
-    data <- data %>% filter(local_day_segment == day_segment)
-
-  return(data %>% group_by(local_date))
-}
-
-compute_wifi_feature <- function(data, feature, day_segment){
-  if(feature %in% c("countscans", "uniquedevices")){
-    data <- data %>% filter_by_day_segment(day_segment)
-    data <- switch(feature,
-              "countscans" = data %>% summarise(!!paste("wifi", day_segment, feature, sep = "_") := n()),
-              "uniquedevices" = data %>% summarise(!!paste("wifi", day_segment, feature, sep = "_") := n_distinct(bssid)))
-    return(data)
-  } else if(feature == "countscansmostuniquedevice"){
-    # Get the most scanned device
-    data <- data %>% group_by(bssid) %>%
-      mutate(N=n()) %>%
-      ungroup() %>%
-      filter(N == max(N))
-    return(data %>%
-      filter_by_day_segment(day_segment) %>%
-      summarise(!!paste("wifi", day_segment, feature, sep = "_") := n()))
-  }
-}
-
-data <- read.csv(snakemake@input[[1]], stringsAsFactors = FALSE)
+wifi_data <- read.csv(snakemake@input[[1]], stringsAsFactors = FALSE)
 day_segment <- snakemake@params[["day_segment"]]
 requested_features <- snakemake@params[["features"]]
 features = data.frame(local_date = character(), stringsAsFactors = FALSE)
 
-for(requested_feature in requested_features){
-  feature <- compute_wifi_feature(data, requested_feature, day_segment)
-  features <- merge(features, feature, by="local_date", all = TRUE)
-}
+# Compute base wifi features
+features <- merge(features, base_wifi_features(wifi_data, day_segment, requested_features), by="local_date", all = TRUE)
 
-write.csv(features, snakemake@output[[1]], row.names = FALSE)
\ No newline at end of file
+# A feature requested more than once still yields a single output column
+expected_columns <- length(unique(requested_features)) + 1
+if(ncol(features) != expected_columns){
+  stop("The number of features in the output dataframe (=", ncol(features), ") does not match the expected value (=", expected_columns - 1, " + 1). Verify your wifi feature extraction functions", call. = FALSE)
+}
+
+write.csv(features, snakemake@output[[1]], row.names = FALSE)