rapids/src/features/wifi_features.R

34 lines
2.0 KiB
R
Raw Normal View History

2020-05-02 01:46:04 +02:00
source("renv/activate.R")
2020-06-24 21:46:27 +02:00
source("src/features/wifi/wifi_base.R")
2020-07-24 22:35:41 +02:00
library("dplyr")
# Load the wifi scans to process.
# The Snakemake rule may provide visible access points, connected access
# points, or both. Every row is tagged with a `connected` column
# (0 = read from the visible file, 1 = read from the connected file).
# When both files are given, their rows are stacked and sorted by `timestamp`.
visible_path <- snakemake@input[["visible_access_points"]]
connected_path <- snakemake@input[["connected_access_points"]]

if (is.null(visible_path) && is.null(connected_path)) {
  # Fail loudly instead of silently reading an unrelated positional input.
  stop("No wifi input was provided: expected 'visible_access_points' and/or 'connected_access_points'")
} else if (is.null(connected_path)) {
  wifi_data <- read.csv(visible_path, stringsAsFactors = FALSE) %>%
    mutate(connected = 0)
} else if (is.null(visible_path)) {
  wifi_data <- read.csv(connected_path, stringsAsFactors = FALSE) %>%
    mutate(connected = 1)
} else {
  visible_access_points <- read.csv(visible_path, stringsAsFactors = FALSE) %>%
    mutate(connected = 0)
  connected_access_points <- read.csv(connected_path, stringsAsFactors = FALSE) %>%
    mutate(connected = 1)
  wifi_data <- bind_rows(visible_access_points, connected_access_points) %>%
    arrange(timestamp)
}
# `wifi_data` is intentionally NOT re-read from snakemake@input[[1]] here:
# doing so would discard the `connected` tag and the merge performed above.
# Labels of the day segments to compute features for (duplicates collapsed).
day_segments <- read.csv(snakemake@input[["day_segments"]]) %>%
  distinct(label) %>%
  pull(label)

requested_features <- snakemake@params[["features"]]

# Start from an empty frame keyed by `local_date`; each segment's features are
# outer-merged into it so dates missing from one segment are kept as NA.
features <- data.frame(local_date = character(), stringsAsFactors = FALSE)

# Compute base wifi features
for (day_segment in day_segments) {
  segment_features <- base_wifi_features(wifi_data, day_segment, requested_features)
  features <- merge(features, segment_features, by = "local_date", all = TRUE)
}

# One column per requested feature per day segment, plus `local_date`.
expected_columns <- length(requested_features) * length(day_segments) + 1
if (ncol(features) != expected_columns) {
  stop(paste0("The number of features in the output dataframe (=", ncol(features), ") does not match the expected value (=", expected_columns, "). Verify your wifi feature extraction functions"))
}

write.csv(features, snakemake@output[[1]], row.names = FALSE)