From 010114c1aa421734f55d458bdc8cd0e7466ca336 Mon Sep 17 00:00:00 2001
From: JulioV
Date: Mon, 13 Apr 2020 13:24:52 -0400
Subject: [PATCH] Add wifi features

---
 Snakefile                    |  3 ++
 config.yaml                  |  4 ++
 rules/features.snakefile     | 11 +++++++
 src/features/wifi_features.R | 68 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 86 insertions(+)
 create mode 100644 src/features/wifi_features.R

diff --git a/Snakefile b/Snakefile
index 06d323a6..7379de28 100644
--- a/Snakefile
+++ b/Snakefile
@@ -67,6 +67,9 @@ rule all:
         expand("data/processed/{pid}/fitbit_step_{day_segment}.csv",
                             pid = config["PIDS"],
                             day_segment = config["STEP"]["DAY_SEGMENTS"]),
+        expand("data/processed/{pid}/wifi_{segment}.csv",
+                            pid=config["PIDS"],
+                            segment = config["WIFI"]["DAY_SEGMENTS"]),
         # Models
         expand("data/processed/{pid}/metrics_for_individual_model/{source}_{day_segment}_original.csv",
                             pid = config["PIDS"],
diff --git a/config.yaml b/config.yaml
index 7220fc5d..068155e2 100644
--- a/config.yaml
+++ b/config.yaml
@@ -124,6 +124,10 @@ STEP:
   THRESHOLD_ACTIVE_BOUT: 10 # steps
   INCLUDE_ZERO_STEP_ROWS: True
 
+WIFI:
+  DAY_SEGMENTS: *day_segments
+  FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
+
 METRICS_FOR_ANALYSIS:
   GROUNDTRUTH_TABLE: participant_info
   SOURCES: &sources ["phone_metrics", "fitbit_metrics", "phone_fitbit_metrics"]
diff --git a/rules/features.snakefile b/rules/features.snakefile
index 9d937797..ce2f99d1 100644
--- a/rules/features.snakefile
+++ b/rules/features.snakefile
@@ -174,3 +174,14 @@ rule fitbit_step_features:
         "data/processed/{pid}/fitbit_step_{day_segment}.csv"
     script:
         "../src/features/fitbit_step_features.py"
+
+rule wifi_features:
+    input:
+        "data/raw/{pid}/wifi_with_datetime.csv"
+    params:
+        day_segment = "{day_segment}",
+        features = config["WIFI"]["FEATURES"]
+    output:
+        "data/processed/{pid}/wifi_{day_segment}.csv"
+    script:
+        "../src/features/wifi_features.R"
\ No newline at end of file
diff --git a/src/features/wifi_features.R b/src/features/wifi_features.R
new file mode 100644
index 00000000..b2f11339
--- /dev/null
+++ b/src/features/wifi_features.R
@@ -0,0 +1,68 @@
+source("packrat/init.R")
+
+library(dplyr)
+
+# Restrict wifi scans to one day segment and group them by date.
+#
+# data: data frame of wifi scans; needs `local_date` and, when a segment is
+#   filtered, `local_day_segment`.
+# day_segment: "morning", "afternoon", "evening" or "night" keeps only the
+#   rows of that segment; any other value keeps every row.
+#
+# Returns `data` grouped by `local_date`.
+filter_by_day_segment <- function(data, day_segment) {
+  if (day_segment %in% c("morning", "afternoon", "evening", "night")) {
+    data <- data %>% filter(local_day_segment == day_segment)
+  }
+
+  data %>% group_by(local_date)
+}
+
+# Compute one wifi feature per `local_date`.
+#
+# data: data frame of wifi scans with `local_date`, `bssid` and, when a
+#   segment is filtered, `local_day_segment`.
+# feature: "countscans", "uniquedevices" or "countscansmostuniquedevice".
+# day_segment: passed to filter_by_day_segment() and embedded in the name of
+#   the output column.
+#
+# Returns a data frame with `local_date` and `wifi_<day_segment>_<feature>`.
+# Stops with an error when `feature` is not one of the supported names.
+compute_wifi_feature <- function(data, feature, day_segment) {
+  column_name <- paste("wifi", day_segment, feature, sep = "_")
+  segment_scans <- data %>% filter_by_day_segment(day_segment)
+
+  switch(feature,
+    "countscans" = segment_scans %>% summarise(!!column_name := n()),
+    "uniquedevices" = segment_scans %>%
+      summarise(!!column_name := n_distinct(bssid)),
+    "countscansmostuniquedevice" = {
+      # The most scanned device is chosen over all scans, not only those of
+      # the segment. which.max() yields a single position, so a tie between
+      # devices never adds the scans of several devices together.
+      scans_per_device <- table(data$bssid)
+      most_scanned <- names(scans_per_device)[which.max(scans_per_device)]
+      # Summing the matches keeps dates on which that device was never seen
+      # and reports 0 for them instead of dropping the date.
+      segment_scans %>%
+        summarise(!!column_name := sum(bssid %in% most_scanned))
+    },
+    stop("Unsupported wifi feature: ", feature, call. = FALSE)
+  )
+}
+
+data <- read.csv(snakemake@input[[1]], stringsAsFactors = FALSE)
+day_segment <- snakemake@params[["day_segment"]]
+requested_features <- snakemake@params[["features"]]
+
+# Seed the result with an empty `local_date` key so the output keeps that
+# column even when no feature is requested.
+features <- data.frame(local_date = character(), stringsAsFactors = FALSE)
+
+for (requested_feature in requested_features) {
+  feature <- compute_wifi_feature(data, requested_feature, day_segment)
+  # Full outer join: keep every date produced by any feature.
+  features <- merge(features, feature, by = "local_date", all = TRUE)
+}
+
+write.csv(features, snakemake@output[[1]], row.names = FALSE)