From 911e183c265a6fd7fb2f0e6d41402c828e3e3b2b Mon Sep 17 00:00:00 2001
From: JulioV
Date: Wed, 6 Nov 2019 12:19:30 -0500
Subject: [PATCH] Add bluetooth features

---
Review notes (this section is ignored when the patch is applied):
- The patch layout was restored to one header, hunk or file line per line.
  Leading whitespace was reconstructed and may need adjusting (or
  `git apply --ignore-whitespace`) to match the target tree.
- src/features/bluetooth_metrics.R was revised during review: unsupported
  metrics now stop with an explicit error, and countscansmostuniquedevice
  counts a single device even when several tie for the most scans.
  The blob id on its index line was not recomputed for the revised content.

 Snakefile                        |  3 ++
 config.yaml                      |  6 +++-
 rules/features.snakefile         | 13 ++++++-
 src/features/bluetooth_metrics.R | 61 ++++++++++++++++++++++++++++++++
 4 files changed, 81 insertions(+), 2 deletions(-)
 create mode 100644 src/features/bluetooth_metrics.R

diff --git a/Snakefile b/Snakefile
index 5976cc0a..26f4f1a2 100644
--- a/Snakefile
+++ b/Snakefile
@@ -25,6 +25,9 @@ rule all:
                             segment = config["COM_CALL"]["DAY_SEGMENTS"],
                             metric = config["COM_CALL"]["METRICS_TAKEN"]),
         expand("data/processed/{pid}/location_barnett_metrics.csv", pid=config["PIDS"]),
+        expand("data/processed/{pid}/bluetooth_{segment}.csv",
+                            pid=config["PIDS"],
+                            segment = config["BLUETOOTH"]["DAY_SEGMENTS"]),
         # Reports
         expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
         expand("reports/figures/{pid}/compliance_heatmap.html", pid=config["PIDS"], sensor=config["SENSORS"]),
diff --git a/config.yaml b/config.yaml
index 48ff0c3d..a1818aba 100644
--- a/config.yaml
+++ b/config.yaml
@@ -44,4 +44,8 @@ PHONE_VALID_SENSED_DAYS:
 
 BARNETT_LOCATION:
   ACCURACY_LIMIT: 51 # filters location coordinates with an accuracy higher than this
-  TIMEZONE: *timezone
\ No newline at end of file
+  TIMEZONE: *timezone
+
+BLUETOOTH:
+  DAY_SEGMENTS: *day_segments
+  METRICS: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
\ No newline at end of file
diff --git a/rules/features.snakefile b/rules/features.snakefile
index 14544319..cfdb5097 100644
--- a/rules/features.snakefile
+++ b/rules/features.snakefile
@@ -39,4 +39,15 @@ rule location_barnett_metrics:
     output:
         "data/processed/{pid}/location_barnett_metrics.csv"
     script:
-        "../src/features/location_barnett_metrics.R"
\ No newline at end of file
+        "../src/features/location_barnett_metrics.R"
+
+rule bluetooth_metrics:
+    input:
+        "data/raw/{pid}/bluetooth_with_datetime.csv"
+    params:
+        day_segment = "{day_segment}",
+        metrics = config["BLUETOOTH"]["METRICS"]
+    output:
+        "data/processed/{pid}/bluetooth_{day_segment}.csv"
+    script:
+        "../src/features/bluetooth_metrics.R"
\ No newline at end of file
diff --git a/src/features/bluetooth_metrics.R b/src/features/bluetooth_metrics.R
new file mode 100644
index 00000000..21200364
--- /dev/null
+++ b/src/features/bluetooth_metrics.R
@@ -0,0 +1,61 @@
+source("packrat/init.R")
+
+library(dplyr)
+
+# Keep only the rows recorded in `day_segment` and group them by local_date.
+# Values other than morning/afternoon/evening/night keep every row.
+filter_by_day_segment <- function(data, day_segment) {
+    if(day_segment %in% c("morning", "afternoon", "evening", "night"))
+        data <- data %>% filter(local_day_segment == day_segment)
+
+    return(data %>% group_by(local_date))
+}
+
+# Compute one bluetooth metric per local_date for the given day segment.
+# Returns a data frame with local_date plus one column named
+# bluetooth_<day_segment>_<metric>. Stops when `metric` is not supported.
+compute_bluetooth_metric <- function(data, metric, day_segment){
+    column_name <- paste("bluetooth", day_segment, metric, sep = "_")
+
+    if(metric == "countscans"){
+        return(data %>%
+            filter_by_day_segment(day_segment) %>%
+            summarise(!!column_name := n()))
+    } else if(metric == "uniquedevices"){
+        return(data %>%
+            filter_by_day_segment(day_segment) %>%
+            summarise(!!column_name := n_distinct(bt_address)))
+    } else if(metric == "countscansmostuniquedevice"){
+        # The most scanned device is picked over the whole input, before the
+        # day segment filter. Ties are broken by bt_address so that exactly
+        # one device is counted and the result is reproducible.
+        most_scanned_device <- data %>%
+            group_by(bt_address) %>%
+            summarise(scans = n()) %>%
+            arrange(desc(scans), bt_address) %>%
+            slice(1) %>%
+            pull(bt_address)
+        # %in% (not ==) so that a missing bt_address still matches itself
+        return(data %>%
+            filter(bt_address %in% most_scanned_device) %>%
+            filter_by_day_segment(day_segment) %>%
+            summarise(!!column_name := n()))
+    }
+
+    # Fail loudly: falling through would return NULL and break the merge below
+    stop("Unknown bluetooth metric: ", metric, call. = FALSE)
+}
+
+# Snakemake entry point: one output row per local_date, one column per metric
+data <- read.csv(snakemake@input[[1]], stringsAsFactors = FALSE)
+day_segment <- snakemake@params[["day_segment"]]
+metrics <- snakemake@params[["metrics"]]
+features <- data.frame(local_date = character(), stringsAsFactors = FALSE)
+
+for(metric in metrics){
+    feature <- compute_bluetooth_metric(data, metric, day_segment)
+    # Full outer join keeps dates that only some metrics produced
+    features <- merge(features, feature, by="local_date", all = TRUE)
+}
+
+write.csv(features, snakemake@output[[1]], row.names = FALSE)
\ No newline at end of file