Migrate wifi to new file structure
parent
77d9d8f025
commit
eda58d9d2c
24
Snakefile
24
Snakefile
|
@ -99,16 +99,20 @@ if config["APPLICATIONS_FOREGROUND"]["COMPUTE"]:
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/{sensor}_with_datetime_with_genre.csv", pid=config["PIDS"], sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"]))
|
files_to_compute.extend(expand("data/interim/{pid}/{sensor}_with_datetime_with_genre.csv", pid=config["PIDS"], sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"]))
|
||||||
files_to_compute.extend(expand("data/processed/{pid}/applications_foreground_{day_segment}.csv", pid = config["PIDS"], day_segment = config["APPLICATIONS_FOREGROUND"]["DAY_SEGMENTS"]))
|
files_to_compute.extend(expand("data/processed/{pid}/applications_foreground_{day_segment}.csv", pid = config["PIDS"], day_segment = config["APPLICATIONS_FOREGROUND"]["DAY_SEGMENTS"]))
|
||||||
|
|
||||||
if config["WIFI"]["COMPUTE"]:
|
for provider in config["WIFI"]["PROVIDERS"].keys():
|
||||||
if len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) > 0:
|
if config["WIFI"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]))
|
if len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) > 0:
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]))
|
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]))
|
||||||
files_to_compute.extend(expand("data/processed/{pid}/wifi_{day_segment}.csv", pid = config["PIDS"], day_segment = config["WIFI"]["DAY_SEGMENTS"]))
|
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]))
|
||||||
|
files_to_compute.extend(expand("data/raw/{pid}/{sensor_key}_with_datetime_visibleandconnected.csv", pid=config["PIDS"], sensor_key="WIFI".lower()))
|
||||||
if len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) > 0:
|
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["WIFI"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="WIFI".lower()))
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]))
|
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="WIFI".lower()))
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]))
|
if len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) > 0:
|
||||||
files_to_compute.extend(expand("data/processed/{pid}/wifi_{day_segment}.csv", pid = config["PIDS"], day_segment = config["WIFI"]["DAY_SEGMENTS"]))
|
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]))
|
||||||
|
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]))
|
||||||
|
files_to_compute.extend(expand("data/raw/{pid}/{sensor_key}_with_datetime_visibleandconnected.csv", pid=config["PIDS"], sensor_key="WIFI".lower()))
|
||||||
|
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["WIFI"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="WIFI".lower()))
|
||||||
|
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="WIFI".lower()))
|
||||||
|
|
||||||
if config["HEARTRATE"]["COMPUTE"]:
|
if config["HEARTRATE"]["COMPUTE"]:
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["HEARTRATE"]["DB_TABLE"]))
|
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["HEARTRATE"]["DB_TABLE"]))
|
||||||
|
|
10
config.yaml
10
config.yaml
|
@ -106,7 +106,6 @@ BLUETOOTH:
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
RAPIDS:
|
RAPIDS:
|
||||||
COMPUTE: False
|
COMPUTE: False
|
||||||
DAY_SEGMENTS: *day_segments
|
|
||||||
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
|
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
|
||||||
SRC_FOLDER: "rapids" # inside src/features/bluetooth
|
SRC_FOLDER: "rapids" # inside src/features/bluetooth
|
||||||
SRC_LANGUAGE: "r"
|
SRC_LANGUAGE: "r"
|
||||||
|
@ -197,12 +196,15 @@ SLEEP:
|
||||||
SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
|
SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
|
||||||
|
|
||||||
WIFI:
|
WIFI:
|
||||||
COMPUTE: False
|
|
||||||
DB_TABLE:
|
DB_TABLE:
|
||||||
VISIBLE_ACCESS_POINTS: "wifi" # if you only have a CONNECTED_ACCESS_POINTS table, set this value to ""
|
VISIBLE_ACCESS_POINTS: "wifi" # if you only have a CONNECTED_ACCESS_POINTS table, set this value to ""
|
||||||
CONNECTED_ACCESS_POINTS: "sensor_wifi" # if you only have a VISIBLE_ACCESS_POINTS table, set this value to ""
|
CONNECTED_ACCESS_POINTS: "sensor_wifi" # if you only have a VISIBLE_ACCESS_POINTS table, set this value to ""
|
||||||
DAY_SEGMENTS: *day_segments
|
PROVIDERS:
|
||||||
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
|
RAPIDS:
|
||||||
|
COMPUTE: False
|
||||||
|
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
|
||||||
|
SRC_FOLDER: "rapids" # inside src/features/wifi
|
||||||
|
SRC_LANGUAGE: "r"
|
||||||
|
|
||||||
CONVERSATION:
|
CONVERSATION:
|
||||||
COMPUTE: False
|
COMPUTE: False
|
||||||
|
|
|
@ -227,16 +227,29 @@ rule applications_foreground_features:
|
||||||
script:
|
script:
|
||||||
"../src/features/applications_foreground_features.py"
|
"../src/features/applications_foreground_features.py"
|
||||||
|
|
||||||
rule wifi_features:
|
rule wifi_r_features:
|
||||||
input:
|
input:
|
||||||
expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]),
|
sensor_data = expand("data/raw/{{pid}}/{sensor_key}_with_datetime_visibleandconnected.csv", sensor_key="WIFI".lower()),
|
||||||
day_segments = expand("data/interim/{sensor}_day_segments.csv", sensor=config["WIFI"]["DB_TABLE"])
|
day_segments_labels = "data/interim/day_segments_labels.csv"
|
||||||
params:
|
params:
|
||||||
features = config["WIFI"]["FEATURES"]
|
provider = lambda wildcards: config["WIFI"]["PROVIDERS"][wildcards.provider_key],
|
||||||
|
provider_key = "{provider_key}"
|
||||||
output:
|
output:
|
||||||
"data/processed/{pid}/wifi_features.csv"
|
"data/interim/{pid}/wifi_features/wifi_r_{provider_key}.csv"
|
||||||
script:
|
script:
|
||||||
"../src/features/wifi_features.R"
|
"../src/features/wifi/wifi_entry.R"
|
||||||
|
|
||||||
|
rule wifi_python_features:
|
||||||
|
input:
|
||||||
|
sensor_data = expand("data/raw/{{pid}}/{sensor_key}_with_datetime_visibleandconnected.csv", sensor_key="WIFI".lower()),
|
||||||
|
day_segments_labels = "data/interim/day_segments_labels.csv"
|
||||||
|
params:
|
||||||
|
provider = lambda wildcards: config["WIFI"]["PROVIDERS"][wildcards.provider_key],
|
||||||
|
provider_key = "{provider_key}"
|
||||||
|
output:
|
||||||
|
"data/interim/{pid}/wifi_features/wifi_python_{provider_key}.csv"
|
||||||
|
script:
|
||||||
|
"../src/features/wifi/wifi_entry.py"
|
||||||
|
|
||||||
rule fitbit_heartrate_features:
|
rule fitbit_heartrate_features:
|
||||||
input:
|
input:
|
||||||
|
|
|
@ -172,3 +172,11 @@ rule fitbit_sleep_with_datetime:
|
||||||
intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv"
|
intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv"
|
||||||
script:
|
script:
|
||||||
"../src/data/fitbit_readable_datetime.py"
|
"../src/data/fitbit_readable_datetime.py"
|
||||||
|
|
||||||
|
rule join_wifi_tables:
|
||||||
|
input:
|
||||||
|
unpack(optional_wifi_input)
|
||||||
|
output:
|
||||||
|
"data/raw/{pid}/wifi_with_datetime_visibleandconnected.csv"
|
||||||
|
script:
|
||||||
|
"../src/data/join_visible_and_connected_wifi.R"
|
|
@ -0,0 +1,18 @@
|
||||||
|
source("renv/activate.R")
|
||||||
|
library("dplyr")
|
||||||
|
|
||||||
|
# Builds a single wifi table from up to two optional inputs. Rows read from
# the `visible_access_points` input are tagged connected = 0 and rows read from
# the `connected_access_points` input are tagged connected = 1. The result is
# written to the first output path.
# NOTE(review): the named inputs presumably come from the rule's
# `unpack(optional_wifi_input)` -- confirm against the Snakefile.

# Read one optional input table and tag every row with `connected_flag`.
# Returns NULL when the input was not supplied (path is NULL).
read_access_points <- function(path, connected_flag) {
  if (is.null(path)) {
    return(NULL)
  }
  read.csv(path, stringsAsFactors = FALSE) %>%
    mutate(connected = connected_flag)
}

visible_access_points <- read_access_points(
  snakemake@input[["visible_access_points"]], 0
)
connected_access_points <- read_access_points(
  snakemake@input[["connected_access_points"]], 1
)

if (is.null(visible_access_points) && is.null(connected_access_points)) {
  # Without this guard the script only failed at write.csv() with
  # "object 'wifi_data' not found", which hides the real cause.
  stop(
    "join_visible_and_connected_wifi: neither visible_access_points nor ",
    "connected_access_points was provided as input",
    call. = FALSE
  )
}

if (is.null(connected_access_points)) {
  wifi_data <- visible_access_points
} else if (is.null(visible_access_points)) {
  wifi_data <- connected_access_points
} else {
  # Only the merged table is re-sorted; a single table keeps its file order.
  wifi_data <- bind_rows(visible_access_points, connected_access_points) %>%
    arrange(timestamp)
}

write.csv(wifi_data, snakemake@output[[1]], row.names = FALSE)
|
|
@ -20,7 +20,7 @@ fetch_provider_features <- function(provider, provider_key, config_key, sensor_d
|
||||||
day_segments_labels <- read.csv(day_segments_file, stringsAsFactors = FALSE)
|
day_segments_labels <- read.csv(day_segments_file, stringsAsFactors = FALSE)
|
||||||
|
|
||||||
if(!"FEATURES" %in% names(provider))
|
if(!"FEATURES" %in% names(provider))
|
||||||
stop(paste0("Provider config[CALLS][PROVIDERS][", provider_key,"] is missing a FEATURES attribute in config.yaml"))
|
stop(paste0("Provider config[", config_key,"][PROVIDERS][", provider_key,"] is missing a FEATURES attribute in config.yaml"))
|
||||||
|
|
||||||
if(provider[["COMPUTE"]] == TRUE){
|
if(provider[["COMPUTE"]] == TRUE){
|
||||||
code_path <- paste0("src/features/", config_key,"/", provider[["SRC_FOLDER"]], "/main.R")
|
code_path <- paste0("src/features/", config_key,"/", provider[["SRC_FOLDER"]], "/main.R")
|
||||||
|
|
|
@ -1,18 +1,12 @@
|
||||||
library(dplyr)
|
library(dplyr)
|
||||||
|
|
||||||
filter_by_day_segment <- function(data, day_segment) {
|
|
||||||
if(day_segment != "daily")
|
|
||||||
data <- data %>% filter(local_day_segment == day_segment)
|
|
||||||
|
|
||||||
return(data %>% group_by(local_date))
|
|
||||||
}
|
|
||||||
|
|
||||||
compute_wifi_feature <- function(data, feature, day_segment){
|
compute_wifi_feature <- function(data, feature, day_segment){
|
||||||
data <- data %>% filter_by_day_segment(day_segment)
|
data <- data %>% filter_data_by_segment(day_segment)
|
||||||
if(feature %in% c("countscans", "uniquedevices")){
|
if(feature %in% c("countscans", "uniquedevices")){
|
||||||
|
data <- data %>% group_by(local_segment)
|
||||||
data <- switch(feature,
|
data <- switch(feature,
|
||||||
"countscans" = data %>% summarise(!!paste("wifi", day_segment, feature, sep = "_") := n()),
|
"countscans" = data %>% summarise(!!paste("wifi_rapids", feature, sep = "_") := n()),
|
||||||
"uniquedevices" = data %>% summarise(!!paste("wifi", day_segment, feature, sep = "_") := n_distinct(bssid)))
|
"uniquedevices" = data %>% summarise(!!paste("wifi_rapids", feature, sep = "_") := n_distinct(bssid)))
|
||||||
return(data)
|
return(data)
|
||||||
} else if(feature == "countscansmostuniquedevice"){
|
} else if(feature == "countscansmostuniquedevice"){
|
||||||
# Get the most scanned device
|
# Get the most scanned device
|
||||||
|
@ -25,15 +19,16 @@ compute_wifi_feature <- function(data, feature, day_segment){
|
||||||
pull(bssid)
|
pull(bssid)
|
||||||
return(data %>%
|
return(data %>%
|
||||||
filter(bssid == mostuniquedevice) %>%
|
filter(bssid == mostuniquedevice) %>%
|
||||||
group_by(local_date) %>%
|
group_by(local_segment) %>%
|
||||||
summarise(!!paste("wifi", day_segment, feature, sep = "_") := n()) %>%
|
summarise(!!paste("wifi_rapids", feature, sep = "_") := n()) %>%
|
||||||
replace(is.na(.), 0))
|
replace(is.na(.), 0))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
base_wifi_features <- function(wifi_data, day_segment, requested_features){
|
rapids_features <- function(wifi_data, day_segment, provider){
|
||||||
|
requested_features <- provider[["FEATURES"]]
|
||||||
# Output dataframe
|
# Output dataframe
|
||||||
features = data.frame(local_date = character(), stringsAsFactors = FALSE)
|
features = data.frame(local_segment = character(), stringsAsFactors = FALSE)
|
||||||
|
|
||||||
# The name of the features this function can compute
|
# The name of the features this function can compute
|
||||||
base_features_names <- c("countscans", "uniquedevices", "countscansmostuniquedevice")
|
base_features_names <- c("countscans", "uniquedevices", "countscansmostuniquedevice")
|
||||||
|
@ -42,8 +37,8 @@ base_wifi_features <- function(wifi_data, day_segment, requested_features){
|
||||||
features_to_compute <- intersect(base_features_names, requested_features)
|
features_to_compute <- intersect(base_features_names, requested_features)
|
||||||
|
|
||||||
for(feature_name in features_to_compute){
|
for(feature_name in features_to_compute){
|
||||||
feature <- compute_wifi_feature(wifi_data, feature_name, day_segment)
|
feature <- compute_wifi_feature(wifi_data, feature_name, day_segment)
|
||||||
features <- merge(features, feature, by="local_date", all = TRUE)
|
features <- merge(features, feature, by="local_segment", all = TRUE)
|
||||||
}
|
}
|
||||||
|
|
||||||
return(features)
|
return(features)
|
|
@ -0,0 +1,13 @@
|
||||||
|
source("renv/activate.R")
|
||||||
|
source("src/features/utils/utils.R")
|
||||||
|
library("dplyr")
|
||||||
|
library("tidyr")
|
||||||
|
|
||||||
|
# Entry point for R-based wifi feature providers: collects the paths and the
# provider configuration passed in by Snakemake, delegates the computation to
# fetch_provider_features() (sourced from src/features/utils/utils.R above),
# and writes the returned features to the first output path.
sensor_data_file <- snakemake@input[["sensor_data"]]
day_segments_file <- snakemake@input[["day_segments_labels"]]

# `[[` extracts the element itself; `[` would hand over a one-element list
# instead of the provider settings / the provider key string.
provider <- snakemake@params[["provider"]]
provider_key <- snakemake@params[["provider_key"]]

sensor_features <- fetch_provider_features(
  provider,
  provider_key,
  "wifi",
  sensor_data_file,
  day_segments_file
)

write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
|
|
@ -0,0 +1,18 @@
|
||||||
|
import pandas as pd
|
||||||
|
from importlib import import_module, util
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Load src/features/utils/utils.py by file path (relative to this script's
# directory) and take fetch_provider_features from the loaded module.
utils_path = Path(snakemake.scriptdir).parent / "utils" / "utils.py"
utils_spec = util.spec_from_file_location("util", str(utils_path))
utils_module = util.module_from_spec(utils_spec)
utils_spec.loader.exec_module(utils_module)
fetch_provider_features = utils_module.fetch_provider_features

# Compute the features for the requested provider; "sensor_data" is a list
# input, so its first (only) path is used.
sensor_features = fetch_provider_features(
    snakemake.params["provider"],
    snakemake.params["provider_key"],
    "wifi",
    snakemake.input["sensor_data"][0],
    snakemake.input["day_segments_labels"],
)

# Persist the result to the rule's first output, without the index column.
sensor_features.to_csv(snakemake.output[0], index=False)
|
Loading…
Reference in New Issue