Migrate bluetooth to new file structure
parent
132311da77
commit
77d9d8f025
10
Snakefile
10
Snakefile
|
@ -47,10 +47,12 @@ for provider in config["CALLS"]["PROVIDERS"].keys():
|
|||
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["CALLS"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="CALLS".lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="CALLS".lower()))
|
||||
|
||||
if config["BLUETOOTH"]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["BLUETOOTH"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["BLUETOOTH"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/processed/{pid}/bluetooth_{day_segment}.csv", pid=config["PIDS"], day_segment = config["BLUETOOTH"]["DAY_SEGMENTS"]))
|
||||
for provider in config["BLUETOOTH"]["PROVIDERS"].keys():
|
||||
if config["BLUETOOTH"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["BLUETOOTH"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["BLUETOOTH"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["BLUETOOTH"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="BLUETOOTH".lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="BLUETOOTH".lower()))
|
||||
|
||||
if config["ACTIVITY_RECOGNITION"]["COMPUTE"]:
|
||||
pids_android = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "android", config["PIDS"]))
|
||||
|
|
11
config.yaml
11
config.yaml
|
@ -102,10 +102,15 @@ LOCATIONS:
|
|||
SRC_LANGUAGE: "r"
|
||||
|
||||
BLUETOOTH:
|
||||
COMPUTE: False
|
||||
DB_TABLE: bluetooth
|
||||
DAY_SEGMENTS: *day_segments
|
||||
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
DAY_SEGMENTS: *day_segments
|
||||
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
|
||||
SRC_FOLDER: "rapids" # inside src/features/bluetooth
|
||||
SRC_LANGUAGE: "r"
|
||||
|
||||
|
||||
ACTIVITY_RECOGNITION:
|
||||
COMPUTE: False
|
||||
|
|
|
@ -110,16 +110,29 @@ rule locations_r_features:
|
|||
script:
|
||||
"../src/features/locations/locations_entry.R"
|
||||
|
||||
rule bluetooth_features:
|
||||
input:
|
||||
expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BLUETOOTH"]["DB_TABLE"]),
|
||||
day_segments = expand("data/interim/{sensor}_day_segments.csv", sensor=config["BLUETOOTH"]["DB_TABLE"])
|
||||
rule bluetooth_r_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BLUETOOTH"]["DB_TABLE"]),
|
||||
day_segments_labels = "data/interim/day_segments_labels.csv"
|
||||
params:
|
||||
features = config["BLUETOOTH"]["FEATURES"]
|
||||
provider = lambda wildcards: config["BLUETOOTH"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
output:
|
||||
"data/processed/{pid}/bluetooth_features.csv"
|
||||
"data/interim/{pid}/bluetooth_features/bluetooth_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/bluetooth_features.R"
|
||||
"../src/features/bluetooth/bluetooth_entry.R"
|
||||
|
||||
# Runs the Python entry script for a Bluetooth feature provider.
# {pid} and {provider_key} are wildcards resolved from the output path; the
# script receives the inputs and params declared below and writes one CSV.
rule bluetooth_python_features:
|
||||
input:
|
||||
# Doubled braces keep {pid} as a wildcard while expand() fills in {sensor} from BLUETOOTH.DB_TABLE.
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BLUETOOTH"]["DB_TABLE"]),
|
||||
day_segments_labels = "data/interim/day_segments_labels.csv"
|
||||
params:
|
||||
# The provider's own configuration entry, looked up under BLUETOOTH.PROVIDERS by the provider_key wildcard.
provider = lambda wildcards: config["BLUETOOTH"]["PROVIDERS"][wildcards.provider_key],
|
||||
# The provider key itself, passed through to the script as a string.
provider_key = "{provider_key}"
|
||||
output:
|
||||
"data/interim/{pid}/bluetooth_features/bluetooth_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/bluetooth/bluetooth_entry.py"
|
||||
|
||||
rule activity_features:
|
||||
input:
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
source("renv/activate.R")
|
||||
source("src/features/utils/utils.R")
|
||||
library("dplyr")
|
||||
library("tidyr")
|
||||
|
||||
# Entry point executed by Snakemake for an R Bluetooth feature provider.
# `snakemake` is the object Snakemake makes available to R scripts; its
# `input`, `params` and `output` slots mirror the sections of the rule.

# File paths declared under the rule's `input:` section.
sensor_data_file <- snakemake@input[["sensor_data"]]
day_segments_file <- snakemake@input[["day_segments_labels"]]

# Use `[[` to extract the elements themselves. Single-bracket `[` returns a
# one-element list wrapping the value, so `provider_key` would be a list
# instead of a character scalar.
provider <- snakemake@params[["provider"]]
provider_key <- snakemake@params[["provider_key"]]

# fetch_provider_features() is defined in src/features/utils/utils.R, which
# is sourced at the top of this script.
sensor_features <- fetch_provider_features(
  provider,
  provider_key,
  "bluetooth",
  sensor_data_file,
  day_segments_file
)

# Write the provider's features to the rule's single output file.
write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
|
|
@ -0,0 +1,18 @@
|
|||
import pandas as pd
|
||||
from importlib import import_module, util
|
||||
from pathlib import Path
|
||||
|
||||
# Load the shared helpers from src/features/utils/utils.py by file path; the
# path is resolved relative to the folder that contains this script.
utils_path = Path(snakemake.scriptdir).parent / "utils" / "utils.py"
utils_spec = util.spec_from_file_location("util", str(utils_path))
utils_module = util.module_from_spec(utils_spec)
utils_spec.loader.exec_module(utils_module)
fetch_provider_features = utils_module.fetch_provider_features

# Values handed over by the Snakemake rule. "sensor_data" is indexed with [0]
# because the rule builds it with expand(), which produces a list of paths.
sensor_data_file = snakemake.input["sensor_data"][0]
day_segments_file = snakemake.input["day_segments_labels"]
provider = snakemake.params["provider"]
provider_key = snakemake.params["provider_key"]

# Compute the provider's features and write them to the rule's only output.
sensor_features = fetch_provider_features(
    provider,
    provider_key,
    "bluetooth",
    sensor_data_file,
    day_segments_file,
)
sensor_features.to_csv(snakemake.output[0], index=False)
|
|
@ -1,19 +1,13 @@
|
|||
library(dplyr)
|
||||
library(tidyr)
|
||||
|
||||
filter_by_day_segment <- function(data, day_segment) {
|
||||
if(day_segment != "daily")
|
||||
data <- data %>% filter(local_day_segment == day_segment)
|
||||
|
||||
return(data %>% group_by(local_date))
|
||||
}
|
||||
|
||||
compute_bluetooth_feature <- function(data, feature, day_segment){
|
||||
data <- data %>% filter_by_day_segment(day_segment)
|
||||
data <- data %>% filter_data_by_segment(day_segment)
|
||||
if(feature %in% c("countscans", "uniquedevices")){
|
||||
data <- data %>% group_by(local_segment)
|
||||
data <- switch(feature,
|
||||
"countscans" = data %>% summarise(!!paste("bluetooth", day_segment, feature, sep = "_") := n()),
|
||||
"uniquedevices" = data %>% summarise(!!paste("bluetooth", day_segment, feature, sep = "_") := n_distinct(bt_address)))
|
||||
"countscans" = data %>% summarise(!!paste("bluetooth_rapids", feature, sep = "_") := n()),
|
||||
"uniquedevices" = data %>% summarise(!!paste("bluetooth_rapids", feature, sep = "_") := n_distinct(bt_address)))
|
||||
return(data)
|
||||
} else if(feature == "countscansmostuniquedevice"){
|
||||
# Get the most scanned device
|
||||
|
@ -26,15 +20,17 @@ compute_bluetooth_feature <- function(data, feature, day_segment){
|
|||
pull(bt_address)
|
||||
return(data %>%
|
||||
filter(bt_address == mostuniquedevice) %>%
|
||||
group_by(local_date) %>%
|
||||
summarise(!!paste("bluetooth", day_segment, feature, sep = "_") := n()) %>%
|
||||
group_by(local_segment) %>%
|
||||
summarise(!!paste("bluetooth_rapids", feature, sep = "_") := n()) %>%
|
||||
replace(is.na(.), 0))
|
||||
}
|
||||
}
|
||||
|
||||
base_bluetooth_features <- function(bluetooth_data, day_segment, requested_features){
|
||||
rapids_features <- function(bluetooth_data, day_segment, provider){
|
||||
requested_features <- provider[["FEATURES"]]
|
||||
|
||||
# Output dataframe
|
||||
features = data.frame(local_date = character(), stringsAsFactors = FALSE)
|
||||
features = data.frame(local_segment = character(), stringsAsFactors = FALSE)
|
||||
|
||||
# The name of the features this function can compute
|
||||
base_features_names <- c("countscans", "uniquedevices", "countscansmostuniquedevice")
|
||||
|
@ -43,8 +39,8 @@ base_bluetooth_features <- function(bluetooth_data, day_segment, requested_featu
|
|||
features_to_compute <- intersect(base_features_names, requested_features)
|
||||
|
||||
for(feature_name in features_to_compute){
|
||||
feature <- compute_bluetooth_feature(bluetooth_data, feature_name, day_segment)
|
||||
features <- merge(features, feature, by="local_date", all = TRUE)
|
||||
feature <- compute_bluetooth_feature(bluetooth_data, feature_name, day_segment)
|
||||
features <- merge(features, feature, by="local_segment", all = TRUE)
|
||||
}
|
||||
|
||||
features <- features %>% mutate_at(vars(contains("countscansmostuniquedevice")), list( ~ replace_na(., 0)))
|
Loading…
Reference in New Issue