From cb2ee1ec821d9363095483a58ec5af0cad0784ab Mon Sep 17 00:00:00 2001 From: JulioV Date: Mon, 9 Dec 2019 19:15:10 -0500 Subject: [PATCH] Add switch to barnet_locations.R to use all, all except fused or resample fused locations --- config.yaml | 3 ++- rules/features.snakefile | 4 +++- src/features/location_barnett_metrics.R | 14 +++++++++++++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/config.yaml b/config.yaml index a56c3ef4..ee89d16f 100644 --- a/config.yaml +++ b/config.yaml @@ -52,7 +52,8 @@ RESAMPLE_FUSED_LOCATION: TIMEZONE: *timezone BARNETT_LOCATION: - ACCURACY_LIMIT: 51 # filters location coordinates with an accuracy higher than this + LOCATIONS_TO_USE: ALL # ALL_EXCEPT_FUSED, RESAMPLE_FUSED + ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius TIMEZONE: *timezone BLUETOOTH: diff --git a/rules/features.snakefile b/rules/features.snakefile index 80d0427a..cfbde01c 100644 --- a/rules/features.snakefile +++ b/rules/features.snakefile @@ -48,8 +48,10 @@ rule google_activity_recognition_deltas: rule location_barnett_metrics: input: - "data/raw/{pid}/locations_with_datetime.csv" + raw = "data/raw/{pid}/locations_raw.csv", + fused = rules.resample_fused_location.output params: + locations_to_use = config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"], accuracy_limit = config["BARNETT_LOCATION"]["ACCURACY_LIMIT"], timezone = config["BARNETT_LOCATION"]["TIMEZONE"] output: diff --git a/src/features/location_barnett_metrics.R b/src/features/location_barnett_metrics.R index 89e18bf3..bc643961 100644 --- a/src/features/location_barnett_metrics.R +++ b/src/features/location_barnett_metrics.R @@ -26,12 +26,24 @@ write_empty_file <- function(file_path){ file.sources = list.files(c("src/features/location_barnett"), pattern="*.R$", full.names=TRUE, ignore.case=TRUE) sapply(file.sources,source,.GlobalEnv) +locations_to_use <- 
snakemake@params[["locations_to_use"]] accuracy_limit <- snakemake@params[["accuracy_limit"]] timezone <- snakemake@params[["timezone"]] -location <- read.csv(snakemake@input[[1]], stringsAsFactors = F) %>% +# By default we use all raw locations: fused without resampling and not fused (gps, network). Any filtering on provider has to run before select(), because select() drops the provider column +location <- read.csv(snakemake@input[["raw"]], stringsAsFactors = F) %>% select(timestamp, latitude = double_latitude, longitude = double_longitude, altitude = double_altitude, accuracy) +if(locations_to_use == "ALL_EXCEPT_FUSED"){ + location <- read.csv(snakemake@input[["raw"]], stringsAsFactors = F) %>% filter(provider != "fused") %>% select(timestamp, latitude = double_latitude, longitude = double_longitude, altitude = double_altitude, accuracy) +} else if (locations_to_use == "RESAMPLE_FUSED"){ + location <- read.csv(snakemake@input[["fused"]], stringsAsFactors = F) %>% + select(timestamp, latitude = double_latitude, longitude = double_longitude, altitude = double_altitude, accuracy) +} else if (locations_to_use != "ALL"){ + print("Unkown filter, provide one of the following three: ALL, ALL_EXCEPT_FUSED, or RESAMPLE_FUSED") + quit(save = "no", status = 1, runLast = FALSE) +} + if (nrow(location) > 1){ features <- MobilityFeatures(location, ACCURACY_LIM = accuracy_limit, tz = timezone) if(is.null(features)){