diff --git a/config.yaml b/config.yaml index 19b9629f..9325934c 100644 --- a/config.yaml +++ b/config.yaml @@ -68,7 +68,8 @@ RESAMPLE_FUSED_LOCATION: TIMEZONE: *timezone BARNETT_LOCATION: - LOCATIONS_TO_USE: ALL # ALL_EXCEPT_FUSED, RESAMPLE_FUSED + METRICS: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","minsmissing","circdnrtn","wkenddayrtn"] + LOCATIONS_TO_USE: ALL # ALL, ALL_EXCEPT_FUSED OR RESAMPLE_FUSED ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius TIMEZONE: *timezone diff --git a/rules/features.snakefile b/rules/features.snakefile index c7218e85..b50ab43c 100644 --- a/rules/features.snakefile +++ b/rules/features.snakefile @@ -51,6 +51,7 @@ rule location_barnett_metrics: raw = "data/raw/{pid}/locations_raw.csv", fused = rules.resample_fused_location.output params: + metrics = config["BARNETT_LOCATION"]["METRICS"], locations_to_use = config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"], accuracy_limit = config["BARNETT_LOCATION"]["ACCURACY_LIMIT"], timezone = config["BARNETT_LOCATION"]["TIMEZONE"] diff --git a/src/features/location_barnett_metrics.R b/src/features/location_barnett_metrics.R index bc643961..7ce0b9e5 100644 --- a/src/features/location_barnett_metrics.R +++ b/src/features/location_barnett_metrics.R @@ -2,7 +2,7 @@ source("packrat/init.R") library(dplyr) -write_empty_file <- function(file_path){ +write_empty_file <- function(file_path, metrics_to_include){ write.csv(data.frame(local_date= character(), hometime= numeric(), disttravelled= numeric(), @@ -19,7 +19,7 @@ write_empty_file <- function(file_path){ minsmissing= numeric(), circdnrtn= numeric(), wkenddayrtn= numeric() - ), file_path, row.names = F) + )[c("local_date", metrics_to_include)], file_path, row.names = FALSE) # keep local_date so the empty file has the same columns as a populated one } # Load Ian Barnett's code. 
Taken from https://scholar.harvard.edu/ibarnett/software/gpsmobility @@ -29,6 +29,8 @@ sapply(file.sources,source,.GlobalEnv) locations_to_use <- snakemake@params[["locations_to_use"]] accuracy_limit <- snakemake@params[["accuracy_limit"]] timezone <- snakemake@params[["timezone"]] +metrics_to_include <- intersect(unlist(snakemake@params["metrics"], use.names = F), + c("hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","minsmissing","circdnrtn","wkenddayrtn")) # By deafult we use all raw locations: fused without resampling and not fused (gps, network) location <- read.csv(snakemake@input[["raw"]], stringsAsFactors = F) %>% @@ -47,16 +49,16 @@ if(locations_to_use == "ALL_EXCEPT_FUSED"){ if (nrow(location) > 1){ features <- MobilityFeatures(location, ACCURACY_LIM = accuracy_limit, tz = timezone) if(is.null(features)){ - write_empty_file(snakemake@output[[1]]) + write_empty_file(snakemake@output[[1]], metrics_to_include) } else{ # Copy index (dates) as a column outmatrix <- cbind(rownames(features$featavg), features$featavg) outmatrix <- as.data.frame(outmatrix) outmatrix[-1] <- lapply(lapply(outmatrix[-1], as.character), as.numeric) colnames(outmatrix)=c("local_date",tolower(colnames(features$featavg))) - write.csv(outmatrix,snakemake@output[[1]], row.names = F) + write.csv(outmatrix[c("local_date", metrics_to_include)], snakemake@output[[1]], row.names = FALSE) # keep local_date: the requested metrics never include it, so selecting them alone drops the date column } } else { - write_empty_file(snakemake@output[[1]]) + write_empty_file(snakemake@output[[1]], metrics_to_include) } \ No newline at end of file