Add metric filter to barnett location features

pull/95/head
JulioV 2020-02-21 10:58:35 -05:00
parent 579df6325f
commit f22d1834ee
3 changed files with 10 additions and 6 deletions

View File

@ -68,7 +68,8 @@ RESAMPLE_FUSED_LOCATION:
TIMEZONE: *timezone
BARNETT_LOCATION:
LOCATIONS_TO_USE: ALL # ALL_EXCEPT_FUSED, RESAMPLE_FUSED
METRICS: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","minsmissing","circdnrtn","wkenddayrtn"]
LOCATIONS_TO_USE: ALL # ALL, ALL_EXCEPT_FUSED OR RESAMPLE_FUSED
ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
TIMEZONE: *timezone

View File

@ -51,6 +51,7 @@ rule location_barnett_metrics:
raw = "data/raw/{pid}/locations_raw.csv",
fused = rules.resample_fused_location.output
params:
metrics = config["BARNETT_LOCATION"]["METRICS"],
locations_to_use = config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"],
accuracy_limit = config["BARNETT_LOCATION"]["ACCURACY_LIMIT"],
timezone = config["BARNETT_LOCATION"]["TIMEZONE"]

View File

@ -2,7 +2,7 @@ source("packrat/init.R")
library(dplyr)
write_empty_file <- function(file_path){
write_empty_file <- function(file_path, metrics_to_include){
write.csv(data.frame(local_date= character(),
hometime= numeric(),
disttravelled= numeric(),
@ -19,7 +19,7 @@ write_empty_file <- function(file_path){
minsmissing= numeric(),
circdnrtn= numeric(),
wkenddayrtn= numeric()
), file_path, row.names = F)
) %>% select(metrics_to_include), file_path, row.names = F)
}
# Load Ian Barnett's code. Taken from https://scholar.harvard.edu/ibarnett/software/gpsmobility
@ -29,6 +29,8 @@ sapply(file.sources,source,.GlobalEnv)
locations_to_use <- snakemake@params[["locations_to_use"]]
accuracy_limit <- snakemake@params[["accuracy_limit"]]
timezone <- snakemake@params[["timezone"]]
metrics_to_include <- intersect(unlist(snakemake@params["metrics"], use.names = F),
c("hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","minsmissing","circdnrtn","wkenddayrtn"))
# By deafult we use all raw locations: fused without resampling and not fused (gps, network)
location <- read.csv(snakemake@input[["raw"]], stringsAsFactors = F) %>%
@ -47,16 +49,16 @@ if(locations_to_use == "ALL_EXCEPT_FUSED"){
if (nrow(location) > 1){
features <- MobilityFeatures(location, ACCURACY_LIM = accuracy_limit, tz = timezone)
if(is.null(features)){
write_empty_file(snakemake@output[[1]])
write_empty_file(snakemake@output[[1]], metrics_to_include)
} else{
# Copy index (dates) as a column
outmatrix <- cbind(rownames(features$featavg), features$featavg)
outmatrix <- as.data.frame(outmatrix)
outmatrix[-1] <- lapply(lapply(outmatrix[-1], as.character), as.numeric)
colnames(outmatrix)=c("local_date",tolower(colnames(features$featavg)))
write.csv(outmatrix,snakemake@output[[1]], row.names = F)
write.csv(outmatrix %>% select(metrics_to_include), snakemake@output[[1]], row.names = F)
}
} else {
write_empty_file(snakemake@output[[1]])
write_empty_file(snakemake@output[[1]], metrics_to_include)
}