Add metric filter to barnett location features
parent
579df6325f
commit
f22d1834ee
|
@ -68,7 +68,8 @@ RESAMPLE_FUSED_LOCATION:
|
|||
TIMEZONE: *timezone
|
||||
|
||||
BARNETT_LOCATION:
|
||||
LOCATIONS_TO_USE: ALL # ALL_EXCEPT_FUSED, RESAMPLE_FUSED
|
||||
METRICS: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","minsmissing","circdnrtn","wkenddayrtn"]
|
||||
LOCATIONS_TO_USE: ALL # ALL, ALL_EXCEPT_FUSED OR RESAMPLE_FUSED
|
||||
ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
TIMEZONE: *timezone
|
||||
|
||||
|
|
|
@ -51,6 +51,7 @@ rule location_barnett_metrics:
|
|||
raw = "data/raw/{pid}/locations_raw.csv",
|
||||
fused = rules.resample_fused_location.output
|
||||
params:
|
||||
metrics = config["BARNETT_LOCATION"]["METRICS"],
|
||||
locations_to_use = config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"],
|
||||
accuracy_limit = config["BARNETT_LOCATION"]["ACCURACY_LIMIT"],
|
||||
timezone = config["BARNETT_LOCATION"]["TIMEZONE"]
|
||||
|
|
|
@ -2,7 +2,7 @@ source("packrat/init.R")
|
|||
|
||||
library(dplyr)
|
||||
|
||||
write_empty_file <- function(file_path){
|
||||
write_empty_file <- function(file_path, metrics_to_include){
|
||||
write.csv(data.frame(local_date= character(),
|
||||
hometime= numeric(),
|
||||
disttravelled= numeric(),
|
||||
|
@ -19,7 +19,7 @@ write_empty_file <- function(file_path){
|
|||
minsmissing= numeric(),
|
||||
circdnrtn= numeric(),
|
||||
wkenddayrtn= numeric()
|
||||
), file_path, row.names = F)
|
||||
) %>% select(metrics_to_include), file_path, row.names = F)
|
||||
}
|
||||
|
||||
# Load Ian Barnett's code. Taken from https://scholar.harvard.edu/ibarnett/software/gpsmobility
|
||||
|
@ -29,6 +29,8 @@ sapply(file.sources,source,.GlobalEnv)
|
|||
locations_to_use <- snakemake@params[["locations_to_use"]]
|
||||
accuracy_limit <- snakemake@params[["accuracy_limit"]]
|
||||
timezone <- snakemake@params[["timezone"]]
|
||||
metrics_to_include <- intersect(unlist(snakemake@params["metrics"], use.names = F),
|
||||
c("hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","minsmissing","circdnrtn","wkenddayrtn"))
|
||||
|
||||
# By default we use all raw locations: fused without resampling and not fused (gps, network)
|
||||
location <- read.csv(snakemake@input[["raw"]], stringsAsFactors = F) %>%
|
||||
|
@ -47,16 +49,16 @@ if(locations_to_use == "ALL_EXCEPT_FUSED"){
|
|||
if (nrow(location) > 1){
|
||||
features <- MobilityFeatures(location, ACCURACY_LIM = accuracy_limit, tz = timezone)
|
||||
if(is.null(features)){
|
||||
write_empty_file(snakemake@output[[1]])
|
||||
write_empty_file(snakemake@output[[1]], metrics_to_include)
|
||||
} else{
|
||||
# Copy index (dates) as a column
|
||||
outmatrix <- cbind(rownames(features$featavg), features$featavg)
|
||||
outmatrix <- as.data.frame(outmatrix)
|
||||
outmatrix[-1] <- lapply(lapply(outmatrix[-1], as.character), as.numeric)
|
||||
colnames(outmatrix)=c("local_date",tolower(colnames(features$featavg)))
|
||||
write.csv(outmatrix,snakemake@output[[1]], row.names = F)
|
||||
write.csv(outmatrix %>% select(metrics_to_include), snakemake@output[[1]], row.names = F)
|
||||
}
|
||||
|
||||
} else {
|
||||
write_empty_file(snakemake@output[[1]])
|
||||
write_empty_file(snakemake@output[[1]], metrics_to_include)
|
||||
}
|
Loading…
Reference in New Issue