Add metric filter to barnett location features
parent
579df6325f
commit
f22d1834ee
|
@ -68,7 +68,8 @@ RESAMPLE_FUSED_LOCATION:
|
||||||
TIMEZONE: *timezone
|
TIMEZONE: *timezone
|
||||||
|
|
||||||
BARNETT_LOCATION:
|
BARNETT_LOCATION:
|
||||||
LOCATIONS_TO_USE: ALL # ALL_EXCEPT_FUSED, RESAMPLE_FUSED
|
METRICS: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","minsmissing","circdnrtn","wkenddayrtn"]
|
||||||
|
LOCATIONS_TO_USE: ALL # ALL, ALL_EXCEPT_FUSED OR RESAMPLE_FUSED
|
||||||
ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||||
TIMEZONE: *timezone
|
TIMEZONE: *timezone
|
||||||
|
|
||||||
|
|
|
@ -51,6 +51,7 @@ rule location_barnett_metrics:
|
||||||
raw = "data/raw/{pid}/locations_raw.csv",
|
raw = "data/raw/{pid}/locations_raw.csv",
|
||||||
fused = rules.resample_fused_location.output
|
fused = rules.resample_fused_location.output
|
||||||
params:
|
params:
|
||||||
|
metrics = config["BARNETT_LOCATION"]["METRICS"],
|
||||||
locations_to_use = config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"],
|
locations_to_use = config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"],
|
||||||
accuracy_limit = config["BARNETT_LOCATION"]["ACCURACY_LIMIT"],
|
accuracy_limit = config["BARNETT_LOCATION"]["ACCURACY_LIMIT"],
|
||||||
timezone = config["BARNETT_LOCATION"]["TIMEZONE"]
|
timezone = config["BARNETT_LOCATION"]["TIMEZONE"]
|
||||||
|
|
|
@ -2,7 +2,7 @@ source("packrat/init.R")
|
||||||
|
|
||||||
library(dplyr)
|
library(dplyr)
|
||||||
|
|
||||||
write_empty_file <- function(file_path){
|
write_empty_file <- function(file_path, metrics_to_include){
|
||||||
write.csv(data.frame(local_date= character(),
|
write.csv(data.frame(local_date= character(),
|
||||||
hometime= numeric(),
|
hometime= numeric(),
|
||||||
disttravelled= numeric(),
|
disttravelled= numeric(),
|
||||||
|
@ -19,7 +19,7 @@ write_empty_file <- function(file_path){
|
||||||
minsmissing= numeric(),
|
minsmissing= numeric(),
|
||||||
circdnrtn= numeric(),
|
circdnrtn= numeric(),
|
||||||
wkenddayrtn= numeric()
|
wkenddayrtn= numeric()
|
||||||
), file_path, row.names = F)
|
) %>% select(metrics_to_include), file_path, row.names = F)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Load Ian Barnett's code. Taken from https://scholar.harvard.edu/ibarnett/software/gpsmobility
|
# Load Ian Barnett's code. Taken from https://scholar.harvard.edu/ibarnett/software/gpsmobility
|
||||||
|
@ -29,6 +29,8 @@ sapply(file.sources,source,.GlobalEnv)
|
||||||
locations_to_use <- snakemake@params[["locations_to_use"]]
|
locations_to_use <- snakemake@params[["locations_to_use"]]
|
||||||
accuracy_limit <- snakemake@params[["accuracy_limit"]]
|
accuracy_limit <- snakemake@params[["accuracy_limit"]]
|
||||||
timezone <- snakemake@params[["timezone"]]
|
timezone <- snakemake@params[["timezone"]]
|
||||||
|
metrics_to_include <- intersect(unlist(snakemake@params["metrics"], use.names = F),
|
||||||
|
c("hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","minsmissing","circdnrtn","wkenddayrtn"))
|
||||||
|
|
||||||
# By deafult we use all raw locations: fused without resampling and not fused (gps, network)
|
# By deafult we use all raw locations: fused without resampling and not fused (gps, network)
|
||||||
location <- read.csv(snakemake@input[["raw"]], stringsAsFactors = F) %>%
|
location <- read.csv(snakemake@input[["raw"]], stringsAsFactors = F) %>%
|
||||||
|
@ -47,16 +49,16 @@ if(locations_to_use == "ALL_EXCEPT_FUSED"){
|
||||||
if (nrow(location) > 1){
|
if (nrow(location) > 1){
|
||||||
features <- MobilityFeatures(location, ACCURACY_LIM = accuracy_limit, tz = timezone)
|
features <- MobilityFeatures(location, ACCURACY_LIM = accuracy_limit, tz = timezone)
|
||||||
if(is.null(features)){
|
if(is.null(features)){
|
||||||
write_empty_file(snakemake@output[[1]])
|
write_empty_file(snakemake@output[[1]], metrics_to_include)
|
||||||
} else{
|
} else{
|
||||||
# Copy index (dates) as a column
|
# Copy index (dates) as a column
|
||||||
outmatrix <- cbind(rownames(features$featavg), features$featavg)
|
outmatrix <- cbind(rownames(features$featavg), features$featavg)
|
||||||
outmatrix <- as.data.frame(outmatrix)
|
outmatrix <- as.data.frame(outmatrix)
|
||||||
outmatrix[-1] <- lapply(lapply(outmatrix[-1], as.character), as.numeric)
|
outmatrix[-1] <- lapply(lapply(outmatrix[-1], as.character), as.numeric)
|
||||||
colnames(outmatrix)=c("local_date",tolower(colnames(features$featavg)))
|
colnames(outmatrix)=c("local_date",tolower(colnames(features$featavg)))
|
||||||
write.csv(outmatrix,snakemake@output[[1]], row.names = F)
|
write.csv(outmatrix %>% select(metrics_to_include), snakemake@output[[1]], row.names = F)
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
write_empty_file(snakemake@output[[1]])
|
write_empty_file(snakemake@output[[1]], metrics_to_include)
|
||||||
}
|
}
|
Loading…
Reference in New Issue