2020-05-02 01:46:04 +02:00
|
|
|
source("renv/activate.R")
|
2020-06-11 18:25:49 +02:00
|
|
|
# Load Ian Barnett's code. Taken from https://scholar.harvard.edu/ibarnett/software/gpsmobility
|
|
|
|
file.sources = list.files(c("src/features/location_barnett"), pattern="*.R$", full.names=TRUE, ignore.case=TRUE)
|
|
|
|
sapply(file.sources,source,.GlobalEnv)
|
2019-11-05 22:22:46 +01:00
|
|
|
|
2019-11-05 21:17:20 +01:00
|
|
|
library(dplyr)
|
|
|
|
|
2020-06-11 18:25:49 +02:00
|
|
|
write_empty_file <- function(file_path, requested_features){
|
2019-11-13 20:54:24 +01:00
|
|
|
write.csv(data.frame(local_date= character(),
|
2020-03-12 18:18:36 +01:00
|
|
|
location_barnett_hometime= numeric(),
|
|
|
|
location_barnett_disttravelled= numeric(),
|
|
|
|
location_barnett_rog= numeric(),
|
|
|
|
location_barnett_maxdiam= numeric(),
|
|
|
|
location_barnett_maxhomedist= numeric(),
|
|
|
|
location_barnett_siglocsvisited= numeric(),
|
|
|
|
location_barnett_avgflightlen= numeric(),
|
|
|
|
location_barnett_stdflightlen= numeric(),
|
|
|
|
location_barnett_avgflightdur= numeric(),
|
|
|
|
location_barnett_stdflightdur= numeric(),
|
|
|
|
location_barnett_probpause= numeric(),
|
|
|
|
location_barnett_siglocentropy= numeric(),
|
|
|
|
location_barnett_minsmissing= numeric(),
|
|
|
|
location_barnett_circdnrtn= numeric(),
|
2020-06-11 18:25:49 +02:00
|
|
|
location_barnett_wkenddayrtn= numeric(),
|
|
|
|
minutes_data_used= numeric()
|
|
|
|
) %>% select(requested_features), file_path, row.names = F)
|
2019-11-13 20:54:24 +01:00
|
|
|
}
|
|
|
|
|
2020-06-11 18:25:49 +02:00
|
|
|
location <- read.csv(snakemake@input[["locations"]], stringsAsFactors = F)
|
|
|
|
# The choice between RESAMPLE_FUSED and the original location data happens at the rule level in the function
|
|
|
|
# optional_location_input in features.snakefile
|
2019-12-10 01:15:10 +01:00
|
|
|
locations_to_use <- snakemake@params[["locations_to_use"]]
|
2019-11-05 21:17:20 +01:00
|
|
|
accuracy_limit <- snakemake@params[["accuracy_limit"]]
|
|
|
|
timezone <- snakemake@params[["timezone"]]
|
2020-06-11 18:25:49 +02:00
|
|
|
minutes_data_used <- snakemake@params[["minutes_data_used"]]
|
|
|
|
requested_features <- intersect(unlist(snakemake@params["features"], use.names = F),
|
2020-02-21 16:58:35 +01:00
|
|
|
c("hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","minsmissing","circdnrtn","wkenddayrtn"))
|
2020-06-11 18:25:49 +02:00
|
|
|
requested_features <- c("local_date", paste("location_barnett", requested_features, sep = "_"))
|
|
|
|
if(minutes_data_used)
|
|
|
|
requested_features <- c(requested_features, "minutes_data_used")
|
2019-11-05 21:17:20 +01:00
|
|
|
|
2020-06-11 18:25:49 +02:00
|
|
|
if(!locations_to_use %in% c("ALL_EXCEPT_FUSED", "RESAMPLE_FUSED", "ALL")){
|
2019-12-10 01:15:10 +01:00
|
|
|
print("Unkown filter, provide one of the following three: ALL, ALL_EXCEPT_FUSED, or RESAMPLE_FUSED")
|
|
|
|
quit(save = "no", status = 1, runLast = FALSE)
|
|
|
|
}
|
|
|
|
|
2020-06-11 18:25:49 +02:00
|
|
|
# excludes fused and resample
|
|
|
|
if(locations_to_use == "ALL_EXCEPT_FUSED")
|
|
|
|
location <- location %>% filter(provider == "gps")
|
|
|
|
|
|
|
|
# Remove 0,0 location coordinates
|
|
|
|
location <- location %>% filter(double_latitude != 0 & double_longitude != 0)
|
|
|
|
|
|
|
|
# Excludes datasets with less than 24 hours of data
|
|
|
|
if(max(location$timestamp) - min(location$timestamp) < 86400000)
|
|
|
|
location <- head(location, 0)
|
|
|
|
|
2019-11-13 20:54:24 +01:00
|
|
|
if (nrow(location) > 1){
|
2020-06-11 18:25:49 +02:00
|
|
|
|
|
|
|
# Count how many minutes of data we use to get location features
|
|
|
|
# Some minutes have multiple fused rows
|
|
|
|
location_minutes_used <- location %>%
|
|
|
|
group_by(local_date, local_hour) %>%
|
|
|
|
summarise(n_minutes = n_distinct(local_minute)) %>%
|
|
|
|
group_by(local_date) %>%
|
|
|
|
summarise(minutes_data_used = sum(n_minutes)) %>%
|
|
|
|
select(local_date, minutes_data_used)
|
|
|
|
|
|
|
|
location <- location %>%
|
|
|
|
select(timestamp, latitude = double_latitude, longitude = double_longitude, altitude = double_altitude, accuracy)
|
|
|
|
|
|
|
|
outputMobility <- MobilityFeatures(location, ACCURACY_LIM = accuracy_limit, tz = timezone)
|
|
|
|
|
|
|
|
if(is.null(outputMobility)){
|
|
|
|
write_empty_file(snakemake@output[[1]], requested_features)
|
2019-11-13 20:54:24 +01:00
|
|
|
} else{
|
|
|
|
# Copy index (dates) as a column
|
2020-06-11 18:25:49 +02:00
|
|
|
features <- cbind(rownames(outputMobility$featavg), outputMobility$featavg)
|
|
|
|
features <- as.data.frame(features)
|
|
|
|
features[-1] <- lapply(lapply(features[-1], as.character), as.numeric)
|
|
|
|
colnames(features)=c("local_date",tolower(paste("location_barnett", colnames(outputMobility$featavg), sep = "_")))
|
|
|
|
# Add the minute count column
|
|
|
|
features <- left_join(features, location_minutes_used, by = "local_date")
|
|
|
|
write.csv(features %>% select(requested_features), snakemake@output[[1]], row.names = F)
|
2019-11-13 20:54:24 +01:00
|
|
|
}
|
|
|
|
|
2019-11-05 21:17:20 +01:00
|
|
|
} else {
|
2020-06-11 18:25:49 +02:00
|
|
|
write_empty_file(snakemake@output[[1]], requested_features)
|
2020-03-05 20:48:48 +01:00
|
|
|
}
|