diff --git a/docs/change-log.md b/docs/change-log.md index a692df9d..430521e7 100644 --- a/docs/change-log.md +++ b/docs/change-log.md @@ -1,6 +1,8 @@ # Change Log ## v1.6.0 - Refactor PHONE_CALLS RAPIDS provider to compute features based on call episodes or events +- Refactor PHONE_LOCATIONS DORYAB provider to compute features based on location episodes +- Temporarily revert PHONE_LOCATIONS BARNETT provider to use the R script ## v1.5.0 - Update Barnett location features with faster Python implementation - Fix rounding bug in data yield features diff --git a/rules/features.smk b/rules/features.smk index 79d1ba13..63ea7289 100644 --- a/rules/features.smk +++ b/rules/features.smk @@ -408,7 +408,7 @@ rule phone_locations_barnett_daily_features: output: "data/interim/{pid}/phone_locations_barnett_daily.csv" script: - "../src/features/phone_locations/barnett/daily_features.py" + "../src/features/phone_locations/barnett/daily_features.R" rule phone_locations_r_features: input: diff --git a/src/features/phone_locations/barnett/daily_features.R b/src/features/phone_locations/barnett/daily_features.R index 86e87718..25f057f9 100644 --- a/src/features/phone_locations/barnett/daily_features.R +++ b/src/features/phone_locations/barnett/daily_features.R @@ -20,16 +20,14 @@ barnett_daily_features <- function(snakemake){ location_features <- NULL location <- read.csv(snakemake@input[["sensor_data"]], stringsAsFactors = FALSE) segment_labels <- read.csv(snakemake@input[["time_segments_labels"]], stringsAsFactors = FALSE) - accuracy_limit <- snakemake@params[["provider"]][["ACCURACY_LIMIT"]] + accuracy_limit = 999999999 # We filter rows based on accuracy in src/data/process_location_types.R script datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00" datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59" location <- location %>% - filter(accuracy < accuracy_limit) %>% mutate(is_daily = str_detect(assigned_segments, paste0(".*#", 
datetime_start_regex, ",", datetime_end_regex, ".*"))) if(nrow(segment_labels) == 0 || nrow(location) == 0 || all(location$is_daily == FALSE) || (max(location$timestamp) - min(location$timestamp) < 86400000)){ warning("Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59). Values below point to the problem:", - "\nLocation data rows within accuracy: ", nrow(location %>% filter(accuracy < accuracy_limit)), "\nLocation data rows within a daily time segment: ", nrow(filter(location, is_daily)), "\nLocation data time span in days: ", round((max(location$timestamp) - min(location$timestamp)) / 86400000, 2) )