Temporarily revert PHONE_LOCATIONS BARNETT provider to use the R script

2021-09-23 18:16:13 -04:00 · 2021-09-23 18:16:13 -04:00 · f340b89c58
parent a3fb718aea
commit f340b89c58
3 changed files with 4 additions and 4 deletions
--- a/docs/change-log.md
+++ b/docs/change-log.md
@ -1,6 +1,8 @@
 # Change Log
 ## v1.6.0
 - Refactor PHONE_CALLS RAPIDS provider to compute features based on call episodes or events
 - Refactor PHONE_LOCATIONS DORYAB provider to compute features based on location episodes
 - Temporarily revert PHONE_LOCATIONS BARNETT provider to use the R script
 ## v1.5.0
 - Update Barnett location features with faster Python implementation
 - Fix rounding bug in data yield features
--- a/rules/features.smk
+++ b/rules/features.smk
@ -408,7 +408,7 @@ rule phone_locations_barnett_daily_features:
    output:
        "data/interim/{pid}/phone_locations_barnett_daily.csv"
    script:
-        "../src/features/phone_locations/barnett/daily_features.py"
+        "../src/features/phone_locations/barnett/daily_features.R"
 rule phone_locations_r_features:
    input:
--- a/src/features/phone_locations/barnett/daily_features.R
+++ b/src/features/phone_locations/barnett/daily_features.R
@ -20,16 +20,14 @@ barnett_daily_features <- function(snakemake){
  location_features <- NULL
  location <- read.csv(snakemake@input[["sensor_data"]], stringsAsFactors = FALSE)
  segment_labels <- read.csv(snakemake@input[["time_segments_labels"]], stringsAsFactors = FALSE)
-  accuracy_limit <- snakemake@params[["provider"]][["ACCURACY_LIMIT"]]
+  accuracy_limit = 999999999 # We filter rows based on accuracy in src/data/process_location_types.R script
  datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
  datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59"
  location <- location %>% 
    filter(accuracy < accuracy_limit) %>% 
    mutate(is_daily = str_detect(assigned_segments, paste0(".*#", datetime_start_regex, ",", datetime_end_regex, ".*")))
  if(nrow(segment_labels) == 0 || nrow(location) == 0 || all(location$is_daily == FALSE) || (max(location$timestamp) - min(location$timestamp) < 86400000)){
    warning("Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59). Values below point to the problem:",
            "\nLocation data rows within accuracy: ", nrow(location %>% filter(accuracy < accuracy_limit)),
            "\nLocation data rows within a daily time segment: ", nrow(filter(location, is_daily)),
            "\nLocation data time span in days: ", round((max(location$timestamp) - min(location$timestamp)) / 86400000, 2)
            )