Temporarily revert PHONE_LOCATIONS BARNETT provider to use the R script

pull/167/head
Meng Li 2021-09-23 18:16:13 -04:00
parent a3fb718aea
commit f340b89c58
3 changed files with 4 additions and 4 deletions

View File

@ -1,6 +1,8 @@
# Change Log # Change Log
## v1.6.0 ## v1.6.0
- Refactor PHONE_CALLS RAPIDS provider to compute features based on call episodes or events - Refactor PHONE_CALLS RAPIDS provider to compute features based on call episodes or events
- Refactor PHONE_LOCATIONS DORYAB provider to compute features based on location episodes
- Temporarily revert the PHONE_LOCATIONS BARNETT provider to use the R script
## v1.5.0 ## v1.5.0
- Update Barnett location features with faster Python implementation - Update Barnett location features with faster Python implementation
- Fix rounding bug in data yield features - Fix rounding bug in data yield features

View File

@ -408,7 +408,7 @@ rule phone_locations_barnett_daily_features:
output: output:
"data/interim/{pid}/phone_locations_barnett_daily.csv" "data/interim/{pid}/phone_locations_barnett_daily.csv"
script: script:
"../src/features/phone_locations/barnett/daily_features.py" "../src/features/phone_locations/barnett/daily_features.R"
rule phone_locations_r_features: rule phone_locations_r_features:
input: input:

View File

@ -20,16 +20,14 @@ barnett_daily_features <- function(snakemake){
location_features <- NULL location_features <- NULL
location <- read.csv(snakemake@input[["sensor_data"]], stringsAsFactors = FALSE) location <- read.csv(snakemake@input[["sensor_data"]], stringsAsFactors = FALSE)
segment_labels <- read.csv(snakemake@input[["time_segments_labels"]], stringsAsFactors = FALSE) segment_labels <- read.csv(snakemake@input[["time_segments_labels"]], stringsAsFactors = FALSE)
accuracy_limit <- snakemake@params[["provider"]][["ACCURACY_LIMIT"]] accuracy_limit = 999999999 # We filter rows based on accuracy in src/data/process_location_types.R script
datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00" datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59" datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59"
location <- location %>% location <- location %>%
filter(accuracy < accuracy_limit) %>%
mutate(is_daily = str_detect(assigned_segments, paste0(".*#", datetime_start_regex, ",", datetime_end_regex, ".*"))) mutate(is_daily = str_detect(assigned_segments, paste0(".*#", datetime_start_regex, ",", datetime_end_regex, ".*")))
if(nrow(segment_labels) == 0 || nrow(location) == 0 || all(location$is_daily == FALSE) || (max(location$timestamp) - min(location$timestamp) < 86400000)){ if(nrow(segment_labels) == 0 || nrow(location) == 0 || all(location$is_daily == FALSE) || (max(location$timestamp) - min(location$timestamp) < 86400000)){
warning("Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59). Values below point to the problem:", warning("Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59). Values below point to the problem:",
"\nLocation data rows within accuracy: ", nrow(location %>% filter(accuracy < accuracy_limit)),
"\nLocation data rows within a daily time segment: ", nrow(filter(location, is_daily)), "\nLocation data rows within a daily time segment: ", nrow(filter(location, is_daily)),
"\nLocation data time span in days: ", round((max(location$timestamp) - min(location$timestamp)) / 86400000, 2) "\nLocation data time span in days: ", round((max(location$timestamp) - min(location$timestamp)) / 86400000, 2)
) )