Temporarily revert PHONE_LOCATIONS BARNETT provider to use R script
parent
a3fb718aea
commit
f340b89c58
|
@ -1,6 +1,8 @@
|
||||||
# Change Log
|
# Change Log
|
||||||
## v1.6.0
|
## v1.6.0
|
||||||
- Refactor PHONE_CALLS RAPIDS provider to compute features based on call episodes or events
|
- Refactor PHONE_CALLS RAPIDS provider to compute features based on call episodes or events
|
||||||
|
- Refactor PHONE_LOCATIONS DORYAB provider to compute features based on location episodes
|
||||||
|
- Temporarily revert PHONE_LOCATIONS BARNETT provider to use R script
|
||||||
## v1.5.0
|
## v1.5.0
|
||||||
- Update Barnett location features with faster Python implementation
|
- Update Barnett location features with faster Python implementation
|
||||||
- Fix rounding bug in data yield features
|
- Fix rounding bug in data yield features
|
||||||
|
|
|
@ -408,7 +408,7 @@ rule phone_locations_barnett_daily_features:
|
||||||
output:
|
output:
|
||||||
"data/interim/{pid}/phone_locations_barnett_daily.csv"
|
"data/interim/{pid}/phone_locations_barnett_daily.csv"
|
||||||
script:
|
script:
|
||||||
"../src/features/phone_locations/barnett/daily_features.py"
|
"../src/features/phone_locations/barnett/daily_features.R"
|
||||||
|
|
||||||
rule phone_locations_r_features:
|
rule phone_locations_r_features:
|
||||||
input:
|
input:
|
||||||
|
|
|
@ -20,16 +20,14 @@ barnett_daily_features <- function(snakemake){
|
||||||
location_features <- NULL
|
location_features <- NULL
|
||||||
location <- read.csv(snakemake@input[["sensor_data"]], stringsAsFactors = FALSE)
|
location <- read.csv(snakemake@input[["sensor_data"]], stringsAsFactors = FALSE)
|
||||||
segment_labels <- read.csv(snakemake@input[["time_segments_labels"]], stringsAsFactors = FALSE)
|
segment_labels <- read.csv(snakemake@input[["time_segments_labels"]], stringsAsFactors = FALSE)
|
||||||
accuracy_limit <- snakemake@params[["provider"]][["ACCURACY_LIMIT"]]
|
accuracy_limit = 999999999 # We filter rows based on accuracy in src/data/process_location_types.R script
|
||||||
datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
|
datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
|
||||||
datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59"
|
datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59"
|
||||||
location <- location %>%
|
location <- location %>%
|
||||||
filter(accuracy < accuracy_limit) %>%
|
|
||||||
mutate(is_daily = str_detect(assigned_segments, paste0(".*#", datetime_start_regex, ",", datetime_end_regex, ".*")))
|
mutate(is_daily = str_detect(assigned_segments, paste0(".*#", datetime_start_regex, ",", datetime_end_regex, ".*")))
|
||||||
|
|
||||||
if(nrow(segment_labels) == 0 || nrow(location) == 0 || all(location$is_daily == FALSE) || (max(location$timestamp) - min(location$timestamp) < 86400000)){
|
if(nrow(segment_labels) == 0 || nrow(location) == 0 || all(location$is_daily == FALSE) || (max(location$timestamp) - min(location$timestamp) < 86400000)){
|
||||||
warning("Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59). Values below point to the problem:",
|
warning("Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59). Values below point to the problem:",
|
||||||
"\nLocation data rows within accuracy: ", nrow(location %>% filter(accuracy < accuracy_limit)),
|
|
||||||
"\nLocation data rows within a daily time segment: ", nrow(filter(location, is_daily)),
|
"\nLocation data rows within a daily time segment: ", nrow(filter(location, is_daily)),
|
||||||
"\nLocation data time span in days: ", round((max(location$timestamp) - min(location$timestamp)) / 86400000, 2)
|
"\nLocation data time span in days: ", round((max(location$timestamp) - min(location$timestamp)) / 86400000, 2)
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue