From d6c22fdbc710246896a8eff86f457ec0d6c71c60 Mon Sep 17 00:00:00 2001 From: JulioV Date: Mon, 15 Mar 2021 19:35:58 -0400 Subject: [PATCH 1/5] Fix an import bug and docs --- docs/migrating-from-old-versions.md | 2 +- rules/common.smk | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/migrating-from-old-versions.md b/docs/migrating-from-old-versions.md index 83b5c4fa..ee250da1 100644 --- a/docs/migrating-from-old-versions.md +++ b/docs/migrating-from-old-versions.md @@ -10,9 +10,9 @@ There are four actions that you need to take if you were using RAPIDS `0.4.3` or ??? check "Deploy RAPIDS in a new folder" - Clone RAPIDS 1.x in a new folder (do not pull the updates in your current folder) + - Activate your conda environment - Install renv again `snakemake -j1 renv_install` (for Ubuntu take advantage of the [platform specific R `renv` instructions](../setup/installation)) - Restore renv packages `snakemake -j1 renv_restore` (for Ubuntu take advantage of the [platform specific R `renv` instructions](../setup/installation)) - - Activate your conda environment - Move your participant files `pxx.yaml` to the new folder - Move your time segment files to the new folder - Move your `.env` file to the new folder diff --git a/rules/common.smk b/rules/common.smk index 29ee7b71..3e34a105 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -43,6 +43,7 @@ def get_phone_sensor_names(): return phone_sensor_names def pull_phone_data_input_with_mutation_scripts(wilcards): + from pathlib import Path import yaml input = dict() phone_stream = config["PHONE_DATA_STREAMS"]["USE"] From bb737237d0ca0dbfed5c3a984e3dcf9b4b03bc72 Mon Sep 17 00:00:00 2001 From: JulioV Date: Tue, 16 Mar 2021 11:26:46 -0400 Subject: [PATCH 2/5] Fixes for aware_influxdb --- src/data/streams/aware_influxdb/container.R | 25 ++++++++++++--------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/data/streams/aware_influxdb/container.R b/src/data/streams/aware_influxdb/container.R index e844be99..e3f102c3 100644 --- a/src/data/streams/aware_influxdb/container.R +++ b/src/data/streams/aware_influxdb/container.R @@ -41,13 +41,13 @@ get_db_engine <- function(group){ #' @return The OS the device ran, "android" or "ios" infer_device_os <- function(stream_parameters, device){ - dbEngine <- get_db_engine(stream_parameters$SOURCE$DATABASE_GROUP) + dbEngine <- get_db_engine(stream_parameters$DATABASE_GROUP) #need to re-fetch the YAML for the DB name credentials <- read_yaml("./credentials.yaml") message(paste0("Utilizing the Influx query for: ", device)) #execute query string query_object <- influx_select(dbEngine, - db = credentials[[stream_parameters$SOURCE$DATABASE_GROUP]][["database"]], + db = credentials[[stream_parameters$DATABASE_GROUP]][["database"]], field_keys="device_id,brand", measurement="aware_device", where= paste0("device_id = '",device,"'"), @@ -55,7 +55,11 @@ infer_device_os <- function(stream_parameters, device){ #fetches the table from the query_object, filtering rows with ALL n/a #a behavior of influxdbr is that one all NA row will be returned with no matches - os <- query_object[[1]] %>% filter_all(any_vars(!is.na(.))) %>% select(c('device_id','brand','time')) + columns = c("brand", "device_id") + if(! all(columns %in% colnames( query_object[[1]]))) + os <- data.frame(matrix(ncol=length(columns),nrow=0, dimnames=list(NULL, columns))) + else + os <- query_object[[1]] %>% filter_all(any_vars(!is.na(.))) %>% select(columns) if(nrow(os) > 0) @@ -76,7 +80,7 @@ infer_device_os <- function(stream_parameters, device){ #' @return A dataframe with the sensor data for device pull_data <- function(stream_parameters, device, sensor, sensor_container, columns){ - dbEngine <- get_db_engine(stream_parameters$SOURCE$DATABASE_GROUP) + dbEngine <- get_db_engine(stream_parameters$DATABASE_GROUP) #need to re-fetch the YAML for the DB name credentials <- read_yaml("./credentials.yaml") @@ -85,16 +89,17 @@ pull_data <- function(stream_parameters, device, sensor, sensor_container, colum message(paste0("Executing an Influx query for: ", device, " ", sensor, ". Extracting ", columns, " from ", sensor_container)) #execute query string query_object <- influx_select(dbEngine, - db = credentials[[stream_parameters$SOURCE$DATABASE_GROUP]][["database"]], + db = credentials[[stream_parameters$DATABASE_GROUP]][["database"]], field_keys=paste(columns, collapse = ","), measurement=sensor_container, where= paste0(columns$DEVICE_ID, " = '",device,"'"), return_xts=FALSE) - - - #fetches the table from the query_object, filtering rows with ALL n/a - #a behavior of influxdbr is that one all NA row will be returned with no matches - sensor_data <- query_object[[1]] %>% filter_all(any_vars(!is.na(.))) %>% select(c('time',columns)) + + columns = unlist(columns, use.names = FALSE) + if(! all(columns %in% colnames( query_object[[1]]))) + sensor_data <- data.frame(matrix(ncol=length(columns),nrow=0, dimnames=list(NULL, columns))) + else + sensor_data <- query_object[[1]] %>% filter_all(any_vars(!is.na(.))) %>% select(columns) if(nrow(sensor_data) == 0) warning(paste("The device '", device,"' did not have data in ", sensor_container)) From 4c2f60fffd5b4acd8015b334f6056c37f053ba8d Mon Sep 17 00:00:00 2001 From: JulioV Date: Tue, 16 Mar 2021 20:01:43 -0400 Subject: [PATCH 3/5] Fix bugs in readable datetime and screen episodes --- src/data/datetime/readable_datetime.R | 4 ++-- src/features/phone_screen/episodes/screen_episodes.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/data/datetime/readable_datetime.R b/src/data/datetime/readable_datetime.R index 86e57b95..4f2a289c 100644 --- a/src/data/datetime/readable_datetime.R +++ b/src/data/datetime/readable_datetime.R @@ -49,8 +49,8 @@ validate_user_timezones <- function(timezone_parameters){ } create_mising_temporal_column <- function(data, device_type){ - if(device_type == "fitbit" && all(data$timestamp == 0)){ - # For fibit we infere timestamp from Fitbit's local date time + if(device_type == "fitbit" && all(data$timestamp == 0) && "local_date_time" %in% colnames(data)){ + # For fibit we infere timestamp from Fitbit's local date time only right after pulling it if(nrow(data) == 0) return(data %>% mutate(timestamp = NA_real_)) if(any(is.na(parse_date_time(data$local_date_time, orders= c("%Y/%m/%d %H:%M:%S","%Y-%m-%d %H:%M:%S"), exact=T)))) diff --git a/src/features/phone_screen/episodes/screen_episodes.R b/src/features/phone_screen/episodes/screen_episodes.R index 51e9ae37..26e024f4 100644 --- a/src/features/phone_screen/episodes/screen_episodes.R +++ b/src/features/phone_screen/episodes/screen_episodes.R @@ -56,7 +56,7 @@ get_screen_episodes <- function(screen){ } if(nrow(screen) < 2){ - episodes <- data.frame(device_id = character(),, + episodes <- data.frame(device_id = character(), episode = character(), screen_sequence = character(), start_timestamp = character(), From 6e234f79511f9f0cef743749e130ae36e459d486 Mon Sep 17 00:00:00 2001 From: JulioV Date: Tue, 16 Mar 2021 20:02:16 -0400 Subject: [PATCH 4/5] Fix warn instead of stop when there are not device ids --- src/data/streams/pull_phone_data.R | 9 ++++++--- src/data/streams/pull_wearable_data.R | 9 +++++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/data/streams/pull_phone_data.R b/src/data/streams/pull_phone_data.R index 016c96fb..3755119d 100644 --- a/src/data/streams/pull_phone_data.R +++ b/src/data/streams/pull_phone_data.R @@ -119,9 +119,6 @@ pull_phone_data <- function(){ device_oss <- replace(device_oss, device_oss == "multiple", "infer") # support multiple for retro compatibility validate_deviceid_platforms(devices, device_oss, participant_file) - if(length(devices) == 0) - stop("There were no PHONE device ids in this participant file:", participant_file) - if(length(device_oss) == 1) device_oss <- rep(device_oss, length(devices)) @@ -130,6 +127,12 @@ pull_phone_data <- function(){ expected_columns <- tolower(rapids_schema[[sensor]]) participant_data <- setNames(data.frame(matrix(ncol = length(expected_columns), nrow = 0)), expected_columns) + if(length(devices) == 0){ + warning("There were no PHONE device ids in this participant file:", participant_file) + write_csv(participant_data, output_data_file) + return() + } + container_functions <- load_container_script(stream_container) infer_device_os_container <- container_functions$infer_device_os pull_data_container <- container_functions$pull_data diff --git a/src/data/streams/pull_wearable_data.R b/src/data/streams/pull_wearable_data.R index 9f744231..9a0b1647 100644 --- a/src/data/streams/pull_wearable_data.R +++ b/src/data/streams/pull_wearable_data.R @@ -85,12 +85,17 @@ pull_wearable_data_main <- function(){ stream_schema <- read_yaml(stream_format) rapids_schema <- read_yaml(rapids_schema_file) devices <- participant_data[[toupper(device_type)]]$DEVICE_IDS - if(length(devices) == 0) - stop("There were no ", device_type ," device ids in this participant file: ", participant_file) + validate_expected_columns_mapping(stream_schema, rapids_schema, sensor, rapids_schema_file, stream_format) expected_columns <- tolower(names(stream_schema[[sensor]][["RAPIDS_COLUMN_MAPPINGS"]])) participant_data <- setNames(data.frame(matrix(ncol = length(expected_columns), nrow = 0)), expected_columns) + if(length(devices) == 0){ + warning("There were no ", device_type ," device ids in this participant file: ", participant_file) + write_csv(participant_data, output_data_file) + return() + } + pull_data_container <- load_container_script(stream_container) for(idx in seq_along(devices)){ #TODO remove length From 771c14a9280917168eef6f176becf4704f8ebb62 Mon Sep 17 00:00:00 2001 From: JulioV Date: Tue, 16 Mar 2021 20:02:44 -0400 Subject: [PATCH 5/5] Improve mysql containers error messages --- src/data/streams/aware_mysql/container.R | 2 +- src/data/streams/fitbitjson_mysql/container.R | 2 +- src/data/streams/fitbitparsed_mysql/container.R | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/data/streams/aware_mysql/container.R b/src/data/streams/aware_mysql/container.R index f4e93d70..50705f7c 100644 --- a/src/data/streams/aware_mysql/container.R +++ b/src/data/streams/aware_mysql/container.R @@ -12,7 +12,7 @@ get_db_engine <- function(group){ # The working dir is aways RAPIDS root folder, so your credentials file is always /credentials.yaml credentials <- read_yaml("./credentials.yaml") if(!group %in% names(credentials)) - stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ","))) + stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ","), ". Did you forget to set the group in [PHONE_DATA_STREAMS][aware_mysql][DATABASE_GROUP] in config.yaml?")) dbEngine <- dbConnect(MariaDB(), db = credentials[[group]][["database"]], username = credentials[[group]][["user"]], password = credentials[[group]][["password"]], diff --git a/src/data/streams/fitbitjson_mysql/container.R b/src/data/streams/fitbitjson_mysql/container.R index c9f0c736..02ad29d2 100644 --- a/src/data/streams/fitbitjson_mysql/container.R +++ b/src/data/streams/fitbitjson_mysql/container.R @@ -12,7 +12,7 @@ get_db_engine <- function(group){ # The working dir is aways RAPIDS root folder, so your credentials file is always /credentials.yaml credentials <- read_yaml("./credentials.yaml") if(!group %in% names(credentials)) - stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ","))) + stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ","), ". Did you forget to set the group in [PHONE_DATA_STREAMS][fitbitjson_mysql][DATABASE_GROUP] in config.yaml?")) dbEngine <- dbConnect(MariaDB(), db = credentials[[group]][["database"]], username = credentials[[group]][["user"]], password = credentials[[group]][["password"]], diff --git a/src/data/streams/fitbitparsed_mysql/container.R b/src/data/streams/fitbitparsed_mysql/container.R index c9f0c736..ccf1eafd 100644 --- a/src/data/streams/fitbitparsed_mysql/container.R +++ b/src/data/streams/fitbitparsed_mysql/container.R @@ -12,7 +12,7 @@ get_db_engine <- function(group){ # The working dir is aways RAPIDS root folder, so your credentials file is always /credentials.yaml credentials <- read_yaml("./credentials.yaml") if(!group %in% names(credentials)) - stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ","))) + stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ","), ". Did you forget to set the group in [PHONE_DATA_STREAMS][fitbitparsed_mysql][DATABASE_GROUP] in config.yaml?")) dbEngine <- dbConnect(MariaDB(), db = credentials[[group]][["database"]], username = credentials[[group]][["user"]], password = credentials[[group]][["password"]],