Merge branch 'develop' of https://github.com/carissalow/rapids into develop

pull/130/head
Weiyu 2021-03-16 22:52:50 -04:00
commit 7e919eaaeb
10 changed files with 36 additions and 22 deletions

View File

@ -10,9 +10,9 @@ There are four actions that you need to take if you were using RAPIDS `0.4.3` or
??? check "Deploy RAPIDS in a new folder"
- Clone RAPIDS 1.x in a new folder (do not pull the updates in your current folder)
- Activate your conda environment
- Install renv again `snakemake -j1 renv_install` (for Ubuntu take advantage of the [platform specific R `renv` instructions](../setup/installation))
- Restore renv packages `snakemake -j1 renv_restore` (for Ubuntu take advantage of the [platform specific R `renv` instructions](../setup/installation))
- Activate your conda environment
- Move your participant files `pxx.yaml` to the new folder
- Move your time segment files to the new folder
- Move your `.env` file to the new folder

View File

@ -43,6 +43,7 @@ def get_phone_sensor_names():
return phone_sensor_names
def pull_phone_data_input_with_mutation_scripts(wilcards):
from pathlib import Path
import yaml
input = dict()
phone_stream = config["PHONE_DATA_STREAMS"]["USE"]

View File

@ -49,8 +49,8 @@ validate_user_timezones <- function(timezone_parameters){
}
create_mising_temporal_column <- function(data, device_type){
if(device_type == "fitbit" && all(data$timestamp == 0)){
# For Fitbit we infer the timestamp from Fitbit's local date time
if(device_type == "fitbit" && all(data$timestamp == 0) && "local_date_time" %in% colnames(data)){
# For Fitbit we infer the timestamp from Fitbit's local date time only right after pulling it
if(nrow(data) == 0)
return(data %>% mutate(timestamp = NA_real_))
if(any(is.na(parse_date_time(data$local_date_time, orders= c("%Y/%m/%d %H:%M:%S","%Y-%m-%d %H:%M:%S"), exact=T))))

View File

@ -41,13 +41,13 @@ get_db_engine <- function(group){
#' @return The OS the device ran, "android" or "ios"
infer_device_os <- function(stream_parameters, device){
dbEngine <- get_db_engine(stream_parameters$SOURCE$DATABASE_GROUP)
dbEngine <- get_db_engine(stream_parameters$DATABASE_GROUP)
#need to re-fetch the YAML for the DB name
credentials <- read_yaml("./credentials.yaml")
message(paste0("Utilizing the Influx query for: ", device))
#execute query string
query_object <- influx_select(dbEngine,
db = credentials[[stream_parameters$SOURCE$DATABASE_GROUP]][["database"]],
db = credentials[[stream_parameters$DATABASE_GROUP]][["database"]],
field_keys="device_id,brand",
measurement="aware_device",
where= paste0("device_id = '",device,"'"),
@ -55,7 +55,11 @@ infer_device_os <- function(stream_parameters, device){
#fetches the table from the query_object, filtering rows with ALL n/a
#a behavior of influxdbr is that an all-NA row will be returned when there are no matches
os <- query_object[[1]] %>% filter_all(any_vars(!is.na(.))) %>% select(c('device_id','brand','time'))
columns = c("brand", "device_id")
if(! all(columns %in% colnames( query_object[[1]])))
os <- data.frame(matrix(ncol=length(columns),nrow=0, dimnames=list(NULL, columns)))
else
os <- query_object[[1]] %>% filter_all(any_vars(!is.na(.))) %>% select(columns)
if(nrow(os) > 0)
@ -76,7 +80,7 @@ infer_device_os <- function(stream_parameters, device){
#' @return A dataframe with the sensor data for device
pull_data <- function(stream_parameters, device, sensor, sensor_container, columns){
dbEngine <- get_db_engine(stream_parameters$SOURCE$DATABASE_GROUP)
dbEngine <- get_db_engine(stream_parameters$DATABASE_GROUP)
#need to re-fetch the YAML for the DB name
credentials <- read_yaml("./credentials.yaml")
@ -85,16 +89,17 @@ pull_data <- function(stream_parameters, device, sensor, sensor_container, colum
message(paste0("Executing an Influx query for: ", device, " ", sensor, ". Extracting ", columns, " from ", sensor_container))
#execute query string
query_object <- influx_select(dbEngine,
db = credentials[[stream_parameters$SOURCE$DATABASE_GROUP]][["database"]],
db = credentials[[stream_parameters$DATABASE_GROUP]][["database"]],
field_keys=paste(columns, collapse = ","),
measurement=sensor_container,
where= paste0(columns$DEVICE_ID, " = '",device,"'"),
return_xts=FALSE)
#fetches the table from the query_object, filtering rows with ALL n/a
#a behavior of influxdbr is that an all-NA row will be returned when there are no matches
sensor_data <- query_object[[1]] %>% filter_all(any_vars(!is.na(.))) %>% select(c('time',columns))
columns = unlist(columns, use.names = FALSE)
if(! all(columns %in% colnames( query_object[[1]])))
sensor_data <- data.frame(matrix(ncol=length(columns),nrow=0, dimnames=list(NULL, columns)))
else
sensor_data <- query_object[[1]] %>% filter_all(any_vars(!is.na(.))) %>% select(columns)
if(nrow(sensor_data) == 0)
warning(paste("The device '", device,"' did not have data in ", sensor_container))

View File

@ -12,7 +12,7 @@ get_db_engine <- function(group){
# The working dir is always the RAPIDS root folder, so your credentials file is always /credentials.yaml
credentials <- read_yaml("./credentials.yaml")
if(!group %in% names(credentials))
stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ",")))
stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ","), ". Did you forget to set the group in [PHONE_DATA_STREAMS][aware_mysql][DATABASE_GROUP] in config.yaml?"))
dbEngine <- dbConnect(MariaDB(), db = credentials[[group]][["database"]],
username = credentials[[group]][["user"]],
password = credentials[[group]][["password"]],

View File

@ -12,7 +12,7 @@ get_db_engine <- function(group){
# The working dir is always the RAPIDS root folder, so your credentials file is always /credentials.yaml
credentials <- read_yaml("./credentials.yaml")
if(!group %in% names(credentials))
stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ",")))
stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ","), ". Did you forget to set the group in [PHONE_DATA_STREAMS][fitbitjson_mysql][DATABASE_GROUP] in config.yaml?"))
dbEngine <- dbConnect(MariaDB(), db = credentials[[group]][["database"]],
username = credentials[[group]][["user"]],
password = credentials[[group]][["password"]],

View File

@ -12,7 +12,7 @@ get_db_engine <- function(group){
# The working dir is always the RAPIDS root folder, so your credentials file is always /credentials.yaml
credentials <- read_yaml("./credentials.yaml")
if(!group %in% names(credentials))
stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ",")))
stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ","), ". Did you forget to set the group in [PHONE_DATA_STREAMS][fitbitparsed_mysql][DATABASE_GROUP] in config.yaml?"))
dbEngine <- dbConnect(MariaDB(), db = credentials[[group]][["database"]],
username = credentials[[group]][["user"]],
password = credentials[[group]][["password"]],

View File

@ -119,9 +119,6 @@ pull_phone_data <- function(){
device_oss <- replace(device_oss, device_oss == "multiple", "infer") # support multiple for retro compatibility
validate_deviceid_platforms(devices, device_oss, participant_file)
if(length(devices) == 0)
stop("There were no PHONE device ids in this participant file:", participant_file)
if(length(device_oss) == 1)
device_oss <- rep(device_oss, length(devices))
@ -130,6 +127,12 @@ pull_phone_data <- function(){
expected_columns <- tolower(rapids_schema[[sensor]])
participant_data <- setNames(data.frame(matrix(ncol = length(expected_columns), nrow = 0)), expected_columns)
if(length(devices) == 0){
warning("There were no PHONE device ids in this participant file:", participant_file)
write_csv(participant_data, output_data_file)
return()
}
container_functions <- load_container_script(stream_container)
infer_device_os_container <- container_functions$infer_device_os
pull_data_container <- container_functions$pull_data

View File

@ -85,12 +85,17 @@ pull_wearable_data_main <- function(){
stream_schema <- read_yaml(stream_format)
rapids_schema <- read_yaml(rapids_schema_file)
devices <- participant_data[[toupper(device_type)]]$DEVICE_IDS
if(length(devices) == 0)
stop("There were no ", device_type ," device ids in this participant file: ", participant_file)
validate_expected_columns_mapping(stream_schema, rapids_schema, sensor, rapids_schema_file, stream_format)
expected_columns <- tolower(names(stream_schema[[sensor]][["RAPIDS_COLUMN_MAPPINGS"]]))
participant_data <- setNames(data.frame(matrix(ncol = length(expected_columns), nrow = 0)), expected_columns)
if(length(devices) == 0){
warning("There were no ", device_type ," device ids in this participant file: ", participant_file)
write_csv(participant_data, output_data_file)
return()
}
pull_data_container <- load_container_script(stream_container)
for(idx in seq_along(devices)){ #TODO remove length

View File

@ -56,7 +56,7 @@ get_screen_episodes <- function(screen){
}
if(nrow(screen) < 2){
episodes <- data.frame(device_id = character(),,
episodes <- data.frame(device_id = character(),
episode = character(),
screen_sequence = character(),
start_timestamp = character(),