Merge branch 'develop' of https://github.com/carissalow/rapids into develop
commit
7e919eaaeb
|
@ -10,9 +10,9 @@ There are four actions that you need to take if you were using RAPIDS `0.4.3` or
|
|||
??? check "Deploy RAPIDS in a new folder"
|
||||
|
||||
- Clone RAPIDS 1.x in a new folder (do not pull the updates in your current folder)
|
||||
- Activate your conda environment
|
||||
- Install renv again `snakemake -j1 renv_install` (for Ubuntu take advantage of the [platform specific R `renv` instructions](../setup/installation))
|
||||
- Restore renv packages `snakemake -j1 renv_restore` (for Ubuntu take advantage of the [platform specific R `renv` instructions](../setup/installation))
|
||||
- Activate your conda environment
|
||||
- Move your participant files `pxx.yaml` to the new folder
|
||||
- Move your time segment files to the new folder
|
||||
- Move your `.env` file to the new folder
|
||||
|
|
|
@ -43,6 +43,7 @@ def get_phone_sensor_names():
|
|||
return phone_sensor_names
|
||||
|
||||
def pull_phone_data_input_with_mutation_scripts(wilcards):
|
||||
from pathlib import Path
|
||||
import yaml
|
||||
input = dict()
|
||||
phone_stream = config["PHONE_DATA_STREAMS"]["USE"]
|
||||
|
|
|
@ -49,8 +49,8 @@ validate_user_timezones <- function(timezone_parameters){
|
|||
}
|
||||
|
||||
create_mising_temporal_column <- function(data, device_type){
|
||||
if(device_type == "fitbit" && all(data$timestamp == 0)){
|
||||
# For Fitbit we infer the timestamp from Fitbit's local date time
|
||||
if(device_type == "fitbit" && all(data$timestamp == 0) && "local_date_time" %in% colnames(data)){
|
||||
# For Fitbit we infer the timestamp from Fitbit's local date time, but only right after pulling the data
|
||||
if(nrow(data) == 0)
|
||||
return(data %>% mutate(timestamp = NA_real_))
|
||||
if(any(is.na(parse_date_time(data$local_date_time, orders= c("%Y/%m/%d %H:%M:%S","%Y-%m-%d %H:%M:%S"), exact=T))))
|
||||
|
|
|
@ -41,13 +41,13 @@ get_db_engine <- function(group){
|
|||
#' @return The OS the device ran, "android" or "ios"
|
||||
|
||||
infer_device_os <- function(stream_parameters, device){
|
||||
dbEngine <- get_db_engine(stream_parameters$SOURCE$DATABASE_GROUP)
|
||||
dbEngine <- get_db_engine(stream_parameters$DATABASE_GROUP)
|
||||
#need to re-fetch the YAML for the DB name
|
||||
credentials <- read_yaml("./credentials.yaml")
|
||||
message(paste0("Utilizing the Influx query for: ", device))
|
||||
#execute query string
|
||||
query_object <- influx_select(dbEngine,
|
||||
db = credentials[[stream_parameters$SOURCE$DATABASE_GROUP]][["database"]],
|
||||
db = credentials[[stream_parameters$DATABASE_GROUP]][["database"]],
|
||||
field_keys="device_id,brand",
|
||||
measurement="aware_device",
|
||||
where= paste0("device_id = '",device,"'"),
|
||||
|
@ -55,7 +55,11 @@ infer_device_os <- function(stream_parameters, device){
|
|||
|
||||
#fetches the table from the query_object, filtering rows with ALL n/a
|
||||
#influxdbr returns a single all-NA row when the query has no matches
|
||||
os <- query_object[[1]] %>% filter_all(any_vars(!is.na(.))) %>% select(c('device_id','brand','time'))
|
||||
columns = c("brand", "device_id")
|
||||
if(! all(columns %in% colnames( query_object[[1]])))
|
||||
os <- data.frame(matrix(ncol=length(columns),nrow=0, dimnames=list(NULL, columns)))
|
||||
else
|
||||
os <- query_object[[1]] %>% filter_all(any_vars(!is.na(.))) %>% select(columns)
|
||||
|
||||
|
||||
if(nrow(os) > 0)
|
||||
|
@ -76,7 +80,7 @@ infer_device_os <- function(stream_parameters, device){
|
|||
#' @return A dataframe with the sensor data for device
|
||||
|
||||
pull_data <- function(stream_parameters, device, sensor, sensor_container, columns){
|
||||
dbEngine <- get_db_engine(stream_parameters$SOURCE$DATABASE_GROUP)
|
||||
dbEngine <- get_db_engine(stream_parameters$DATABASE_GROUP)
|
||||
#need to re-fetch the YAML for the DB name
|
||||
credentials <- read_yaml("./credentials.yaml")
|
||||
|
||||
|
@ -85,16 +89,17 @@ pull_data <- function(stream_parameters, device, sensor, sensor_container, colum
|
|||
message(paste0("Executing an Influx query for: ", device, " ", sensor, ". Extracting ", columns, " from ", sensor_container))
|
||||
#execute query string
|
||||
query_object <- influx_select(dbEngine,
|
||||
db = credentials[[stream_parameters$SOURCE$DATABASE_GROUP]][["database"]],
|
||||
db = credentials[[stream_parameters$DATABASE_GROUP]][["database"]],
|
||||
field_keys=paste(columns, collapse = ","),
|
||||
measurement=sensor_container,
|
||||
where= paste0(columns$DEVICE_ID, " = '",device,"'"),
|
||||
return_xts=FALSE)
|
||||
|
||||
|
||||
#fetches the table from the query_object, filtering rows with ALL n/a
|
||||
#influxdbr returns a single all-NA row when the query has no matches
|
||||
sensor_data <- query_object[[1]] %>% filter_all(any_vars(!is.na(.))) %>% select(c('time',columns))
|
||||
columns = unlist(columns, use.names = FALSE)
|
||||
if(! all(columns %in% colnames( query_object[[1]])))
|
||||
sensor_data <- data.frame(matrix(ncol=length(columns),nrow=0, dimnames=list(NULL, columns)))
|
||||
else
|
||||
sensor_data <- query_object[[1]] %>% filter_all(any_vars(!is.na(.))) %>% select(columns)
|
||||
|
||||
if(nrow(sensor_data) == 0)
|
||||
warning(paste("The device '", device,"' did not have data in ", sensor_container))
|
||||
|
|
|
@ -12,7 +12,7 @@ get_db_engine <- function(group){
|
|||
# The working dir is always the RAPIDS root folder, so your credentials file is always ./credentials.yaml
|
||||
credentials <- read_yaml("./credentials.yaml")
|
||||
if(!group %in% names(credentials))
|
||||
stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ",")))
|
||||
stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ","), ". Did you forget to set the group in [PHONE_DATA_STREAMS][aware_mysql][DATABASE_GROUP] in config.yaml?"))
|
||||
dbEngine <- dbConnect(MariaDB(), db = credentials[[group]][["database"]],
|
||||
username = credentials[[group]][["user"]],
|
||||
password = credentials[[group]][["password"]],
|
||||
|
|
|
@ -12,7 +12,7 @@ get_db_engine <- function(group){
|
|||
# The working dir is always the RAPIDS root folder, so your credentials file is always ./credentials.yaml
|
||||
credentials <- read_yaml("./credentials.yaml")
|
||||
if(!group %in% names(credentials))
|
||||
stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ",")))
|
||||
stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ","), ". Did you forget to set the group in [PHONE_DATA_STREAMS][fitbitjson_mysql][DATABASE_GROUP] in config.yaml?"))
|
||||
dbEngine <- dbConnect(MariaDB(), db = credentials[[group]][["database"]],
|
||||
username = credentials[[group]][["user"]],
|
||||
password = credentials[[group]][["password"]],
|
||||
|
|
|
@ -12,7 +12,7 @@ get_db_engine <- function(group){
|
|||
# The working dir is always the RAPIDS root folder, so your credentials file is always ./credentials.yaml
|
||||
credentials <- read_yaml("./credentials.yaml")
|
||||
if(!group %in% names(credentials))
|
||||
stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ",")))
|
||||
stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ","), ". Did you forget to set the group in [PHONE_DATA_STREAMS][fitbitparsed_mysql][DATABASE_GROUP] in config.yaml?"))
|
||||
dbEngine <- dbConnect(MariaDB(), db = credentials[[group]][["database"]],
|
||||
username = credentials[[group]][["user"]],
|
||||
password = credentials[[group]][["password"]],
|
||||
|
|
|
@ -119,9 +119,6 @@ pull_phone_data <- function(){
|
|||
device_oss <- replace(device_oss, device_oss == "multiple", "infer") # support multiple for retro compatibility
|
||||
validate_deviceid_platforms(devices, device_oss, participant_file)
|
||||
|
||||
if(length(devices) == 0)
|
||||
stop("There were no PHONE device ids in this participant file:", participant_file)
|
||||
|
||||
if(length(device_oss) == 1)
|
||||
device_oss <- rep(device_oss, length(devices))
|
||||
|
||||
|
@ -130,6 +127,12 @@ pull_phone_data <- function(){
|
|||
expected_columns <- tolower(rapids_schema[[sensor]])
|
||||
participant_data <- setNames(data.frame(matrix(ncol = length(expected_columns), nrow = 0)), expected_columns)
|
||||
|
||||
if(length(devices) == 0){
|
||||
warning("There were no PHONE device ids in this participant file:", participant_file)
|
||||
write_csv(participant_data, output_data_file)
|
||||
return()
|
||||
}
|
||||
|
||||
container_functions <- load_container_script(stream_container)
|
||||
infer_device_os_container <- container_functions$infer_device_os
|
||||
pull_data_container <- container_functions$pull_data
|
||||
|
|
|
@ -85,12 +85,17 @@ pull_wearable_data_main <- function(){
|
|||
stream_schema <- read_yaml(stream_format)
|
||||
rapids_schema <- read_yaml(rapids_schema_file)
|
||||
devices <- participant_data[[toupper(device_type)]]$DEVICE_IDS
|
||||
if(length(devices) == 0)
|
||||
stop("There were no ", device_type ," device ids in this participant file: ", participant_file)
|
||||
|
||||
validate_expected_columns_mapping(stream_schema, rapids_schema, sensor, rapids_schema_file, stream_format)
|
||||
expected_columns <- tolower(names(stream_schema[[sensor]][["RAPIDS_COLUMN_MAPPINGS"]]))
|
||||
participant_data <- setNames(data.frame(matrix(ncol = length(expected_columns), nrow = 0)), expected_columns)
|
||||
|
||||
if(length(devices) == 0){
|
||||
warning("There were no ", device_type ," device ids in this participant file: ", participant_file)
|
||||
write_csv(participant_data, output_data_file)
|
||||
return()
|
||||
}
|
||||
|
||||
pull_data_container <- load_container_script(stream_container)
|
||||
|
||||
for(idx in seq_along(devices)){ #TODO remove length
|
||||
|
|
|
@ -56,7 +56,7 @@ get_screen_episodes <- function(screen){
|
|||
}
|
||||
|
||||
if(nrow(screen) < 2){
|
||||
episodes <- data.frame(device_id = character(),,
|
||||
episodes <- data.frame(device_id = character(),
|
||||
episode = character(),
|
||||
screen_sequence = character(),
|
||||
start_timestamp = character(),
|
||||
|
|
Loading…
Reference in New Issue