# If you need a new package, you should add it with renv::install(package) so your renv venv is updated
library(RPostgres) # Needs libpq-dev for compiling from source.
# Error installing package 'RPostgres':
# =====================================
#
# * installing *source* package 'RPostgres' ...
# ** package 'RPostgres' successfully unpacked and MD5 sums checked
# ** using staged installation
# Using PKG_CFLAGS=
# Using PKG_LIBS=-lpq
# Using PKG_PLOGR=
# ------------------------- ANTICONF ERROR ---------------------------
# Configuration failed because libpq was not found. Try installing:
# * deb: libpq-dev (Debian, Ubuntu, etc)
# * rpm: postgresql-devel (Fedora, EPEL)
# * rpm: postgreql8-devel, psstgresql92-devel, postgresql93-devel, or postgresql94-devel (Amazon Linux)
# * csw: postgresql_dev (Solaris)
# * brew: libpq (OSX)
# If libpq is already installed, check that either:
# (i) 'pkg-config' is in your PATH AND PKG_CONFIG_PATH contains
# a libpq.pc file; or
# (ii) 'pg_config' is in your PATH.
# If neither can detect , you can set INCLUDE_DIR
# and LIB_DIR manually via:
# R CMD INSTALL --configure-vars='INCLUDE_DIR=... LIB_DIR=...'
# --------------------------[ ERROR MESSAGE ]----------------------------
# :1:10: fatal error: libpq-fe.h: No such file or directory
# compilation terminated.
library(dbplyr)
library(yaml)

#' @description
#' Auxiliary function to parse the connection credentials from a specific group in ./credentials.yaml
#' You can reuse most of this function if you are connecting to a DB or Web API.
#' It's OK to delete this function if you don't need credentials, e.g., you are pulling data from a CSV for example.
#' @param group the yaml key containing the credentials to connect to a database
#' @return dbEngine a database engine (connection) ready to perform queries
get_db_engine <- function(group){
  # The working dir is always the RAPIDS root folder, so your credentials file is always ./credentials.yaml
  credentials <- read_yaml("./credentials.yaml")
  if(!group %in% names(credentials))
    stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ","), ". Did you forget to set the group in [PHONE_DATA_STREAMS][aware_mysql][DATABASE_GROUP] in config.yaml?"))
  # RPostgres expects the database name as `dbname` — the previous `db =` is not a
  # recognized libpq connection parameter and makes dbConnect() fail.
  dbEngine <- dbConnect(Postgres(),
                        dbname = credentials[[group]][["database"]],
                        user = credentials[[group]][["user"]],
                        password = credentials[[group]][["password"]],
                        host = credentials[[group]][["host"]],
                        port = credentials[[group]][["port"]])
  return(dbEngine)
}

# This file gets executed for each PHONE_SENSOR of each participant
# If you are connecting to a database the env file containing its credentials is available at "./.env"
# If you are reading a CSV file instead of a DB table, the @param sensor_container will contain the file path as set in config.yaml
# You are not bound to databases or files, you can query a web API or whatever data source you need.

#' @description
#' RAPIDS allows users to use the keyword "infer" (previously "multiple") to automatically infer the mobile operating system a device was running.
#' If you have a way to infer the OS of a device ID, implement this function. For example, for AWARE data we use the "aware_device" table.
#'
#' If you don't have a way to infer the OS, call stop("Error Message") so other users know they can't use "infer" or the inference failed,
#' and they have to assign the OS manually in the participant file
#'
#' @param stream_parameters The PHONE_STREAM_PARAMETERS key in config.yaml. If you need specific parameters add them there.
#' @param device A device ID string
#' @return The OS the device ran, "android" or "ios"
infer_device_os <- function(stream_parameters, device){
  # NOTE(review): the body below is intentionally commented out, so this function
  # currently returns NULL. Per the contract above, either implement the inference
  # (e.g. against an "aware_device"-like table) or call stop() here so users know
  # "infer" is unsupported for this stream.
  #dbEngine <- get_db_engine(stream_parameters$DATABASE_GROUP)
  #query <- paste0("SELECT device_id,brand FROM aware_device WHERE device_id = '", device, "'")
  #message(paste0("Executing the following query to infer phone OS: ", query))
  #os <- dbGetQuery(dbEngine, query)
  #dbDisconnect(dbEngine)
  #if(nrow(os) > 0)
  #  return(os %>% mutate(os = ifelse(brand == "iPhone", "ios", "android")) %>% pull(os))
  #else stop(paste("We cannot infer the OS of the following device id because the aware_device table does not exist."))
  #return(os)
}

#' @description
#' Gets the sensor data for a specific device id from a database table, file or whatever source you want to query
#'
#' @param stream_parameters The PHONE_STREAM_PARAMETERS key in config.yaml. If you need specific parameters add them there.
#' @param device A device ID string
#' @param sensor The sensor key being pulled (part of the RAPIDS interface; unused here)
#' @param sensor_container database table or file containing the sensor data for all participants. This is the PHONE_SENSOR[CONTAINER] key in config.yaml
#' @param columns the columns needed from this sensor (we recommend to only return these columns instead of every column in sensor_container)
#' @return A dataframe with the sensor data for device
pull_data <- function(stream_parameters, device, sensor, sensor_container, columns){
  dbEngine <- get_db_engine(stream_parameters$DATABASE_GROUP)
  # Always release the connection, even if the query below errors.
  on.exit(dbDisconnect(dbEngine), add = TRUE)
  # Bind the device id as a query parameter ($1) instead of pasting it into the SQL
  # string, so a quote in a device id cannot break (or inject into) the query.
  query <- paste0("SELECT ", paste(columns, collapse = ","), " FROM ", sensor_container,
                  " WHERE ", columns$DEVICE_ID, " = $1")
  # Letting the user know what we are doing
  message(paste0("Executing the following query to download data: ", query))
  sensor_data <- dbGetQuery(dbEngine, query, params = list(device))
  if(nrow(sensor_data) == 0)
    warning(paste0("The device '", device, "' did not have data in ", sensor_container))
  return(sensor_data)
}

#' @description
#' Gets participants' IDs for specified usernames.
#'
#' @param stream_parameters The PHONE_DATA_STREAMS key in config.yaml. If you need specific parameters add them there.
#' @param usernames A vector of usernames
#' @param participants_container The name of the database table containing participants data, such as their username.
#' @return A dataframe with participant IDs matching usernames
pull_participants_ids <- function(stream_parameters, usernames, participants_container) {
  dbEngine <- get_db_engine(stream_parameters$DATABASE_GROUP)
  # Always release the connection, even if the query below errors.
  on.exit(dbDisconnect(dbEngine), add = TRUE)

  # `!!usernames` forces the local vector to be inlined into the generated SQL
  # (consistent with pull_participants_device_ids); without it a table column
  # named `usernames` would silently shadow the local variable.
  query_participant_id <- tbl(dbEngine, participants_container) %>%
    filter(username %in% !!usernames) %>%
    select(username, id)

  message(paste0("Executing the following query to get the participant's id: \n", sql_render(query_participant_id)))

  participant_data <- query_participant_id %>% collect()

  if(nrow(participant_data) == 0)
    warning(paste("We could not find requested usernames (", usernames, ") in ", participants_container))

  return(participant_data)
}

#' @description
#' Gets device IDs for specified participant IDs
#'
#' @param stream_parameters The PHONE_DATA_STREAMS key in config.yaml. If you need specific parameters add them there.
#' @param participants_ids A vector of numeric participant IDs
#' @param device_id_container The name of the database table which will be used to determine distinct device ID. Ideally, a table that reliably contains data, but not too much.
#' @return A dataframe with a row matching each distinct device ID with a participant ID
pull_participants_device_ids <- function(stream_parameters, participants_ids, device_id_container) {
  db <- get_db_engine(stream_parameters$DATABASE_GROUP)

  # Build the lazy dbplyr query: one row per distinct device per requested
  # participant (`!!` inlines the local vector of ids into the SQL).
  device_id_query <- tbl(db, device_id_container) %>%
    filter(participant_id %in% !!participants_ids) %>%
    group_by(participant_id) %>%
    distinct(device_id, .keep_all = FALSE)

  rendered_sql <- sql_render(device_id_query)
  message(paste0("Executing the following query to get the distinct device IDs: \n", rendered_sql))

  # Materialize the result, then close the connection before returning.
  participant_devices <- collect(device_id_query)
  dbDisconnect(db)

  if(nrow(participant_devices) == 0)
    warning(paste("We could not find device IDs for requested participant IDs (", participants_ids, ") in ", device_id_container))

  return(participant_devices)
}

#' @description
#' Gets start and end datetimes for specified participant IDs.
#'
#' @param stream_parameters The PHONE_DATA_STREAMS key in config.yaml. If you need specific parameters add them there.
#' @param participants_ids A vector of numeric participant IDs
#' @param start_end_date_container The name of the database table which will be used to determine when a participant started and ended their participation. Briefing and debriefing EMAs can be meaningfully used here.
#' @return A dataframe relating participant IDs with their start and end datetimes.
pull_participants_start_end_dates <- function(stream_parameters, participants_ids, start_end_date_container) {
  dbEngine <- get_db_engine(stream_parameters$DATABASE_GROUP)
  # Always release the connection; the original only disconnected on the
  # success path, leaking the connection if collect() errored.
  on.exit(dbDisconnect(dbEngine), add = TRUE)

  # Per participant, find the earliest and latest ESM answer timestamps
  # (milliseconds since epoch); a timestamp of 0 marks an unanswered EMA
  # and is excluded.
  query_timestamps <- tbl(dbEngine, start_end_date_container) %>%
    filter(
      participant_id %in% !!participants_ids,
      double_esm_user_answer_timestamp > 0
    ) %>%
    group_by(participant_id) %>%
    summarise(
      timestamp_min = min(double_esm_user_answer_timestamp, na.rm = TRUE),
      timestamp_max = max(double_esm_user_answer_timestamp, na.rm = TRUE)
    ) %>%
    select(participant_id, timestamp_min, timestamp_max)

  message(paste0("Executing the following query to get the starting and ending datetimes: \n", sql_render(query_timestamps)))

  start_end_timestamps <- query_timestamps %>% collect()

  if(nrow(start_end_timestamps) == 0)
    warning(paste("We could not find datetimes for requested participant IDs (", participants_ids, ") in ", start_end_date_container))

  # Convert ms-since-epoch to UTC POSIXct with base R. The original called
  # lubridate::as_datetime(), but lubridate is never attached in this file;
  # as.POSIXct with an epoch origin is equivalent for numeric input.
  start_end_times <- start_end_timestamps %>%
    mutate(
      datetime_start = as.POSIXct(timestamp_min / 1000, origin = "1970-01-01", tz = "UTC"),
      datetime_end = as.POSIXct(timestamp_max / 1000, origin = "1970-01-01", tz = "UTC")
    ) %>%
    select(-c(timestamp_min, timestamp_max))

  return(start_end_times)
}