Compare commits
12 Commits
ed193d2290
...
b99a3c19ed
Author | SHA1 | Date |
---|---|---|
junos | b99a3c19ed | |
junos | 04ad2d0b81 | |
junos | da5ff0f36e | |
junos | 35d9779026 | |
junos | 32025cbd8c | |
junos | 181e4f0118 | |
junos | 39bd244511 | |
junos | ab84109d55 | |
junos | f9863ec622 | |
junos | c1f56c61e8 | |
junos | 3acf6ece14 | |
junos | 8b2717122d |
|
@ -7,6 +7,7 @@ PIDS: [nokia_0000003]
|
|||
|
||||
# See https://www.rapids.science/latest/setup/configuration/#automatic-creation-of-participant-files
|
||||
CREATE_PARTICIPANT_FILES:
|
||||
USERNAMES_CSV: "data/external/example_usernames.csv"
|
||||
CSV_FILE_PATH: "data/external/example_participants.csv" # see docs for required format
|
||||
PHONE_SECTION:
|
||||
ADD: True
|
||||
|
|
|
@ -319,10 +319,10 @@
|
|||
},
|
||||
"dbplyr": {
|
||||
"Package": "dbplyr",
|
||||
"Version": "2.0.0",
|
||||
"Version": "2.1.1",
|
||||
"Source": "Repository",
|
||||
"Repository": "CRAN",
|
||||
"Hash": "714005206038b1dda74cb1de85029a20"
|
||||
"Hash": "1f37fa4ab2f5f7eded42f78b9a887182"
|
||||
},
|
||||
"desc": {
|
||||
"Package": "desc",
|
||||
|
|
|
@ -4,6 +4,19 @@ rule create_example_participant_files:
|
|||
shell:
|
||||
"echo 'PHONE:\n DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524]\n PLATFORMS: [android]\n LABEL: test-01\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\nFITBIT:\n DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524]\n LABEL: test-01\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\n' >> ./data/external/participant_files/example01.yaml && echo 'PHONE:\n DEVICE_IDS: [13dbc8a3-dae3-4834-823a-4bc96a7d459d]\n PLATFORMS: [ios]\n LABEL: test-02\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\nFITBIT:\n DEVICE_IDS: [13dbc8a3-dae3-4834-823a-4bc96a7d459d]\n LABEL: test-02\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\n' >> ./data/external/participant_files/example02.yaml"
|
||||
|
||||
rule prepare_participants_csv:
|
||||
input:
|
||||
username_list = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"]
|
||||
params:
|
||||
data_configuration = config["PHONE_DATA_STREAMS"][config["PHONE_DATA_STREAMS"]["USE"]],
|
||||
participants_table = "participants",
|
||||
device_id_table = "light_sensor",
|
||||
start_end_date_table = "esm"
|
||||
output:
|
||||
participants_file = config["CREATE_PARTICIPANT_FILES"]["CSV_FILE_PATH"]
|
||||
script:
|
||||
"../src/data/translate_usernames_into_participants_data.R"
|
||||
|
||||
rule create_participants_files:
|
||||
input:
|
||||
participants_file = config["CREATE_PARTICIPANT_FILES"]["CSV_FILE_PATH"]
|
||||
|
|
|
@ -29,6 +29,7 @@ library(RPostgres)
|
|||
# <stdin>:1:10: fatal error: libpq-fe.h: No such file or directory
|
||||
# compilation terminated.
|
||||
|
||||
library(dbplyr)
|
||||
library(yaml)
|
||||
|
||||
#' @description
|
||||
|
@ -105,3 +106,101 @@ pull_data <- function(stream_parameters, device, sensor, sensor_container, colum
|
|||
return(sensor_data)
|
||||
}
|
||||
|
||||
#' @description
#' Gets participants' IDs for specified usernames.
#'
#' @param stream_parameters The PHONE_DATA_STREAMS key in config.yaml. If you need specific parameters add them there.
#' @param usernames A vector of usernames
#' @param participants_container The name of the database table containing participants data, such as their username. It must have the columns `username` and `id`.
#' @return A dataframe with the columns `username` and `id` for every requested username found in `participants_container`. A warning is raised when no username is found.

pull_participants_ids <- function(stream_parameters, usernames, participants_container) {
  dbEngine <- get_db_engine(stream_parameters$DATABASE_GROUP)
  # Close the connection on every exit path, including query errors.
  on.exit(dbDisconnect(dbEngine), add = TRUE)

  # `!!` inlines the local vector into the query, so it can never be
  # confused with a column of the remote table.
  query_participant_id <- tbl(dbEngine, participants_container) %>%
    filter(username %in% !!usernames) %>%
    select(username, id)

  message(paste0("Executing the following query to get the participant's id: \n", sql_render(query_participant_id)))

  participant_data <- query_participant_id %>% collect()

  if (nrow(participant_data) == 0) {
    # Collapse the vector first; otherwise paste() builds one message per
    # username and warning() glues them all together.
    warning(paste(
      "We could not find requested usernames (",
      paste(usernames, collapse = ", "),
      ") in ",
      participants_container
    ))
  }

  return(participant_data)
}
|
||||
|
||||
#' @description
#' Gets the distinct device IDs for specified participant IDs.
#'
#' @param stream_parameters The PHONE_DATA_STREAMS key in config.yaml. If you need specific parameters add them there.
#' @param participants_ids A vector of numeric participant IDs
#' @param device_id_container The name of the database table which will be used to determine distinct device ID. Ideally, a table that reliably contains data, but not too much. It must have the columns `participant_id` and `device_id`.
#' @return A dataframe with a row matching each distinct device ID with a participant ID. A warning is raised when no device ID is found.

pull_participants_device_ids <- function(stream_parameters, participants_ids, device_id_container) {
  dbEngine <- get_db_engine(stream_parameters$DATABASE_GROUP)
  # Close the connection on every exit path, including query errors.
  on.exit(dbDisconnect(dbEngine), add = TRUE)

  # Grouping by participant keeps `participant_id` next to each distinct
  # `device_id` in the result.
  query_device_id <- tbl(dbEngine, device_id_container) %>%
    filter(participant_id %in% !!participants_ids) %>%
    group_by(participant_id) %>%
    distinct(device_id, .keep_all = FALSE)

  message(paste0("Executing the following query to get the distinct device IDs: \n", sql_render(query_device_id)))

  device_ids <- query_device_id %>% collect()

  if (nrow(device_ids) == 0) {
    # Collapse the vector first; otherwise paste() builds one message per
    # participant ID and warning() glues them all together.
    warning(paste(
      "We could not find device IDs for requested participant IDs (",
      paste(participants_ids, collapse = ", "),
      ") in ",
      device_id_container
    ))
  }

  return(device_ids)
}
|
||||
|
||||
#' @description
#' Gets start and end datetimes for specified participant IDs.
#'
#' @param stream_parameters The PHONE_DATA_STREAMS key in config.yaml. If you need specific parameters add them there.
#' @param participants_ids A vector of numeric participant IDs
#' @param start_end_date_container The name of the database table which will be used to determine when a participant started and ended their participation. Briefing and debriefing EMAs can be meaningfully used here. It must have the columns `participant_id` and `double_esm_user_answer_timestamp`.
#' @return A dataframe relating participant IDs with their start and end datetimes (columns `participant_id`, `datetime_start` and `datetime_end`, in UTC). A warning is raised when no timestamps are found.

pull_participants_start_end_dates <- function(stream_parameters, participants_ids, start_end_date_container) {
  dbEngine <- get_db_engine(stream_parameters$DATABASE_GROUP)
  # Close the connection on every exit path, including query errors.
  on.exit(dbDisconnect(dbEngine), add = TRUE)

  # Only rows with a positive answer timestamp are considered; the earliest
  # and latest of them per participant delimit the participation period.
  query_timestamps <- tbl(dbEngine, start_end_date_container) %>%
    filter(
      participant_id %in% !!participants_ids,
      double_esm_user_answer_timestamp > 0
    ) %>%
    group_by(participant_id) %>%
    summarise(
      timestamp_min = min(double_esm_user_answer_timestamp, na.rm = TRUE),
      timestamp_max = max(double_esm_user_answer_timestamp, na.rm = TRUE)
    ) %>%
    select(participant_id, timestamp_min, timestamp_max)

  message(paste0("Executing the following query to get the starting and ending datetimes: \n", sql_render(query_timestamps)))

  start_end_timestamps <- query_timestamps %>% collect()

  if (nrow(start_end_timestamps) == 0) {
    # Collapse the vector first; otherwise paste() builds one message per
    # participant ID and warning() glues them all together.
    warning(paste(
      "We could not find datetimes for requested participant IDs (",
      paste(participants_ids, collapse = ", "),
      ") in ",
      start_end_date_container
    ))
  }

  # The timestamps are divided by 1000 before conversion, i.e. they are
  # treated as milliseconds since the Unix epoch.
  start_end_times <- start_end_timestamps %>%
    mutate(
      datetime_start = as_datetime(timestamp_min / 1000, tz = "UTC"),
      datetime_end = as_datetime(timestamp_max / 1000, tz = "UTC")
    ) %>%
    select(-c(timestamp_min, timestamp_max))

  return(start_end_times)
}
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
source("renv/activate.R")
|
||||
source("src/data/streams/aware_postgresql/container.R")
|
||||
|
||||
library(RPostgres)
|
||||
library(magrittr)
|
||||
library(tidyverse)
|
||||
library(lubridate)
|
||||
|
||||
# Builds the participants CSV: reads the usernames listed in the input CSV
# (column `label`), looks up each participant's ID, device IDs and first/last
# answer datetimes in the database, and writes one row per participant.
prepare_participants_file <- function() {
  # Inputs, parameters and output handed over by Snakemake.
  usernames_csv_path <- snakemake@input[["username_list"]]
  participants_csv_path <- snakemake@output[["participants_file"]]

  stream_configuration <- snakemake@params[["data_configuration"]]
  participants_table <- snakemake@params[["participants_table"]]
  device_id_table <- snakemake@params[["device_id_table"]]
  start_end_date_table <- snakemake@params[["start_end_date_table"]]

  # Fixed values used when formatting the output columns.
  default_platform <- "android"
  pid_pattern <- "p%03d"
  datetime_pattern <- "%Y-%m-%d %H:%M:%S"

  participants <- read_csv(usernames_csv_path, col_types = "c", progress = FALSE)

  # Attach the database ID of every username.
  id_lookup <- pull_participants_ids(stream_configuration, participants$label, participants_table)
  participants <- participants %>%
    left_join(id_lookup, by = c("label" = "username")) %>%
    rename(participant_id = id)

  # Attach a list-column holding each participant's unique device IDs.
  devices_by_participant <- pull_participants_device_ids(
    stream_configuration,
    participants$participant_id,
    device_id_table
  )
  devices_by_participant <- devices_by_participant %>%
    group_by(participant_id) %>%
    summarise(device_ids = list(unique(device_id)))
  participants <- participants %>%
    left_join(devices_by_participant, by = "participant_id")

  # Attach the start and end datetimes of each participant.
  participation_period <- pull_participants_start_end_dates(
    stream_configuration,
    participants$participant_id,
    start_end_date_table
  )
  participants <- participants %>%
    left_join(participation_period, by = "participant_id")

  participants <- participants %>%
    mutate(
      pid = sprintf(pid_pattern, participant_id),
      start_date = strftime(datetime_start, format = datetime_pattern, tz = "UTC", usetz = FALSE), #TODO Check what timezone is expected
      end_date = strftime(datetime_end, format = datetime_pattern, tz = "UTC", usetz = FALSE),
      empatica_id = "placeholder", #TODO Provide in file?
      device_id = map_chr(device_ids, str_c, collapse = ";"),
      number_of_devices = map_int(device_ids, length),
      fitbit_id = ""
    ) %>%
    # Row by row, repeat the platform once per device, separated by ";".
    rowwise() %>%
    mutate(platform = str_c(replicate(number_of_devices, default_platform), collapse = ";")) %>%
    ungroup() %>%
    arrange(pid) %>%
    select(pid, label, start_date, end_date, empatica_id, device_id, platform, fitbit_id)

  write_csv(participants, participants_csv_path)
}

prepare_participants_file()
|
Loading…
Reference in New Issue