2021-11-29 18:04:06 +01:00
|
|
|
source("renv/activate.R")
|
2021-11-29 18:19:47 +01:00
|
|
|
source("src/data/streams/aware_postgresql/container.R")
|
2021-11-29 18:04:06 +01:00
|
|
|
|
2021-11-24 19:07:56 +01:00
|
|
|
library(RPostgres)
|
|
|
|
library(magrittr)
|
|
|
|
library(tidyverse)
|
|
|
|
library(lubridate)
|
|
|
|
|
|
|
|
#' Build the participants file from the username list and database metadata.
#'
#' Takes no arguments. Every location and setting is read from the `snakemake`
#' object, which must exist in the calling environment:
#' `snakemake@input[["username_list"]]`,
#' `snakemake@params[["data_configuration"]]`,
#' `snakemake@params[["participants_table"]]`,
#' `snakemake@params[["device_id_table"]]`,
#' `snakemake@params[["start_end_date_table"]]` and
#' `snakemake@output[["participants_file"]]`.
#'
#' The username list is read as a CSV with two character columns; a `label`
#' column is required, and `empatica_id` is presumably the second one (it is
#' selected for the output but not created here -- TODO confirm).
#'
#' The `pull_participants_*()` helpers are not defined in this function
#' (presumably they come from the sourced `container.R`). As used here they
#' must return: `username` and `id`; `participant_id` and `device_id`;
#' `participant_id`, `datetime_start` and `datetime_end`, respectively.
#'
#' Participants without any non-empty device ID are kept in the output with
#' empty `device_id` and `platform` fields.
#'
#' @return The value of `write_csv()`; called for the side effect of writing
#'   the participants file with columns `pid`, `label`, `start_date`,
#'   `end_date`, `empatica_id`, `device_id`, `platform` and `fitbit_id`.
prepare_participants_file <- function() {
  # Inputs, parameters and output path supplied through `snakemake`.
  username_list_csv_location <- snakemake@input[["username_list"]]
  data_configuration <- snakemake@params[["data_configuration"]]
  participants_container <- snakemake@params[["participants_table"]]
  device_id_container <- snakemake@params[["device_id_table"]]
  start_end_date_container <- snakemake@params[["start_end_date_table"]]
  output_data_file <- snakemake@output[["participants_file"]]

  # Named `device_platform` (not `platform`) so it cannot be confused with the
  # `platform` output column created inside `mutate()` below.
  device_platform <- "android"
  pid_format <- "p%03d"
  datetime_format <- "%Y-%m-%d %H:%M:%S"
  value_separator <- ";"

  # Join values into a single string. Zero-length input (including the NULL
  # list entries a left join produces for unmatched rows) yields "", so the
  # result is always exactly one string regardless of the stringr version.
  collapse_values <- function(values) {
    if (length(values) == 0L) {
      return("")
    }
    str_c(values, collapse = value_separator)
  }

  participant_data <- read_csv(
    username_list_csv_location,
    col_types = "cc",
    progress = FALSE
  )

  # Attach the database ID that belongs to each username.
  participant_ids <- pull_participants_ids(
    data_configuration,
    participant_data$label,
    participants_container
  )
  participant_data <- participant_data %>%
    left_join(participant_ids, by = c("label" = "username")) %>%
    rename(participant_id = id)

  # One row per participant holding the list of their distinct, non-empty
  # device IDs.
  devices_by_participant <- pull_participants_device_ids(
    data_configuration,
    participant_data$participant_id,
    device_id_container
  ) %>%
    filter(device_id != "") %>%
    group_by(participant_id) %>%
    summarise(device_ids = list(unique(device_id)))
  participant_data <- participant_data %>%
    left_join(devices_by_participant, by = "participant_id")

  # Attach `datetime_start` / `datetime_end` for each participant.
  start_end_datetimes <- pull_participants_start_end_dates(
    data_configuration,
    participant_data$participant_id,
    start_end_date_container
  )
  participant_data <- participant_data %>%
    left_join(start_end_datetimes, by = "participant_id")

  participant_data <- participant_data %>%
    mutate(
      pid = sprintf(pid_format, participant_id),
      # TODO: check what timezone is expected.
      start_date = strftime(
        datetime_start,
        format = datetime_format, tz = "UTC", usetz = FALSE
      ),
      end_date = strftime(
        datetime_end,
        format = datetime_format, tz = "UTC", usetz = FALSE
      ),
      device_id = map_chr(device_ids, collapse_values),
      number_of_devices = map_int(device_ids, length),
      # The platform label is repeated once per device so that it lines up
      # with the separator-delimited `device_id` field.
      platform = map_chr(
        number_of_devices,
        function(n) collapse_values(rep(device_platform, n))
      ),
      fitbit_id = ""
    ) %>%
    arrange(pid) %>%
    select(
      pid, label, start_date, end_date,
      empatica_id, device_id, platform, fitbit_id
    )

  write_csv(participant_data, output_data_file)
}
|
|
|
|
|
2021-11-29 17:54:16 +01:00
|
|
|
# Script entry point: build and write the participants file. The function
# reads all of its inputs and its output path from the `snakemake` object.
prepare_participants_file()
|