# rapids/src/data/translate_usernames_into_pa...
#
# 63 lines
# 2.3 KiB
# R
# Raw Normal View History
#
# 2021-11-29 18:04:06 +01:00
source("renv/activate.R")
source("src/data/streams/aware_postgresql/container.R")
# 2021-11-29 18:04:06 +01:00
library(RPostgres)
library(magrittr)
library(tidyverse)
library(lubridate)
#' Build the participants file from the username list.
#'
#' Reads the username list CSV supplied by Snakemake, resolves every username
#' to a participant ID, its device IDs and its start/end datetimes through the
#' `pull_participants_*()` helpers, and writes one row per username to the
#' participants CSV.
#'
#' Takes no arguments: the input path, the parameters and the output path are
#' all read from the `snakemake` object, which must be in scope.
#'
#' @return The value of `write_csv()`; called for its side effect of writing
#'   the participants file.
prepare_participants_file <- function() {
  username_list_csv_location <- snakemake@input[["username_list"]]
  data_configuration <- snakemake@params[["data_configuration"]]
  participants_container <- snakemake@params[["participants_table"]]
  device_id_container <- snakemake@params[["device_id_table"]]
  start_end_date_container <- snakemake@params[["start_end_date_table"]]
  output_data_file <- snakemake@output[["participants_file"]]

  # Deliberately not called `platform`: that is the name of the output column
  # created in mutate() below, and sharing the name makes the lookup ambiguous.
  platform_name <- "android"
  pid_format <- "p%03d"
  datetime_format <- "%Y-%m-%d %H:%M:%S"

  # Two character columns; `label` holds the usernames.
  # NOTE(review): `empatica_id` is selected at the end but never created here,
  # so it presumably is the second CSV column - confirm.
  participant_data <- read_csv(
    username_list_csv_location,
    col_types = "cc",
    progress = FALSE
  )

  # Username -> participant ID.
  usernames <- participant_data$label
  participant_ids <- pull_participants_ids(
    data_configuration,
    usernames,
    participants_container
  )
  participant_data %<>%
    left_join(participant_ids, by = c("label" = "username")) %>%
    rename(participant_id = id)

  # Participant ID -> list of distinct, non-empty device IDs.
  devices_by_participant <- pull_participants_device_ids(
    data_configuration,
    participant_data$participant_id,
    device_id_container
  )
  devices_by_participant %<>%
    filter(device_id != "") %>%
    group_by(participant_id) %>%
    summarise(device_ids = list(unique(device_id)))
  participant_data %<>%
    left_join(devices_by_participant, by = "participant_id")

  # Participant ID -> start/end datetimes.
  start_end_datetimes <- pull_participants_start_end_dates(
    data_configuration,
    participant_data$participant_id,
    start_end_date_container
  )
  participant_data %<>%
    left_join(start_end_datetimes, by = "participant_id")

  participant_data %<>%
    mutate(
      pid = sprintf(pid_format, participant_id),
      # TODO Check what timezone is expected
      start_date = strftime(datetime_start, format = datetime_format, tz = "UTC", usetz = FALSE),
      end_date = strftime(datetime_end, format = datetime_format, tz = "UTC", usetz = FALSE),
      # Participants without any device have a NULL entry after the left join;
      # paste() collapses NULL / empty input to "", so they get an empty
      # device_id instead of breaking map_chr().
      device_id = map_chr(device_ids, function(ids) paste(ids, collapse = ";")),
      number_of_devices = lengths(device_ids),
      # One platform entry per device, in the same ";"-separated layout as
      # device_id ("" when there are no devices).
      platform = map_chr(
        number_of_devices,
        function(n) paste(rep(platform_name, n), collapse = ";")
      ),
      fitbit_id = ""
    ) %>%
    arrange(pid) %>%
    select(pid, label, start_date, end_date, empatica_id, device_id, platform, fitbit_id)

  write_csv(participant_data, output_data_file)
}
# Script entry point: build and write the participants file as soon as this
# script is run (the function reads its inputs from the `snakemake` object).
prepare_participants_file()