diff --git a/rules/preprocessing.smk b/rules/preprocessing.smk index 5a02361b..f096a641 100644 --- a/rules/preprocessing.smk +++ b/rules/preprocessing.smk @@ -7,6 +7,11 @@ rule create_example_participant_files: rule prepare_participants_csv: input: username_list = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"] + params: + data_configuration = config["PHONE_DATA_STREAMS"][config["PHONE_DATA_STREAMS"]["USE"]], + participants_table = "participants", + device_id_table = "light_sensor", + start_end_date_table = "esm" output: participants_file = config["CREATE_PARTICIPANT_FILES"]["CSV_FILE_PATH"] script: diff --git a/src/data/translate_usernames_into_participants_data.R b/src/data/translate_usernames_into_participants_data.R index 5194de01..cc19f1a8 100644 --- a/src/data/translate_usernames_into_participants_data.R +++ b/src/data/translate_usernames_into_participants_data.R @@ -3,46 +3,29 @@ library(magrittr) library(tidyverse) library(lubridate) -load_container_script <- function(stream_container){ - language <- if_else(endsWith(tolower(stream_container), "py"), "python", "r") - if(language == "python"){ - library(reticulate) - container <- import_from_path(gsub(pattern = "\\.py$", "", basename(stream_container)), path = dirname(stream_container)) - if(!py_has_attr(container, "pull_data")) - stop(paste0("The following container.py script does not have a pull_data function: ", stream_container)) - if(!py_has_attr(container, "infer_device_os")) - stop(paste0("The following container.py script does not have a infer_device_os function: ", stream_container)) - return(list("infer_device_os" = container$infer_device_os, "pull_data" = container$pull_data)) - } else if(language == "r"){ - source(stream_container) - if(!exists("pull_data")) - stop(paste0("The following container.R script does not have a pull_data function: ", stream_container)) - if(!exists("infer_device_os")) - stop(paste0("The following container.R script does not have a infer_device_os function: ", stream_container)) - return(list("infer_device_os" = infer_device_os, "pull_data" = pull_data)) - } -} - prepare_participants_file <- function() { - #TODO Define appropriate arguments and pass them from config.yaml - stream_container <- snakemake@input[["stream_container"]] - container_functions <- load_container_script(stream_container) - pull_data_container <- container_functions$pull_data - # TODO Figure out how to use the functions in the container + username_list_csv_location <- snakemake@input[["username_list"]] - participant_data <- pull_participants_ids("whatever", usernames, participants_container) + data_configuration <- snakemake@params[["data_configuration"]] + participants_container <- snakemake@params[["participants_table"]] + device_id_container <- snakemake@params[["device_id_table"]] + start_end_date_container <- snakemake@params[["start_end_date_table"]] + + usernames <- read_csv(username_list_csv_location, col_types = "c", progress = FALSE) + + participant_data <- pull_participants_ids(data_configuration, usernames, participants_container) participant_data %<>% rename(participant_id = id) - device_ids <- pull_participants_device_ids("whatever", participant_data$participant_id, device_id_container) + device_ids <- pull_participants_device_ids(data_configuration, participant_data$participant_id, device_id_container) device_ids %<>% group_by(participant_id) %>% summarise(device_ids = list(unique(device_id))) participant_data %<>% left_join(device_ids, by = "participant_id") - start_end_datetimes <- pull_participants_start_end_dates("whatever", participant_data$participant_id, start_end_date_container) + start_end_datetimes <- pull_participants_start_end_dates(data_configuration, participant_data$participant_id, start_end_date_container) participant_data %<>% left_join(start_end_datetimes, by = "participant_id")