Patch summary (text before the first "diff --git" header is ignored by git apply):
  * rules/preprocessing.snakefile: the download_participants rule passes
    config["TIMEZONE"] to the script as the "timezone" param.
  * src/data/download_participants.R: the query also selects label and
    timestamp; each participant file now gets four lines: device_id, brand,
    label, and "start_date,end_date".
Review amendments relative to the submitted patch:
  * an NA label is mapped to "EMPTY_LABEL" (the ifelse() form wrote "NA");
  * the loop uses seq_len(nrow(...)) so an empty table produces no iterations.
NOTE(review): this patch was recovered from a whitespace-collapsed copy. The
indentation and the position of blank context lines in the R hunk are inferred
from the hunk line counts -- confirm against the real file before applying.
The post-image hash on the R "index" line refers to the originally submitted
revision, not to this amended one.

diff --git a/rules/preprocessing.snakefile b/rules/preprocessing.snakefile
index 70b70a5b..7f31991f 100644
--- a/rules/preprocessing.snakefile
+++ b/rules/preprocessing.snakefile
@@ -1,7 +1,8 @@
 rule download_participants:
     params:
         group = config["DOWNLOAD_PARTICIPANTS"]["GROUP"],
-        ignored_device_ids = config["DOWNLOAD_PARTICIPANTS"]["IGNORED_DEVICE_IDS"]
+        ignored_device_ids = config["DOWNLOAD_PARTICIPANTS"]["IGNORED_DEVICE_IDS"],
+        timezone = config["TIMEZONE"]
     script:
         "../src/data/download_participants.R"
 
diff --git a/src/data/download_participants.R b/src/data/download_participants.R
index 05e4bd86..a067d08b 100644
--- a/src/data/download_participants.R
+++ b/src/data/download_participants.R
@@ -4,21 +4,31 @@ library(RMySQL)
 
 group <- snakemake@params[["group"]]
 ignored_device_ids <- snakemake@params[["ignored_device_ids"]]
+timezone <- snakemake@params[["timezone"]]
 rmysql.settingsfile <- "./.env"
 
 stopDB <- dbConnect(MySQL(), default.file = rmysql.settingsfile, group = group)
 
-query <- paste0("SELECT device_id, brand FROM aware_device order by timestamp asc")
+query <- "SELECT device_id, brand, label, timestamp FROM aware_device order by timestamp asc"
 participants <- dbGetQuery(stopDB, query)
 pids <- c()
+# Every participant file gets the same end date: the day this script runs.
+end_date <- format(Sys.Date(), "%Y/%m/%d")
+
-for(id in 1:nrow(participants)){
+# seq_len() gives an empty loop for a zero-row result; 1:nrow() would iterate over c(1, 0).
+for(id in seq_len(nrow(participants))){
     device_id <- participants$device_id[[id]]
     brand <- ifelse(participants$brand[[id]] == "iPhone", "ios", "android")
+    # NA == "" evaluates to NA, so test is.na() first; an NA label is treated like an empty one.
+    raw_label <- participants$label[[id]]
+    label <- if(is.na(raw_label) || raw_label == "") "EMPTY_LABEL" else raw_label
+    # The timestamp is divided by 1000 before conversion, i.e. it is treated as epoch milliseconds.
+    start_date <- format(as.POSIXct(participants$timestamp[[id]] / 1000, origin = "1970-01-01", tz = timezone), "%Y/%m/%d")
     if(!(device_id %in% ignored_device_ids)){
         pid <- paste0("p", ifelse(id < 10, paste0("0", id), id))
         pids <- append(pids, pid)
         file_connection <- file(paste0("./data/external/", pid))
-        writeLines(c(device_id, brand), file_connection)
+        writeLines(c(device_id, brand, label, paste0(start_date, ",", end_date)), file_connection)
         close(file_connection)
     }
 }