diff --git a/config.yaml b/config.yaml
index f4d07658..19b9629f 100644
--- a/config.yaml
+++ b/config.yaml
@@ -19,9 +19,16 @@ DAY_SEGMENTS: &day_segments
 TIMEZONE: &timezone
   America/New_York
 
+DATABASE_GROUP: &database_group
+  AAPECS
+
+DOWNLOAD_PARTICIPANTS:
+  IGNORED_DEVICE_IDS: [] # for example "5a1dd68c-6cd1-48fe-ae1e-14344ac5215f"
+  GROUP: *database_group
+
 # Download data config
 DOWNLOAD_DATASET:
-  GROUP: AAPECS
+  GROUP: *database_group
 
 # Readable datetime config
 READABLE_DATETIME:
@@ -112,4 +119,4 @@ STEP:
     ALL_STEPS: ["sumallsteps", "maxallsteps", "minallsteps", "avgallsteps", "stdallsteps"]
     SEDENTARY_BOUT: ["countsedentarybout", "maxdurationsedentarybout", "mindurationsedentarybout", "avgdurationsedentarybout", "stddurationsedentarybout"]
     ACTIVE_BOUT: ["countactivebout", "maxdurationactivebout", "mindurationactivebout", "avgdurationactivebout", "stddurationactivebout"]
-  THRESHOLD_ACTIVE_BOUT: 10 # steps
\ No newline at end of file
+  THRESHOLD_ACTIVE_BOUT: 10 # steps
diff --git a/rules/preprocessing.snakefile b/rules/preprocessing.snakefile
index 4b1463b0..70b70a5b 100644
--- a/rules/preprocessing.snakefile
+++ b/rules/preprocessing.snakefile
@@ -1,3 +1,10 @@
+rule download_participants:
+    params:
+        group = config["DOWNLOAD_PARTICIPANTS"]["GROUP"],
+        ignored_device_ids = config["DOWNLOAD_PARTICIPANTS"]["IGNORED_DEVICE_IDS"]
+    script:
+        "../src/data/download_participants.R"
+
 rule download_dataset:
     input:
         "data/external/{pid}"
diff --git a/src/data/download_participants.R b/src/data/download_participants.R
new file mode 100644
index 00000000..05e4bd86
--- /dev/null
+++ b/src/data/download_participants.R
@@ -0,0 +1,50 @@
+# Build the participant files and the PIDS list from the `aware_device` table.
+#
+# Every device returned by the query (ordered by timestamp, ascending) is
+# numbered by its row position. For each device that is not ignored, the file
+# ./data/external/<pid> is written with two lines: the device id and the
+# platform ("ios" when brand is "iPhone", otherwise "android"). Finally the
+# line of ./config.yaml that starts with "PIDS:" is replaced with the new ids.
+#
+# Snakemake params:
+#   group              - group of the ./.env options file used to connect
+#   ignored_device_ids - device ids that get no participant file
+
+source("packrat/init.R")
+
+library(RMySQL)
+
+group <- snakemake@params[["group"]]
+ignored_device_ids <- snakemake@params[["ignored_device_ids"]]
+rmysql.settingsfile <- "./.env"
+
+stopDB <- dbConnect(MySQL(), default.file = rmysql.settingsfile, group = group)
+query <- "SELECT device_id, brand FROM aware_device order by timestamp asc"
+# Close the connection even if the query fails.
+participants <- tryCatch(
+  dbGetQuery(stopDB, query),
+  finally = dbDisconnect(stopDB)
+)
+
+# Ids follow the row position in the full device list, so ignoring a device
+# leaves a gap instead of renumbering the devices after it.
+row_ids <- seq_len(nrow(participants))
+keep <- !(participants$device_id %in% ignored_device_ids)
+platforms <- ifelse(participants$brand == "iPhone", "ios", "android")
+all_pids <- sprintf("p%02d", row_ids)
+pids <- all_pids[keep]
+
+for (i in row_ids[keep]) {
+  writeLines(
+    c(participants$device_id[[i]], platforms[[i]]),
+    con = file.path("./data/external", all_pids[[i]])
+  )
+}
+
+# Only the PIDS line is rewritten; every other line of the config is kept.
+config_path <- "./config.yaml"
+file_lines <- readLines(config_path)
+is_pids_line <- startsWith(file_lines, "PIDS:")
+pids_line <- paste0("PIDS: [", paste(pids, collapse = ", "), "]")
+file_lines[is_pids_line] <- pids_line
+writeLines(file_lines, con = config_path)