Refactor script to create participants files
parent
c8176b2d90
commit
d5931c75d8
35
config.yaml
35
config.yaml
|
@ -2,12 +2,6 @@
|
||||||
# You must create a file for each participant named pXXX containing their device_id. This can be done manually or automatically
|
# You must create a file for each participant named pXXX containing their device_id. This can be done manually or automatically
|
||||||
PIDS: [test01]
|
PIDS: [test01]
|
||||||
|
|
||||||
# Global var with common day segments
|
|
||||||
DAY_SEGMENTS: &day_segments
|
|
||||||
TYPE: PERIODIC # FREQUENCY, PERIODIC, EVENT
|
|
||||||
FILE: "data/external/daysegments_periodic.csv"
|
|
||||||
INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, if set to TRUE we consider day segments back enough in the past as to include the first day of data
|
|
||||||
|
|
||||||
# Use tz codes from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones. Double check your code, for example EST is not US Eastern Time.
|
# Use tz codes from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones. Double check your code, for example EST is not US Eastern Time.
|
||||||
TIMEZONE: &timezone
|
TIMEZONE: &timezone
|
||||||
America/New_York
|
America/New_York
|
||||||
|
@ -15,23 +9,31 @@ TIMEZONE: &timezone
|
||||||
DATABASE_GROUP: &database_group
|
DATABASE_GROUP: &database_group
|
||||||
MY_GROUP
|
MY_GROUP
|
||||||
|
|
||||||
# config section for the script that creates participant files automatically
|
# run 'snakemake -j1 create_participants_files'
|
||||||
PARTICIPANT_FILES: # run snakemake -j1 -R parse_participant_files
|
CREATE_PARTICIPANT_FILES:
|
||||||
|
SOURCE:
|
||||||
|
TYPE: AWARE_DEVICE_TABLE #AWARE_DEVICE_TABLE or CSV_FILE
|
||||||
|
DATABASE_GROUP: *database_group
|
||||||
|
CSV_FILE_PATH: "data/external/example_participants.csv" # must have columns: the PHONE_SECTION DEVICE_ID_COLUMN, the FITBIT_SECTION DEVICE_ID_COLUMN, pid, label, platform, start_date, end_date
|
||||||
|
TIMEZONE: *timezone # only used for AWARE_DEVICE_TABLE
|
||||||
PHONE_SECTION:
|
PHONE_SECTION:
|
||||||
ADD: TRUE
|
ADD: TRUE
|
||||||
PARSED_FROM: AWARE_DEVICE_TABLE #AWARE_DEVICE_TABLE or CSV_FILE
|
DEVICE_ID_COLUMN: device_id # column name
|
||||||
PARSED_SOURCE: *database_group # DB credentials group or CSV file path. If CSV file, it should have: device_id, pid (optional), label (optional), start_date (optional), end_date (optional)
|
|
||||||
IGNORED_DEVICE_IDS: []
|
IGNORED_DEVICE_IDS: []
|
||||||
FITBIT_SECTION:
|
FITBIT_SECTION:
|
||||||
ADD: FALSE
|
ADD: TRUE
|
||||||
SAME_AS_PHONE: FALSE # If TRUE, all config below is ignored
|
DEVICE_ID_COLUMN: device_id # column name
|
||||||
PARSED_FROM: CSV_FILE
|
IGNORED_DEVICE_IDS: []
|
||||||
PARSED_SOURCE: "external/my_fitbit_participants.csv" # CSV file should have: device_id, pid (optional), label (optional), start_date (optional), end_date (optional)
|
|
||||||
|
DAY_SEGMENTS: &day_segments
|
||||||
|
TYPE: PERIODIC # FREQUENCY, PERIODIC, EVENT
|
||||||
|
FILE: "data/external/daysegments_periodic.csv"
|
||||||
|
INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, if set to TRUE we consider day segments back enough in the past as to include the first day of data
|
||||||
|
|
||||||
SENSOR_DATA:
|
SENSOR_DATA:
|
||||||
PHONE:
|
PHONE:
|
||||||
SOURCE:
|
SOURCE:
|
||||||
TYPE: DATABASE # Phone only supports DATABASE for now
|
TYPE: DATABASE
|
||||||
DATABASE_GROUP: *database_group
|
DATABASE_GROUP: *database_group
|
||||||
DEVICE_ID_COLUMN: device_id # column name
|
DEVICE_ID_COLUMN: device_id # column name
|
||||||
TIMEZONE:
|
TIMEZONE:
|
||||||
|
@ -46,6 +48,9 @@ SENSOR_DATA:
|
||||||
TYPE: SINGLE # Fitbit only supports SINGLE timezones
|
TYPE: SINGLE # Fitbit only supports SINGLE timezones
|
||||||
VALUE: *timezone # timezone code (e.g. America/New_York, see attribute TIMEZONE above and https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)
|
VALUE: *timezone # timezone code (e.g. America/New_York, see attribute TIMEZONE above and https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)
|
||||||
|
|
||||||
|
############## PHONE ###########################################################
|
||||||
|
################################################################################
|
||||||
|
|
||||||
PHONE_VALID_SENSED_BINS:
|
PHONE_VALID_SENSED_BINS:
|
||||||
COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features
|
COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features
|
||||||
BIN_SIZE: &bin_size 5 # (in minutes)
|
BIN_SIZE: &bin_size 5 # (in minutes)
|
||||||
|
|
|
@ -15,14 +15,13 @@ rule create_example_participant_files:
|
||||||
shell:
|
shell:
|
||||||
"echo 'a748ee1a-1d0b-4ae9-9074-279a2b6ba524\nandroid\ntest01\n2020/04/23,2020/05/04\n' >> ./data/external/example01 && echo '13dbc8a3-dae3-4834-823a-4bc96a7d459d\nios\ntest02\n2020/04/23,2020/05/04\n' >> ./data/external/example02"
|
"echo 'a748ee1a-1d0b-4ae9-9074-279a2b6ba524\nandroid\ntest01\n2020/04/23,2020/05/04\n' >> ./data/external/example01 && echo '13dbc8a3-dae3-4834-823a-4bc96a7d459d\nios\ntest02\n2020/04/23,2020/05/04\n' >> ./data/external/example02"
|
||||||
|
|
||||||
# rule download_participants:
|
rule create_participants_files:
|
||||||
# params:
|
input:
|
||||||
# group = config["DOWNLOAD_PARTICIPANTS"]["GROUP"],
|
participants_file = [] if config["CREATE_PARTICIPANT_FILES"]["SOURCE"]["TYPE"] == "AWARE_DEVICE_TABLE" else config["CREATE_PARTICIPANT_FILES"]["SOURCE"]["CSV_FILE_PATH"]
|
||||||
# ignored_device_ids = config["DOWNLOAD_PARTICIPANTS"]["IGNORED_DEVICE_IDS"],
|
params:
|
||||||
# timezone = config["TIMEZONE"]
|
config = config["CREATE_PARTICIPANT_FILES"]
|
||||||
# priority: 1
|
script:
|
||||||
# script:
|
"../src/data/create_participants_files.R"
|
||||||
# "../src/data/download_participants.R"
|
|
||||||
|
|
||||||
rule download_phone_data:
|
rule download_phone_data:
|
||||||
input:
|
input:
|
||||||
|
|
|
@ -0,0 +1,73 @@
|
||||||
|
source("renv/activate.R")
|
||||||
|
|
||||||
|
library(RMySQL)
|
||||||
|
library(stringr)
|
||||||
|
library(purrr)
|
||||||
|
library(readr)
|
||||||
|
# Spell out FALSE: the shorthand F is an ordinary variable that can be reassigned
library("dplyr", warn.conflicts = FALSE)
|
||||||
|
|
||||||
|
config <- snakemake@params[["config"]]
|
||||||
|
group <- config$SOURCE$DATABASE_GROUP
|
||||||
|
timezone <- config$SOURCE$TIMEZONE
|
||||||
|
phone_device_id_column = config$PHONE_SECTION$DEVICE_ID_COLUMN
|
||||||
|
fitbit_device_id_column = config$FITBIT_SECTION$DEVICE_ID_COLUMN
|
||||||
|
# Each ADD flag is read from its own section (these two were previously swapped)
add_fitbit_section = config$FITBIT_SECTION$ADD
|
||||||
|
add_phone_section = config$PHONE_SECTION$ADD
|
||||||
|
phone_ignored = config$PHONE_SECTION$IGNORED_DEVICE_IDS
|
||||||
|
fitbit_ignored = config$FITBIT_SECTION$IGNORED_DEVICE_IDS
|
||||||
|
|
||||||
|
rmysql.settingsfile <- "./.env"
|
||||||
|
|
||||||
|
if(config$SOURCE$TYPE == "AWARE_DEVICE_TABLE"){
|
||||||
|
database <- dbConnect(MySQL(), default.file = rmysql.settingsfile, group = group)
|
||||||
|
if(config$FITBIT_SECTION$ADD == TRUE){
|
||||||
|
query <- paste("SELECT",phone_device_id_column, ",",fitbit_device_id_column," as _temp_fitbit_id, brand, label, timestamp FROM aware_device order by timestamp asc")
|
||||||
|
fitbit_device_id_column <- "_temp_fitbit_id"
|
||||||
|
}
|
||||||
|
else
|
||||||
|
query <- paste("SELECT ",phone_device_id_column,", brand, label, timestamp FROM aware_device order by timestamp asc")
|
||||||
|
participants <- dbGetQuery(database, query)
|
||||||
|
dbDisconnect(database)
|
||||||
|
participants <- participants %>%
|
||||||
|
mutate(pid = if_else(row_number()<10, paste0("p","0",row_number()), paste0("p", row_number())),
|
||||||
|
platform = if_else(brand == "iPhone", "ios", "android"), brand = NULL,
|
||||||
|
label = iconv(if_else(label == "", "EMPTY_LABEL", label), from = "UTF-8", to = "UTF-8", sub=''),
|
||||||
|
start_date = format(as.POSIXct(timestamp / 1000, origin = "1970-01-01", tz = timezone), "%Y-%m-%d"),
|
||||||
|
end_date = format(Sys.Date(), "%Y-%m-%d"),
|
||||||
|
!!phone_device_id_column := if_else(!!rlang::sym(phone_device_id_column) %in% phone_ignored, NA_character_, !!rlang::sym(phone_device_id_column)),
|
||||||
|
!!fitbit_device_id_column := if_else(!!rlang::sym(fitbit_device_id_column) %in% fitbit_ignored, NA_character_, !!rlang::sym(fitbit_device_id_column)))
|
||||||
|
|
||||||
|
} else if(config$SOURCE$TYPE == "CSV_FILE"){
|
||||||
|
participants <- read_csv(config$SOURCE$CSV_FILE_PATH, col_types=cols_only(device_id="c",pid="c",label="c",platform="c",
|
||||||
|
start_date=col_date(format = "%Y-%m-%d"),end_date=col_date(format = "%Y-%m-%d"),fitbit_id="c"))
|
||||||
|
participants <- participants %>%
|
||||||
|
# str_replace_all converts every ";" separator (str_replace only touched the first one)
mutate(!!phone_device_id_column := str_replace_all(!!rlang::sym(phone_device_id_column), ";",","),
|
||||||
|
platform = str_replace_all(platform, ";",","),
|
||||||
|
!!phone_device_id_column := if_else(!!rlang::sym(phone_device_id_column) %in% phone_ignored, NA_character_, !!rlang::sym(phone_device_id_column)),
|
||||||
|
!!fitbit_device_id_column := if_else(!!rlang::sym(fitbit_device_id_column) %in% fitbit_ignored, NA_character_, !!rlang::sym(fitbit_device_id_column)))
|
||||||
|
}
|
||||||
|
|
||||||
|
participants %>%
|
||||||
|
pwalk(function(add_phone_section, add_fitbit_section, phone_device_id_column, fitbit_device_id_column, ...) {
|
||||||
|
empty_phone <- c("PHONE:", " DEVICE_IDS:", " PLATFORMS:"," LABEL:", " START_DATE:", " END_DATE:")
|
||||||
|
empty_fitbit <- c("FITBIT:", " DEVICE_IDS:", " LABEL:", " START_DATE:", " END_DATE:")
|
||||||
|
row <- tibble(...)
|
||||||
|
lines <- c()
|
||||||
|
|
||||||
|
if(add_phone_section == TRUE && !is.na(row[phone_device_id_column])){
|
||||||
|
lines <- append(lines, c("PHONE:", paste0(" DEVICE_IDS: [",row[phone_device_id_column],"]"), paste0(" PLATFORMS: [",row$platform,"]"),
|
||||||
|
paste(" LABEL:",row$label), paste(" START_DATE:", row$start_date), paste(" END_DATE:", row$end_date)))
|
||||||
|
}else
|
||||||
|
lines <- append(lines, empty_phone)
|
||||||
|
|
||||||
|
if(add_fitbit_section == TRUE && !is.na(row[fitbit_device_id_column])){
|
||||||
|
lines <- append(lines, c("FITBIT:", paste0(" DEVICE_IDS: [",row[fitbit_device_id_column],"]"),
|
||||||
|
paste(" LABEL:",row$label), paste(" START_DATE:", row$start_date), paste(" END_DATE:", row$end_date)))
|
||||||
|
} else
|
||||||
|
lines <- append(lines, empty_fitbit)
|
||||||
|
|
||||||
|
file_connection <- file(paste0("./data/external/participant_files/", row$pid, ".yaml"))
|
||||||
|
writeLines(lines, file_connection)
|
||||||
|
close(file_connection)
|
||||||
|
|
||||||
|
}, add_phone_section, add_fitbit_section, phone_device_id_column, fitbit_device_id_column)
|
|
@ -1,38 +0,0 @@
|
||||||
source("renv/activate.R")
|
|
||||||
|
|
||||||
library(RMySQL)
|
|
||||||
|
|
||||||
group <- snakemake@params[["group"]]
|
|
||||||
ignored_device_ids <- snakemake@params[["ignored_device_ids"]]
|
|
||||||
timezone <- snakemake@params[["timezone"]]
|
|
||||||
rmysql.settingsfile <- "./.env"
|
|
||||||
|
|
||||||
stopDB <- dbConnect(MySQL(), default.file = rmysql.settingsfile, group = group)
|
|
||||||
query <- "SELECT device_id, brand, label, timestamp FROM aware_device order by timestamp asc"
|
|
||||||
participants <- dbGetQuery(stopDB, query)
|
|
||||||
pids <- c()
|
|
||||||
|
|
||||||
end_date <- format(Sys.Date(), "%Y/%m/%d")
|
|
||||||
|
|
||||||
for(id in 1:nrow(participants)){
|
|
||||||
device_id <- participants$device_id[[id]]
|
|
||||||
brand <- ifelse(participants$brand[[id]] == "iPhone", "ios", "android")
|
|
||||||
label <- ifelse(participants$label[[id]] == "", "EMPTY_LABEL", participants$label[[id]])
|
|
||||||
label <- iconv(label, from = "UTF-8", to = "UTF-8", sub='')
|
|
||||||
start_date <- format(as.POSIXct(participants$timestamp[[id]] / 1000, origin = "1970-01-01", tz = timezone), "%Y/%m/%d")
|
|
||||||
if(!(device_id %in% ignored_device_ids)){
|
|
||||||
pid <- paste0("p", ifelse(id < 10, paste0("0", id), id))
|
|
||||||
pids <- append(pids, pid)
|
|
||||||
file_connection <- file(paste0("./data/external/", pid))
|
|
||||||
writeLines(c(device_id, brand, label, paste0(start_date, ",", end_date)), file_connection)
|
|
||||||
close(file_connection)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
file_lines <-readLines("./config.yaml")
|
|
||||||
for (i in 1:length(file_lines)){
|
|
||||||
if(startsWith(file_lines[i], "PIDS:")){
|
|
||||||
file_lines[i] <- paste0("PIDS: [", paste(pids, collapse = ", "), "]")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
writeLines(file_lines, con = "./config.yaml")
|
|
Loading…
Reference in New Issue