rapids/src/data/download_fitbit_data.R

source("renv/activate.R")
library(RMariaDB)
library("dplyr", warn.conflicts = F)
library(readr)
library(stringr)
library(yaml)


# Everything this script needs is handed over by the Snakemake rule:
# two input paths, three params and a single output path.
smk_input <- snakemake@input
smk_params <- snakemake@params

participant_file <- smk_input[["participant_file"]]
input_file <- smk_input[["input_file"]]

sensor <- smk_params[["sensor"]]
table <- smk_params[["table"]]
data_configuration <- smk_params[["data_configuration"]]
# SOURCE is read further down for its TYPE, DEVICE_ID_COLUMN and DATABASE_GROUP entries
source <- data_configuration$SOURCE

# First (and only used) output: the CSV this script writes
sensor_file <- snakemake@output[[1]]

# Read the participant definition and make sure it describes a Fitbit device
participant <- read_yaml(participant_file)
has_fitbit_section <- "FITBIT" %in% names(participant)
if (!has_fitbit_section) {
  missing_section_msg <- paste("The following participant file does not have a FITBIT section, create one manually or automatically (see the docs):", participant_file)
  stop(missing_section_msg)
}

# All ids listed for this participant; the last one becomes the single id
# that every row is labelled with later on
device_ids <- participant$FITBIT$DEVICE_IDS
unified_device_id <- tail(device_ids, 1)
# As opposed to phone data, we don't filter by date here because data can
# still be in JSON format, we need to parse it first

# Load the raw rows that belong to this participant's devices, either from a
# database table or from a CSV file, depending on source$TYPE.
if (source$TYPE == "DATABASE") {
  # Connection settings are taken from the DATABASE_GROUP group of ./.env
  dbEngine <- dbConnect(MariaDB(), default.file = "./.env", group = source$DATABASE_GROUP)

  # Device ids are bound as query parameters instead of being pasted into the
  # SQL text, so an id containing a quote can neither break nor alter the
  # statement. When no ids are configured a single empty string is bound,
  # which keeps the semantics of the previous `IN ('')` query.
  bound_ids <- as.character(unlist(device_ids))
  if (length(bound_ids) == 0) {
    bound_ids <- ""
  }
  placeholders <- paste(rep("?", length(bound_ids)), collapse = ", ")
  query <- paste0(
    "SELECT * FROM ", table,
    " WHERE ", source$DEVICE_ID_COLUMN, " IN (", placeholders, ")"
  )

  # Release the connection even when the query fails
  sensor_data <- tryCatch(
    dbGetQuery(dbEngine, query, params = unname(as.list(bound_ids))),
    finally = dbDisconnect(dbEngine)
  )
} else if (source$TYPE == "FILES") {
  # Read the CSV in chunks of 50000 rows, keeping only the rows whose device
  # id is one of device_ids. Plain `[` indexing is used instead of subset()
  # so that a CSV column named like one of our variables cannot shadow it.
  device_id_column <- source$DEVICE_ID_COLUMN
  keep_participant_rows <- function(x, pos) {
    x[x[[device_id_column]] %in% device_ids, , drop = FALSE]
  }
  sensor_data <- read_csv_chunked(
    input_file,
    callback = DataFrameCallback$new(keep_participant_rows),
    progress = TRUE,
    chunk_size = 50000
  )
  if (is.null(sensor_data)) {
    # A NULL result is treated as an empty file; re-read it with read.csv
    sensor_data <- read.csv(input_file)
  }
} else {
  # Fail here with a clear message instead of later with
  # "object 'sensor_data' not found"
  stop(paste0("Unsupported data source TYPE '", source$TYPE, "', expected DATABASE or FILES"))
}

# Standardise the id column: give it the name `device_id`, then overwrite its
# values so every row carries the same unified device id
sensor_data <- rename(sensor_data, device_id = source$DEVICE_ID_COLUMN)
sensor_data <- mutate(sensor_data, device_id = unified_device_id)

# For MoSHI use, we didn't split fitbit sensors into different tables: when
# HIDDEN$SINGLE_FITBIT_TABLE is switched on, keep only the rows whose
# fitbit_data_type matches this sensor.
# isTRUE() treats a missing or NA flag as "off"; comparing NULL with `==`
# yields a zero-length value that makes `&&` / `if` fail.
single_fitbit_table <- "HIDDEN" %in% names(data_configuration) &&
  isTRUE(as.logical(data_configuration$HIDDEN$SINGLE_FITBIT_TABLE))

if (single_fitbit_table) {
  # The data type is the second "_"-separated piece of the sensor name
  fitbit_type <- str_split(sensor, "_", simplify = TRUE)[[2]]
  # `!!` injects the value, so a data column that happens to share the
  # variable's name cannot shadow it inside filter()
  sensor_data <- sensor_data %>% filter(fitbit_data_type == !!fitbit_type)
}

# Drop duplicated rows, comparing every column except `_id` / `id`.
# distinct() only returns the columns it is given, so those two id columns
# are not part of the result.
dedup_columns <- setdiff(names(sensor_data), c("_id", "id"))
sensor_data <- distinct(sensor_data, !!!syms(dedup_columns))

# Persist the cleaned rows to the rule's output file
write_csv(sensor_data, sensor_file)
Update participant files structure and fitbit download rule 2020-10-21 01:12:01 +02:00			`source("renv/activate.R")`
Swap RMySQL for RMariaDB 2020-12-04 20:57:13 +01:00			`library(RMariaDB)`
Turn off warnings for tidyverse and dplyr 2020-10-23 16:41:00 +02:00			`library("dplyr", warn.conflicts = F)`
Update participant files structure and fitbit download rule 2020-10-21 01:12:01 +02:00			`library(readr)`
			`library(stringr)`
			`library(yaml)`


Add support to read fitbit data from csv files 2020-10-26 22:17:53 +01:00			`participant_file <- snakemake@input[["participant_file"]]`
			`input_file <- snakemake@input[["input_file"]]`
Separate device data configuration and update docs 2020-11-26 01:42:11 +01:00			`data_configuration <- snakemake@params[["data_configuration"]]`
			`source <- data_configuration$SOURCE`
Update participant files structure and fitbit download rule 2020-10-21 01:12:01 +02:00			`sensor <- snakemake@params[["sensor"]]`
			`table <- snakemake@params[["table"]]`
			`sensor_file <- snakemake@output[[1]]`

			`participant <- read_yaml(participant_file)`
			`if(! "FITBIT" %in% names(participant)){`
			`stop(paste("The following participant file does not have a FITBIT section, create one manually or automatically (see the docs):", participant_file))`
			`}`
			`device_ids <- participant$FITBIT$DEVICE_IDS`
			`unified_device_id <- tail(device_ids, 1)`
			`# As opposed to phone data, we dont' filter by date here because data can still be in JSON format, we need to parse it first`

			`if(source$TYPE == "DATABASE"){`
Swap RMySQL for RMariaDB 2020-12-04 20:57:13 +01:00			`dbEngine <- dbConnect(MariaDB(), default.file = "./.env", group = source$DATABASE_GROUP)`
Update participant files structure and fitbit download rule 2020-10-21 01:12:01 +02:00			`query <- paste0("SELECT * FROM ", table, " WHERE ",source$DEVICE_ID_COLUMN," IN ('", paste0(device_ids, collapse = "','"), "')")`
			`sensor_data <- dbGetQuery(dbEngine, query)`
			`dbDisconnect(dbEngine)`
Add support to read fitbit data from csv files 2020-10-26 22:17:53 +01:00			`} else if(source$TYPE == "FILES"){`
			`sensor_data <- read_csv_chunked(input_file, callback = DataFrameCallback$new(function(x, pos) subset(x,x[[source$DEVICE_ID_COLUMN]] %in% device_ids)), progress = T, chunk_size = 50000)`
			`if(is.null(sensor_data)) # emtpy file`
			`sensor_data <- read.csv(input_file)`
			`}`
Update participant files structure and fitbit download rule 2020-10-21 01:12:01 +02:00
Add support to read fitbit data from csv files 2020-10-26 22:17:53 +01:00			`sensor_data <- sensor_data %>%`
			`rename(device_id = source$DEVICE_ID_COLUMN) %>%`
			`mutate(device_id = unified_device_id) # Unify device_id`
Update participant files structure and fitbit download rule 2020-10-21 01:12:01 +02:00
Separate device data configuration and update docs 2020-11-26 01:42:11 +01:00			`if("HIDDEN" %in% names(data_configuration) && data_configuration$HIDDEN$SINGLE_FITBIT_TABLE == TRUE) # For MoSHI use, we didn't split fitbit sensors into different tables`
Add support to read fitbit data from csv files 2020-10-26 22:17:53 +01:00			`sensor_data <- sensor_data %>% filter(fitbit_data_type == str_split(sensor, "_", simplify = TRUE)[[2]])`
Update participant files structure and fitbit download rule 2020-10-21 01:12:01 +02:00
Add support to read fitbit data from csv files 2020-10-26 22:17:53 +01:00			`# Droping duplicates on all columns except for _id or id`
			`sensor_data <- sensor_data %>% distinct(!!!syms(setdiff(names(sensor_data), c("_id", "id"))))`

			`write_csv(sensor_data, sensor_file)`