From 9d27431e3b93b75dd96611760eec0952f85af8b9 Mon Sep 17 00:00:00 2001 From: JulioV Date: Thu, 2 Apr 2020 16:28:19 -0400 Subject: [PATCH] Speed up raw csv reads and writes in R --- src/data/download_dataset.R | 3 ++- src/data/readable_datetime.R | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/data/download_dataset.R b/src/data/download_dataset.R index 6ef53771..48bed63e 100644 --- a/src/data/download_dataset.R +++ b/src/data/download_dataset.R @@ -3,6 +3,7 @@ source("packrat/init.R") library(RMySQL) library(stringr) library(dplyr) +library(readr) participant <- snakemake@input[[1]] group <- snakemake@params[["group"]] @@ -44,5 +45,5 @@ if("device_id" %in% available_columns){ } else { print(paste0("Table ", table, "does not have a device_id column (Aware ID) to link its data to a participant")) } -write.csv(sensor_data, sensor_file, row.names = FALSE) +write_csv(sensor_data, sensor_file) dbDisconnect(stopDB) \ No newline at end of file diff --git a/src/data/readable_datetime.R b/src/data/readable_datetime.R index 604d0d31..fcd2d012 100644 --- a/src/data/readable_datetime.R +++ b/src/data/readable_datetime.R @@ -1,6 +1,7 @@ source("packrat/init.R") library("tidyverse") +library(readr) input <- read.csv(snakemake@input[[1]]) sensor_output <- snakemake@output[[1]] @@ -19,7 +20,7 @@ split_local_date_time <- function(data){ local_hour %in% 18:23 ~ "evening"))) } if(!is.null(timezone_periods)){ - timezones <- read.csv(timezone_periods) + timezones <- read_csv(timezone_periods) tz_starts <- timezones$start output <- input %>% mutate(timezone = findInterval(timestamp / 1000, tz_starts), # Set an interval ID based on timezones' start column @@ -36,5 +37,5 @@ if(!is.null(timezone_periods)){ mutate(utc_date_time = as.POSIXct(timestamp/1000, origin="1970-01-01", tz="UTC"), local_date_time = format(utc_date_time, tz = fixed_timezone, usetz = F)) output <- split_local_date_time(output) - write.csv(output, sensor_output, row.names = FALSE) + write_csv(output, sensor_output) }