Speed up raw CSV reads and writes in R
parent
7d770c91f3
commit
9d27431e3b
|
@ -3,6 +3,7 @@ source("packrat/init.R")
|
||||||
library(RMySQL)
|
library(RMySQL)
|
||||||
library(stringr)
|
library(stringr)
|
||||||
library(dplyr)
|
library(dplyr)
|
||||||
|
library(readr)
|
||||||
|
|
||||||
participant <- snakemake@input[[1]]
|
participant <- snakemake@input[[1]]
|
||||||
group <- snakemake@params[["group"]]
|
group <- snakemake@params[["group"]]
|
||||||
|
@ -44,5 +45,5 @@ if("device_id" %in% available_columns){
|
||||||
} else {
|
} else {
|
||||||
print(paste0("Table ", table, "does not have a device_id column (Aware ID) to link its data to a participant"))
|
print(paste0("Table ", table, "does not have a device_id column (Aware ID) to link its data to a participant"))
|
||||||
}
|
}
|
||||||
write.csv(sensor_data, sensor_file, row.names = FALSE)
|
write_csv(sensor_data, sensor_file)
|
||||||
dbDisconnect(stopDB)
|
dbDisconnect(stopDB)
|
|
@ -1,6 +1,7 @@
|
||||||
source("packrat/init.R")
|
source("packrat/init.R")
|
||||||
|
|
||||||
library("tidyverse")
|
library("tidyverse")
|
||||||
|
library(readr)
|
||||||
|
|
||||||
input <- read.csv(snakemake@input[[1]])
|
input <- read.csv(snakemake@input[[1]])
|
||||||
sensor_output <- snakemake@output[[1]]
|
sensor_output <- snakemake@output[[1]]
|
||||||
|
@ -19,7 +20,7 @@ split_local_date_time <- function(data){
|
||||||
local_hour %in% 18:23 ~ "evening")))
|
local_hour %in% 18:23 ~ "evening")))
|
||||||
}
|
}
|
||||||
if(!is.null(timezone_periods)){
|
if(!is.null(timezone_periods)){
|
||||||
timezones <- read.csv(timezone_periods)
|
timezones <- read_csv(timezone_periods)
|
||||||
tz_starts <- timezones$start
|
tz_starts <- timezones$start
|
||||||
output <- input %>%
|
output <- input %>%
|
||||||
mutate(timezone = findInterval(timestamp / 1000, tz_starts), # Set an interval ID based on timezones' start column
|
mutate(timezone = findInterval(timestamp / 1000, tz_starts), # Set an interval ID based on timezones' start column
|
||||||
|
@ -36,5 +37,5 @@ if(!is.null(timezone_periods)){
|
||||||
mutate(utc_date_time = as.POSIXct(timestamp/1000, origin="1970-01-01", tz="UTC"),
|
mutate(utc_date_time = as.POSIXct(timestamp/1000, origin="1970-01-01", tz="UTC"),
|
||||||
local_date_time = format(utc_date_time, tz = fixed_timezone, usetz = F))
|
local_date_time = format(utc_date_time, tz = fixed_timezone, usetz = F))
|
||||||
output <- split_local_date_time(output)
|
output <- split_local_date_time(output)
|
||||||
write.csv(output, sensor_output, row.names = FALSE)
|
write_csv(output, sensor_output)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue