From 2fdf23e0afa6f950bae47721161f862964b7d2d3 Mon Sep 17 00:00:00 2001 From: JulioV Date: Thu, 24 Oct 2019 16:08:05 -0400 Subject: [PATCH] Add readable time script --- Snakefile | 3 +- packrat/packrat.lock | 328 ++++++++++++++++++++++++++++++++++ rules/preprocessing.snakefile | 18 +- src/data/download_dataset.R | 9 +- src/data/readable_datetime.R | 43 +++++ 5 files changed, 392 insertions(+), 9 deletions(-) create mode 100644 src/data/readable_datetime.R diff --git a/Snakefile b/Snakefile index 581e8981..75719997 100644 --- a/Snakefile +++ b/Snakefile @@ -3,7 +3,8 @@ include: "rules/preprocessing.snakefile" rule all: input: - expand("data/raw/{pid}/{sensor}.csv", pid=config["PIDS"], sensor=config["SENSORS"]) + expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SENSORS"]), + expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"]) # --- Packrat Rules --- # ## Taken from https://github.com/lachlandeer/snakemake-econ-r diff --git a/packrat/packrat.lock b/packrat/packrat.lock index 4faa102b..cbc81eec 100644 --- a/packrat/packrat.lock +++ b/packrat/packrat.lock @@ -18,6 +18,11 @@ Source: CRAN Version: 2.4.0 Hash: 948a547c484e5bea15eee76cc53cce3f +Package: RColorBrewer +Source: CRAN +Version: 1.1-2 +Hash: c0d56cd15034f395874c870141870c25 + Package: RMySQL Source: CRAN Version: 0.10.17 @@ -29,6 +34,12 @@ Source: CRAN Version: 1.0.2 Hash: d04e441a8d398e3d3a71f294c07fa94d +Package: askpass +Source: CRAN +Version: 1.1 +Hash: 6f6c430e3cd0dd7d48f447700f4d7e7f +Requires: sys + Package: assertthat Source: CRAN Version: 0.2.1 @@ -39,17 +50,63 @@ Source: CRAN Version: 1.1.5 Hash: 35ad64fcf2063e2a52031b0f1a59d8f0 +Package: base64enc +Source: CRAN +Version: 0.1-3 +Hash: c590d29e555926af053055e23ee79efb + +Package: broom +Source: CRAN +Version: 0.5.2 +Hash: 2c1abd749c826a39101a156ed0afc9be +Requires: backports, dplyr, generics, purrr, reshape2, stringr, tibble, + tidyr + +Package: callr +Source: CRAN +Version: 3.3.2 
+Hash: 7661e5194cbbe511957b2fb621516c1b +Requires: R6, processx + +Package: cellranger +Source: CRAN +Version: 1.1.0 +Hash: be9d203e7849f73818b36f93e9273c2c +Requires: rematch, tibble + Package: cli Source: CRAN Version: 1.1.0 Hash: bc4e54014c2049f2605ad0c3ba0cce6b Requires: assertthat, crayon +Package: clipr +Source: CRAN +Version: 0.7.0 +Hash: 30cdec6c8cc62c80c485e6bdd0c02b05 + +Package: colorspace +Source: CRAN +Version: 1.4-1 +Hash: 60a56e9998b8b58ee378db3dc91b27a5 + Package: crayon Source: CRAN Version: 1.3.4 Hash: ff2840dd9b0d563fc80377a5a45510cd +Package: curl +Source: CRAN +Version: 4.2 +Hash: 3d7392b36f838a538683f98e580f35dd + +Package: dbplyr +Source: CRAN +Version: 1.4.2 +Hash: 98ff6c6b2ff30fd4007dd16c75027b30 +Requires: DBI, R6, assertthat, dplyr, glue, purrr, rlang, tibble, + tidyselect + Package: digest Source: CRAN Version: 0.6.22 @@ -68,21 +125,146 @@ Version: 0.3.0 Hash: 30b58109e4d7c6184a9c2e32f9ae38c6 Requires: rlang +Package: evaluate +Source: CRAN +Version: 0.14 +Hash: 18306cc3bc1aec7b7360eea8a0eb0ee1 + Package: fansi Source: CRAN Version: 0.4.0 Hash: f147621f72b561485bfffcae78c4f5d5 +Package: forcats +Source: CRAN +Version: 0.4.0 +Hash: 3999d0f4538fe92c7af47b82f3e461b8 +Requires: ellipsis, magrittr, rlang, tibble + +Package: fs +Source: CRAN +Version: 1.3.1 +Hash: 104f1ba37405da6eb4d2391f7c02aaba +Requires: Rcpp + +Package: generics +Source: CRAN +Version: 0.0.2 +Hash: 4aaf002dd434e8c854611c5d11a1d58e + +Package: ggplot2 +Source: CRAN +Version: 3.2.1 +Hash: 7cf3f9df0089475f133ec23befca1032 +Requires: digest, gtable, lazyeval, reshape2, rlang, scales, tibble, + viridisLite, withr + Package: glue Source: CRAN Version: 1.3.1 Hash: 660bbbe3803c7cf7c9489a7d99a9c0ed +Package: gtable +Source: CRAN +Version: 0.3.0 +Hash: a9e7b0666eb933a0cb36779240b4462e + +Package: haven +Source: CRAN +Version: 2.1.1 +Hash: 68fe47f9b101144fd042b7514a16313e +Requires: Rcpp, forcats, hms, readr, tibble + +Package: highr +Source: CRAN +Version: 0.8 +Hash: 
16aa2cc98d7b68c9d148c263c8dcdbcd + +Package: hms +Source: CRAN +Version: 0.5.1 +Hash: 426b96bde4c9ed53f39888758ee53451 +Requires: pkgconfig, rlang, vctrs + +Package: htmltools +Source: CRAN +Version: 0.4.0 +Hash: 3f49193aa22146296cee8ae091b6303a +Requires: Rcpp, digest, rlang + +Package: httr +Source: CRAN +Version: 1.4.1 +Hash: cc16de93eaabd3c6d0785cb8e6d059ab +Requires: R6, curl, jsonlite, mime, openssl + +Package: jsonlite +Source: CRAN +Version: 1.6 +Hash: 5f969e213e966135393e3e304abf3f49 + +Package: knitr +Source: CRAN +Version: 1.25 +Hash: c0b5316240a79d38f4785884b95a9609 +Requires: evaluate, highr, markdown, stringr, xfun, yaml + +Package: labeling +Source: CRAN +Version: 0.3 +Hash: ecf589b42cd284b03a4beb9665482d3e + +Package: lazyeval +Source: CRAN +Version: 0.2.2 +Hash: 563563691bea3cde6945a98996d7c166 + +Package: lifecycle +Source: CRAN +Version: 0.1.0 +Hash: fd2ceffa778c010fab2df12b2eccd835 +Requires: glue, rlang + +Package: lubridate +Source: CRAN +Version: 1.7.4 +Hash: 1ea7526e3b50f96340b9a4a1c7521809 +Requires: Rcpp, stringr + Package: magrittr Source: CRAN Version: 1.5 Hash: bdc4d48c3135e8f3b399536ddf160df4 +Package: markdown +Source: CRAN +Version: 1.1 +Hash: 1b6a18fd395589425e338a47b999099f +Requires: mime, xfun + +Package: mime +Source: CRAN +Version: 0.7 +Hash: 0d7563caf453c231b6f8c359c51eecc2 + +Package: modelr +Source: CRAN +Version: 0.1.5 +Hash: c79fcf8da71e70be2d19cd9c505225ca +Requires: broom, dplyr, magrittr, purrr, rlang, tibble, tidyr + +Package: munsell +Source: CRAN +Version: 0.5.0 +Hash: 38d0efee9bb99bef143bde41c4ce715c +Requires: colorspace + +Package: openssl +Source: CRAN +Version: 1.4.1 +Hash: b01fe6ae05ec2a30a777dc338af5bf69 +Requires: askpass + Package: packrat Source: CRAN Version: 0.5.0 @@ -104,17 +286,105 @@ Source: CRAN Version: 0.2.0 Hash: 81a8008a5e7858552503935f1abe48aa +Package: plyr +Source: CRAN +Version: 1.8.4 +Hash: 5c3737d7c1846e30d40c8c38110b5fcd +Requires: Rcpp + +Package: prettyunits +Source: CRAN +Version: 1.0.2 
+Hash: 49286102a855640daaa38eafe8b1ec30 +Requires: assertthat, magrittr + +Package: processx +Source: CRAN +Version: 3.4.1 +Hash: 5c1f432b290b26ccc2aaaa8ff1fd2f62 +Requires: R6, ps + +Package: progress +Source: CRAN +Version: 1.2.2 +Hash: 209280eb751acf5899f7c69366e14bd3 +Requires: R6, crayon, hms, prettyunits + +Package: ps +Source: CRAN +Version: 1.3.0 +Hash: 1d4cae95887ffe5b1a22bea5994476cd + Package: purrr Source: CRAN Version: 0.3.3 Hash: d4f497f8a97ef6c7182a87b2476748d1 Requires: magrittr, rlang +Package: readr +Source: CRAN +Version: 1.3.1 +Hash: c45e5ec73f9bf1d1bdc1e4c6e6a87011 +Requires: BH, R6, Rcpp, clipr, crayon, hms, tibble + +Package: readxl +Source: CRAN +Version: 1.3.1 +Hash: 5d7697abddc48ab778f9b41486ec0b61 +Requires: Rcpp, cellranger, progress, tibble + +Package: rematch +Source: CRAN +Version: 1.0.1 +Hash: ad4faf59e7611117ff165817074c50c7 + +Package: reprex +Source: CRAN +Version: 0.3.0 +Hash: 2029355cb70894267c2c7557c79307fd +Requires: callr, clipr, fs, rlang, rmarkdown, whisker, withr + +Package: reshape2 +Source: CRAN +Version: 1.4.3 +Hash: b7ea8a2e7a9df39c2346e78bd83e26f8 +Requires: Rcpp, plyr, stringr + Package: rlang Source: CRAN Version: 0.4.0 Hash: eabda67321fe1d477ea641ddd5d84f00 +Package: rmarkdown +Source: CRAN +Version: 1.16 +Hash: 1f3014c40b12e8af0abf39fd78080237 +Requires: base64enc, evaluate, htmltools, jsonlite, knitr, mime, + stringr, tinytex, xfun, yaml + +Package: rstudioapi +Source: CRAN +Version: 0.10 +Hash: 63f43c6ff14d92e1d65ca6c21d45a1eb + +Package: rvest +Source: CRAN +Version: 0.3.4 +Hash: f183d7dabd2bf9d967bfa53a777955f4 +Requires: httr, magrittr, selectr, xml2 + +Package: scales +Source: CRAN +Version: 1.0.0 +Hash: f4f1b13b6774ee3122a36466d1bc50b8 +Requires: R6, RColorBrewer, Rcpp, labeling, munsell, viridisLite + +Package: selectr +Source: CRAN +Version: 0.4-1 +Hash: b12802c11e35dec9d16a74d30ed0f3ed +Requires: R6, stringr + Package: stringi Source: CRAN Version: 1.4.3 @@ -126,18 +396,45 @@ Version: 1.4.0 Hash: 
67da32dbb2a7a16f2ef124336358e54a Requires: glue, magrittr, stringi +Package: sys +Source: CRAN +Version: 3.3 +Hash: d5a4afad9298f42aae77f6389713a066 + Package: tibble Source: CRAN Version: 2.1.3 Hash: f59680d81ddc45fa3fcb8c07686d1d89 Requires: cli, crayon, fansi, pillar, pkgconfig, rlang +Package: tidyr +Source: CRAN +Version: 1.0.0 +Hash: 52f95ddff9a8bbc8e5d7dc0050d6cda4 +Requires: Rcpp, dplyr, ellipsis, glue, lifecycle, magrittr, purrr, + rlang, stringi, tibble, tidyselect, vctrs + Package: tidyselect Source: CRAN Version: 0.2.5 Hash: 9ab4ed03f4b7bbdbd1db9d7a920aae1a Requires: Rcpp, glue, purrr, rlang +Package: tidyverse +Source: CRAN +Version: 1.2.1 +Hash: 1b090209cb20b6fc6eba75de8b7f0b53 +Requires: broom, cli, crayon, dbplyr, dplyr, forcats, ggplot2, haven, + hms, httr, jsonlite, lubridate, magrittr, modelr, purrr, readr, + readxl, reprex, rlang, rstudioapi, rvest, stringr, tibble, tidyr, + xml2 + +Package: tinytex +Source: CRAN +Version: 0.16 +Hash: 569143c7e98bfbd9727b462b57276b09 +Requires: xfun + Package: utf8 Source: CRAN Version: 1.1.4 @@ -149,6 +446,37 @@ Version: 0.2.0 Hash: daf77cb3dbcacd7fb619cb3748dc215f Requires: backports, digest, ellipsis, glue, rlang, zeallot +Package: viridisLite +Source: CRAN +Version: 0.3.0 +Hash: 78bb072c4f9e729a283d4c40ec93f9c6 + +Package: whisker +Source: CRAN +Version: 0.4 +Hash: 5b1ec05cd96c1e0c6048bab49abee3aa + +Package: withr +Source: CRAN +Version: 2.1.2 +Hash: d534108bcd5f34ec73e9eb523751ba20 + +Package: xfun +Source: CRAN +Version: 0.10 +Hash: 83f0de50249d263dfbce9b3aa6d3109a + +Package: xml2 +Source: CRAN +Version: 1.2.2 +Hash: c5258a3beb15da46d4682eda667bf5ec +Requires: Rcpp + +Package: yaml +Source: CRAN +Version: 2.2.0 +Hash: a5ad5616d83d89f8d84cbf3cf4034e13 + Package: zeallot Source: CRAN Version: 0.1.0 diff --git a/rules/preprocessing.snakefile b/rules/preprocessing.snakefile index 3d66b56a..cfeb76b7 100644 --- a/rules/preprocessing.snakefile +++ b/rules/preprocessing.snakefile @@ -2,8 +2,20 @@ rule 
download_dataset: input: "data/external/{pid}" params: - group="AAPECS" + group = "AAPECS", + table = "{sensor}" output: - "data/raw/{pid}/{sensor}.csv" + "data/raw/{pid}/{sensor}_raw.csv" script: - "../src/data/download_dataset.R" \ No newline at end of file + "../src/data/download_dataset.R" + +rule readable_datetime: + input: + sensor_input = rules.download_dataset.output + params: + timezones = None, + fixed_timezone = "EST" + output: + "data/raw/{pid}/{sensor}_with_datetime.csv" + script: + "../src/data/readable_datetime.R" \ No newline at end of file diff --git a/src/data/download_dataset.R b/src/data/download_dataset.R index afa99351..aed44e57 100644 --- a/src/data/download_dataset.R +++ b/src/data/download_dataset.R @@ -1,20 +1,19 @@ -if (exists("snakemake")) - source("packrat/init.R") +source("packrat/init.R") library(RMySQL) library(stringr) library(dplyr) participant <- snakemake@input[[1]] -group <- snakemake@params[[1]] +group <- snakemake@params[["group"]] +table <- snakemake@params[["table"]] sensor_file <- snakemake@output[[1]] device_id <- readLines(participant, n=1) rmysql.settingsfile <- "./.env" -sensor <- tools::file_path_sans_ext(basename(sensor_file)) stopDB <- dbConnect(MySQL(), default.file = rmysql.settingsfile, group = group) -query <- paste("SELECT * FROM ", sensor, " WHERE device_id LIKE '", device_id, "'", sep = "") +query <- paste("SELECT * FROM ", table, " WHERE device_id LIKE '", device_id, "'", sep = "") sensor_data <- dbGetQuery(stopDB, query) sensor_data <- sensor_data[order(sensor_data$timestamp),] diff --git a/src/data/readable_datetime.R b/src/data/readable_datetime.R new file mode 100644 index 00000000..7dbbd0db --- /dev/null +++ b/src/data/readable_datetime.R @@ -0,0 +1,43 @@ +source("packrat/init.R") + +library("tidyverse") + +input <- read.csv(snakemake@input[[1]]) +sensor_output <- snakemake@output[[1]] +timezone_periods <- snakemake@params[["timezone_periods"]] +fixed_timezone <- snakemake@params[["fixed_timezone"]] + 
+# Add human-readable UTC/local date-time columns to the sensor rows in `input`
+# and write the result to `sensor_output`. `timezone_periods` (path to a CSV
+# with `start` and `timezone` columns) takes precedence over `fixed_timezone`
+# (one timezone name applied to every row).
+
+# Render the POSIXct vector `utc` as "YYYY-mm-dd HH:MM:SS" strings in the
+# zone(s) named by `tz` (length 1, or one name per element of `utc`).
+# Elements whose zone is NA are returned as NA.
+format_local <- function(utc, tz) {
+  tz <- rep_len(as.character(tz), length(utc))
+  local <- rep(NA_character_, length(utc))
+  # format() takes a single zone per call, so handle each distinct zone in bulk
+  # instead of formatting row by row.
+  for (zone in unique(tz[!is.na(tz)])) {
+    rows <- which(tz == zone)
+    # Explicit format: the default drops the seconds (or the whole time part)
+    # when they are all zero, which would break the fixed-width parsing below.
+    local[rows] <- format(utc[rows], "%Y-%m-%d %H:%M:%S", tz = zone)
+  }
+  local
+}
+
+# Append utc_date_time, local_date_time, local_date, local_time and local_hour
+# to `data`, which must have a `timestamp` column in epoch milliseconds.
+# `tz` is a single timezone name or one name per row. Returns the data frame.
+add_local_datetime <- function(data, tz) {
+  stopifnot("timestamp" %in% names(data))
+  data$utc_date_time <- as.POSIXct(data$timestamp / 1000,
+                                   origin = "1970-01-01", tz = "UTC")
+  data$local_date_time <- format_local(data$utc_date_time, tz)
+  # Fixed-width slices of "YYYY-mm-dd HH:MM:SS"; vectorised over all rows.
+  data$local_date <- as.Date(substr(data$local_date_time, 1, 10),
+                             format = "%Y-%m-%d")
+  data$local_time <- substr(data$local_date_time, 12, 19)
+  data$local_hour <- as.numeric(substr(data$local_time, 1, 2))
+  data
+}
+
+# Map an hour of the day (0-23) to the label of its six-hour segment; any
+# other value (including NA) yields NA.
+day_segment_of <- function(hour) {
+  case_when(hour %in% 0:5 ~ "night",
+            hour %in% 6:11 ~ "morning",
+            hour %in% 12:17 ~ "afternoon",
+            hour %in% 18:23 ~ "evening")
+}
+
+if (!is.null(timezone_periods)) {
+  # Read the periods table from the path held in `timezone_periods`; keep the
+  # labels as strings so they can be used directly as timezone names.
+  tz_periods <- read.csv(timezone_periods, stringsAsFactors = FALSE)
+  stopifnot(nrow(tz_periods) > 0)
+  # Index of the period each row falls in; rows before the first start (ID 0)
+  # are assigned to the first period. findInterval() needs `start` sorted.
+  # NOTE(review): `start` is compared against timestamp / 1000, so it is
+  # presumably in epoch seconds -- confirm against the periods file.
+  period_id <- pmax(findInterval(input$timestamp / 1000, tz_periods$start), 1L)
+  output <- input
+  output$timezone <- as.character(tz_periods$timezone)[period_id]
+  output <- add_local_datetime(output, output$timezone)
+  output$day_segment <- day_segment_of(output$local_hour)
+} else if (!is.null(fixed_timezone)) {
+  output <- add_local_datetime(input, fixed_timezone)
+  # NOTE(review): this branch names the column local_day_segment while the
+  # branch above uses day_segment; both names are kept for compatibility.
+  output$local_day_segment <- day_segment_of(output$local_hour)
+} else {
+  # Fail loudly instead of silently producing no output file.
+  stop("Provide either `timezone_periods` or `fixed_timezone`", call. = FALSE)
+}
+
+write.csv(output, sensor_output)