diff --git a/.gitignore b/.gitignore
index d7c9832f..adfc9b14 100644
--- a/.gitignore
+++ b/.gitignore
@@ -87,3 +87,9 @@ target/
 
 # Mypy cache
 .mypy_cache/
+.Rprofile
+packrat/lib*/
+packrat/*
+!packrat/packrat.lock
+!packrat/packrat.opts
+.snakemake/
diff --git a/Snakefile b/Snakefile
new file mode 100644
index 00000000..581e8981
--- /dev/null
+++ b/Snakefile
@@ -0,0 +1,29 @@
+configfile: "config.yaml"
+include: "rules/preprocessing.snakefile"
+
+rule all:
+    input:
+        expand("data/raw/{pid}/{sensor}.csv", pid=config["PIDS"], sensor=config["SENSORS"])
+
+# --- Packrat Rules --- #
+## Taken from https://github.com/lachlandeer/snakemake-econ-r
+
+## packrat_install: installs packrat onto this machine (over HTTPS, same repo as packrat.lock)
+rule packrat_install:
+    shell:
+        "R -e 'install.packages(\"packrat\", repos=\"https://cran.rstudio.com/\")'"
+
+## packrat_init: initializes a packrat environment for this project
+rule packrat_init:
+    shell:
+        "R -e 'packrat::init()'"
+
+## packrat_snap: looks for new R packages used in project files and archives them
+rule packrat_snap:
+    shell:
+        "R -e 'packrat::snapshot()'"
+
+## packrat_restore: installs the archived packages onto a new machine
+rule packrat_restore:
+    shell:
+        "R -e 'packrat::restore()'"
\ No newline at end of file
diff --git a/config.yaml b/config.yaml
new file mode 100644
index 00000000..fff3a17b
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,7 @@
+# Valid database table names
+SENSORS: [messages]
+
+# Participants to include in the analysis.
+# For each participant, create a file data/external/pXXX whose first line
+# is that participant's device_id.
+PIDS: [p01, p02]
\ No newline at end of file
diff --git a/packrat/packrat.lock b/packrat/packrat.lock
new file mode 100644
index 00000000..4faa102b
--- /dev/null
+++ b/packrat/packrat.lock
@@ -0,0 +1,155 @@
+PackratFormat: 1.4
+PackratVersion: 0.5.0
+RVersion: 3.6.1
+Repos: CRAN=https://cran.rstudio.com/
+
+Package: BH
+Source: CRAN
+Version: 1.69.0-1
+Hash: 15f597ed227897f4f793b6161260f4b9
+
+Package: DBI
+Source: CRAN
+Version: 1.0.0
+Hash: 
6abedd7919c4457604c0aa44529a6683 + +Package: R6 +Source: CRAN +Version: 2.4.0 +Hash: 948a547c484e5bea15eee76cc53cce3f + +Package: RMySQL +Source: CRAN +Version: 0.10.17 +Hash: 095e258676be1decbe4ee1bf3c164284 +Requires: DBI + +Package: Rcpp +Source: CRAN +Version: 1.0.2 +Hash: d04e441a8d398e3d3a71f294c07fa94d + +Package: assertthat +Source: CRAN +Version: 0.2.1 +Hash: 622be49032fe50bd42e96aaef613e209 + +Package: backports +Source: CRAN +Version: 1.1.5 +Hash: 35ad64fcf2063e2a52031b0f1a59d8f0 + +Package: cli +Source: CRAN +Version: 1.1.0 +Hash: bc4e54014c2049f2605ad0c3ba0cce6b +Requires: assertthat, crayon + +Package: crayon +Source: CRAN +Version: 1.3.4 +Hash: ff2840dd9b0d563fc80377a5a45510cd + +Package: digest +Source: CRAN +Version: 0.6.22 +Hash: 824be063463b3709782ef29a3e8d7079 + +Package: dplyr +Source: CRAN +Version: 0.8.3 +Hash: 201287c2b23cff8b2ef156ec8b1e57f2 +Requires: BH, R6, Rcpp, assertthat, glue, magrittr, pkgconfig, plogr, + rlang, tibble, tidyselect + +Package: ellipsis +Source: CRAN +Version: 0.3.0 +Hash: 30b58109e4d7c6184a9c2e32f9ae38c6 +Requires: rlang + +Package: fansi +Source: CRAN +Version: 0.4.0 +Hash: f147621f72b561485bfffcae78c4f5d5 + +Package: glue +Source: CRAN +Version: 1.3.1 +Hash: 660bbbe3803c7cf7c9489a7d99a9c0ed + +Package: magrittr +Source: CRAN +Version: 1.5 +Hash: bdc4d48c3135e8f3b399536ddf160df4 + +Package: packrat +Source: CRAN +Version: 0.5.0 +Hash: 498643e765d1442ba7b1160a1df3abf9 + +Package: pillar +Source: CRAN +Version: 1.4.2 +Hash: 28ff1862b4e0c8761efca442e80a63d8 +Requires: cli, crayon, fansi, rlang, utf8, vctrs + +Package: pkgconfig +Source: CRAN +Version: 2.0.3 +Hash: 5ff5f2361851a49534c96caa2a8071c7 + +Package: plogr +Source: CRAN +Version: 0.2.0 +Hash: 81a8008a5e7858552503935f1abe48aa + +Package: purrr +Source: CRAN +Version: 0.3.3 +Hash: d4f497f8a97ef6c7182a87b2476748d1 +Requires: magrittr, rlang + +Package: rlang +Source: CRAN +Version: 0.4.0 +Hash: eabda67321fe1d477ea641ddd5d84f00 + +Package: stringi +Source: CRAN 
+Version: 1.4.3
+Hash: ed2a82fc7cc668c1345223d938cdfaf2
+
+Package: stringr
+Source: CRAN
+Version: 1.4.0
+Hash: 67da32dbb2a7a16f2ef124336358e54a
+Requires: glue, magrittr, stringi
+
+Package: tibble
+Source: CRAN
+Version: 2.1.3
+Hash: f59680d81ddc45fa3fcb8c07686d1d89
+Requires: cli, crayon, fansi, pillar, pkgconfig, rlang
+
+Package: tidyselect
+Source: CRAN
+Version: 0.2.5
+Hash: 9ab4ed03f4b7bbdbd1db9d7a920aae1a
+Requires: Rcpp, glue, purrr, rlang
+
+Package: utf8
+Source: CRAN
+Version: 1.1.4
+Hash: f3f97ce59092abc8ed3fd098a59e236c
+
+Package: vctrs
+Source: CRAN
+Version: 0.2.0
+Hash: daf77cb3dbcacd7fb619cb3748dc215f
+Requires: backports, digest, ellipsis, glue, rlang, zeallot
+
+Package: zeallot
+Source: CRAN
+Version: 0.1.0
+Hash: 10b2ed48e202b4db421ae864041dc4b2
diff --git a/packrat/packrat.opts b/packrat/packrat.opts
new file mode 100644
index 00000000..1ecf20ce
--- /dev/null
+++ b/packrat/packrat.opts
@@ -0,0 +1,19 @@
+auto.snapshot: FALSE
+use.cache: FALSE
+print.banner.on.startup: auto
+vcs.ignore.lib: TRUE
+vcs.ignore.src: FALSE
+external.packages:
+local.repos:
+load.external.packages.on.startup: TRUE
+ignored.packages:
+ignored.directories:
+    data
+    inst
+quiet.package.installation: TRUE
+snapshot.recommended.packages: FALSE
+snapshot.fields:
+    Imports
+    Depends
+    LinkingTo
+symlink.system.packages: TRUE
diff --git a/rules/preprocessing.snakefile b/rules/preprocessing.snakefile
new file mode 100644
index 00000000..3d66b56a
--- /dev/null
+++ b/rules/preprocessing.snakefile
@@ -0,0 +1,9 @@
+rule download_dataset:
+    input:
+        "data/external/{pid}"
+    params:
+        group="AAPECS"
+    output:
+        "data/raw/{pid}/{sensor}.csv"
+    script:
+        "../src/data/download_dataset.R"
\ No newline at end of file
diff --git a/src/data/download_dataset.R b/src/data/download_dataset.R
new file mode 100644
index 00000000..afa99351
--- /dev/null
+++ b/src/data/download_dataset.R
@@ -0,0 +1,49 @@
+# Download one sensor table for one participant from the MySQL database and
+# write it to a CSV file. Run by Snakemake (rule download_dataset), which
+# supplies the `snakemake` object used below.
+
+if (exists("snakemake"))
+  source("packrat/init.R")
+
+library(RMySQL)
+library(stringr)
+library(dplyr)
+
+participant <- snakemake@input[[1]]
+group <- snakemake@params[[1]]
+sensor_file <- snakemake@output[[1]]
+
+# The first line of the participant file holds the device_id.
+device_id <- trimws(readLines(participant, n = 1))
+if (length(device_id) != 1 || !nzchar(device_id)) {
+  stop("No device_id found on the first line of ", participant, call. = FALSE)
+}
+
+rmysql_settings_file <- "./.env"
+
+# The table name is the output file name without its extension.
+sensor <- tools::file_path_sans_ext(basename(sensor_file))
+# The table name is pasted into the SQL text, so accept plain identifiers only.
+if (!grepl("^[A-Za-z0-9_$]+$", sensor)) {
+  stop("Invalid sensor table name: ", sensor, call. = FALSE)
+}
+
+stop_db <- dbConnect(MySQL(), default.file = rmysql_settings_file, group = group)
+
+# Quote the device_id instead of pasting it raw, and always release the
+# connection, even when the query fails.
+sensor_data <- tryCatch({
+  query <- paste0(
+    "SELECT * FROM ", sensor,
+    " WHERE device_id LIKE ", dbQuoteString(stop_db, device_id)
+  )
+  dbGetQuery(stop_db, query)
+}, finally = dbDisconnect(stop_db))
+sensor_data <- sensor_data[order(sensor_data$timestamp), ]
+
+# Drop rows duplicated on all columns except `_id`; because the columns are
+# listed explicitly, distinct() returns only those columns.
+sensor_data <- sensor_data %>% distinct(!!!syms(setdiff(names(sensor_data), "_id")))
+# NOTE(review): quote = FALSE leaves fields unquoted, so a value containing a
+# comma would shift columns in the CSV - confirm no sensor column can hold one.
+write.table(sensor_data, sensor_file, row.names = FALSE, quote = FALSE, sep = ",")