Added dataset download rule
parent
41c233e4ed
commit
1b1799f9d8
|
@ -87,3 +87,9 @@ target/
|
|||
|
||||
# Mypy cache
|
||||
.mypy_cache/
|
||||
.Rprofile
|
||||
packrat/lib*/
|
||||
packrat/*
|
||||
!packrat/packrat.lock
|
||||
!packrat/packrat.opts
|
||||
.snakemake/
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
configfile: "config.yaml"
|
||||
include: "rules/preprocessing.snakefile"
|
||||
|
||||
rule all:
    # Request one raw CSV for every combination of the participants (PIDS)
    # and sensors (SENSORS) listed in the config file.
    input:
        expand(
            "data/raw/{pid}/{sensor}.csv",
            pid=config["PIDS"],
            sensor=config["SENSORS"],
        )
|
||||
|
||||
# --- Packrat Rules --- #
|
||||
## Taken from https://github.com/lachlandeer/snakemake-econ-r
|
||||
|
||||
## packrat_install: installs packrat onto machine
|
||||
rule packrat_install:
    # Install packrat from CRAN. The repository is addressed over HTTPS so the
    # package download cannot be tampered with in transit.
    shell:
        "R -e 'install.packages(\"packrat\", repos=\"https://cran.us.r-project.org\")'"
|
||||
|
||||
## packrat_init: initializes a packrat environment for this project
|
||||
rule packrat_init:
    # Runs packrat::init() in a fresh R session.
    shell: "R -e 'packrat::init()'"
|
||||
|
||||
## packrat_snap: looks for new R packages in files & archives them
|
||||
rule packrat_snap:
    # Runs packrat::snapshot() in a fresh R session.
    shell: "R -e 'packrat::snapshot()'"
|
||||
|
||||
## packrat_restore: Installs archived packages onto a new machine
|
||||
rule packrat_restore:
    # Runs packrat::restore() in a fresh R session.
    shell: "R -e 'packrat::restore()'"
|
|
@ -0,0 +1,7 @@
|
|||
# Valid database table names
|
||||
SENSORS: [messages]
|
||||
|
||||
# Participants to include in the analysis
|
||||
# You must create a file for each participant
|
||||
# named pXXX under data/external/, containing their device_id on the first line
|
||||
PIDS: [p01, p02]
|
|
@ -0,0 +1,155 @@
|
|||
PackratFormat: 1.4
|
||||
PackratVersion: 0.5.0
|
||||
RVersion: 3.6.1
|
||||
Repos: CRAN=https://cran.rstudio.com/
|
||||
|
||||
Package: BH
|
||||
Source: CRAN
|
||||
Version: 1.69.0-1
|
||||
Hash: 15f597ed227897f4f793b6161260f4b9
|
||||
|
||||
Package: DBI
|
||||
Source: CRAN
|
||||
Version: 1.0.0
|
||||
Hash: 6abedd7919c4457604c0aa44529a6683
|
||||
|
||||
Package: R6
|
||||
Source: CRAN
|
||||
Version: 2.4.0
|
||||
Hash: 948a547c484e5bea15eee76cc53cce3f
|
||||
|
||||
Package: RMySQL
|
||||
Source: CRAN
|
||||
Version: 0.10.17
|
||||
Hash: 095e258676be1decbe4ee1bf3c164284
|
||||
Requires: DBI
|
||||
|
||||
Package: Rcpp
|
||||
Source: CRAN
|
||||
Version: 1.0.2
|
||||
Hash: d04e441a8d398e3d3a71f294c07fa94d
|
||||
|
||||
Package: assertthat
|
||||
Source: CRAN
|
||||
Version: 0.2.1
|
||||
Hash: 622be49032fe50bd42e96aaef613e209
|
||||
|
||||
Package: backports
|
||||
Source: CRAN
|
||||
Version: 1.1.5
|
||||
Hash: 35ad64fcf2063e2a52031b0f1a59d8f0
|
||||
|
||||
Package: cli
|
||||
Source: CRAN
|
||||
Version: 1.1.0
|
||||
Hash: bc4e54014c2049f2605ad0c3ba0cce6b
|
||||
Requires: assertthat, crayon
|
||||
|
||||
Package: crayon
|
||||
Source: CRAN
|
||||
Version: 1.3.4
|
||||
Hash: ff2840dd9b0d563fc80377a5a45510cd
|
||||
|
||||
Package: digest
|
||||
Source: CRAN
|
||||
Version: 0.6.22
|
||||
Hash: 824be063463b3709782ef29a3e8d7079
|
||||
|
||||
Package: dplyr
|
||||
Source: CRAN
|
||||
Version: 0.8.3
|
||||
Hash: 201287c2b23cff8b2ef156ec8b1e57f2
|
||||
Requires: BH, R6, Rcpp, assertthat, glue, magrittr, pkgconfig, plogr,
|
||||
rlang, tibble, tidyselect
|
||||
|
||||
Package: ellipsis
|
||||
Source: CRAN
|
||||
Version: 0.3.0
|
||||
Hash: 30b58109e4d7c6184a9c2e32f9ae38c6
|
||||
Requires: rlang
|
||||
|
||||
Package: fansi
|
||||
Source: CRAN
|
||||
Version: 0.4.0
|
||||
Hash: f147621f72b561485bfffcae78c4f5d5
|
||||
|
||||
Package: glue
|
||||
Source: CRAN
|
||||
Version: 1.3.1
|
||||
Hash: 660bbbe3803c7cf7c9489a7d99a9c0ed
|
||||
|
||||
Package: magrittr
|
||||
Source: CRAN
|
||||
Version: 1.5
|
||||
Hash: bdc4d48c3135e8f3b399536ddf160df4
|
||||
|
||||
Package: packrat
|
||||
Source: CRAN
|
||||
Version: 0.5.0
|
||||
Hash: 498643e765d1442ba7b1160a1df3abf9
|
||||
|
||||
Package: pillar
|
||||
Source: CRAN
|
||||
Version: 1.4.2
|
||||
Hash: 28ff1862b4e0c8761efca442e80a63d8
|
||||
Requires: cli, crayon, fansi, rlang, utf8, vctrs
|
||||
|
||||
Package: pkgconfig
|
||||
Source: CRAN
|
||||
Version: 2.0.3
|
||||
Hash: 5ff5f2361851a49534c96caa2a8071c7
|
||||
|
||||
Package: plogr
|
||||
Source: CRAN
|
||||
Version: 0.2.0
|
||||
Hash: 81a8008a5e7858552503935f1abe48aa
|
||||
|
||||
Package: purrr
|
||||
Source: CRAN
|
||||
Version: 0.3.3
|
||||
Hash: d4f497f8a97ef6c7182a87b2476748d1
|
||||
Requires: magrittr, rlang
|
||||
|
||||
Package: rlang
|
||||
Source: CRAN
|
||||
Version: 0.4.0
|
||||
Hash: eabda67321fe1d477ea641ddd5d84f00
|
||||
|
||||
Package: stringi
|
||||
Source: CRAN
|
||||
Version: 1.4.3
|
||||
Hash: ed2a82fc7cc668c1345223d938cdfaf2
|
||||
|
||||
Package: stringr
|
||||
Source: CRAN
|
||||
Version: 1.4.0
|
||||
Hash: 67da32dbb2a7a16f2ef124336358e54a
|
||||
Requires: glue, magrittr, stringi
|
||||
|
||||
Package: tibble
|
||||
Source: CRAN
|
||||
Version: 2.1.3
|
||||
Hash: f59680d81ddc45fa3fcb8c07686d1d89
|
||||
Requires: cli, crayon, fansi, pillar, pkgconfig, rlang
|
||||
|
||||
Package: tidyselect
|
||||
Source: CRAN
|
||||
Version: 0.2.5
|
||||
Hash: 9ab4ed03f4b7bbdbd1db9d7a920aae1a
|
||||
Requires: Rcpp, glue, purrr, rlang
|
||||
|
||||
Package: utf8
|
||||
Source: CRAN
|
||||
Version: 1.1.4
|
||||
Hash: f3f97ce59092abc8ed3fd098a59e236c
|
||||
|
||||
Package: vctrs
|
||||
Source: CRAN
|
||||
Version: 0.2.0
|
||||
Hash: daf77cb3dbcacd7fb619cb3748dc215f
|
||||
Requires: backports, digest, ellipsis, glue, rlang, zeallot
|
||||
|
||||
Package: zeallot
|
||||
Source: CRAN
|
||||
Version: 0.1.0
|
||||
Hash: 10b2ed48e202b4db421ae864041dc4b2
|
|
@ -0,0 +1,19 @@
|
|||
auto.snapshot: FALSE
|
||||
use.cache: FALSE
|
||||
print.banner.on.startup: auto
|
||||
vcs.ignore.lib: TRUE
|
||||
vcs.ignore.src: FALSE
|
||||
external.packages:
|
||||
local.repos:
|
||||
load.external.packages.on.startup: TRUE
|
||||
ignored.packages:
|
||||
ignored.directories:
|
||||
data
|
||||
inst
|
||||
quiet.package.installation: TRUE
|
||||
snapshot.recommended.packages: FALSE
|
||||
snapshot.fields:
|
||||
Imports
|
||||
Depends
|
||||
LinkingTo
|
||||
symlink.system.packages: TRUE
|
|
@ -0,0 +1,9 @@
|
|||
rule download_dataset:
    # Per-participant file; the download script reads its first line as the
    # device_id to query for.
    input: "data/external/{pid}"
    # One CSV per participant and sensor; the script takes the file's base name
    # as the database table to read.
    output: "data/raw/{pid}/{sensor}.csv"
    # First (and only) param: the group name the script passes to dbConnect().
    params: group="AAPECS"
    script: "../src/data/download_dataset.R"
|
|
@ -0,0 +1,24 @@
|
|||
# Download one sensor table for one participant from MySQL into a CSV file.
#
# Values supplied through the `snakemake` object:
#   snakemake@input[[1]]  - file whose first line holds the participant's
#                           device_id
#   snakemake@params[[1]] - group name passed to dbConnect() together with the
#                           "./.env" settings file
#   snakemake@output[[1]] - destination CSV; its base name (without extension)
#                           is used as the database table name

# When launched by Snakemake, run the project's packrat init script first.
if (exists("snakemake")) {
  source("packrat/init.R")
}

library(RMySQL)
library(stringr)
library(dplyr)

participant <- snakemake@input[[1]]
group <- snakemake@params[[1]]
sensor_file <- snakemake@output[[1]]

# Trim stray whitespace and fail fast on an empty participant file; otherwise
# the query below would silently match nothing and write an empty CSV.
device_id <- trimws(readLines(participant, n = 1))
if (length(device_id) != 1 || !nzchar(device_id)) {
  stop("No device_id found on the first line of ", participant, call. = FALSE)
}

settings_file <- "./.env"
sensor <- tools::file_path_sans_ext(basename(sensor_file))

db_conn <- dbConnect(MySQL(), default.file = settings_file, group = group)

# Quote the table name and the device id through the driver instead of pasting
# them raw into the SQL text, and always release the connection, even when the
# query fails.
sensor_data <- tryCatch(
  {
    query <- paste0(
      "SELECT * FROM ", dbQuoteIdentifier(db_conn, sensor),
      " WHERE device_id LIKE ", dbQuoteString(db_conn, device_id)
    )
    dbGetQuery(db_conn, query)
  },
  finally = dbDisconnect(db_conn)
)

sensor_data <- sensor_data[order(sensor_data$timestamp), ]

# Drop rows that are duplicated on every column except `_id`. Only the columns
# passed to distinct() are kept, so `_id` itself is not written to the output.
sensor_data <- sensor_data %>%
  distinct(!!!syms(setdiff(names(sensor_data), "_id")))

# NOTE(review): quote = FALSE yields a malformed CSV if any value contains a
# comma; kept as-is so the output format does not change - confirm downstream
# readers before enabling quoting.
write.table(
  sensor_data,
  sensor_file,
  row.names = FALSE,
  quote = FALSE,
  sep = ","
)
|
Loading…
Reference in New Issue