replace/97bc2495d5d106eb09e40d0f6896e486c32ac210
Meng Li 2019-10-25 11:15:49 -04:00
commit eb1a307dc4
6 changed files with 80 additions and 4 deletions

3
.gitignore vendored
View File

@ -102,5 +102,4 @@ data/interim/*
!/data/interim/.gitkeep !/data/interim/.gitkeep
data/processed/* data/processed/*
!/data/processed/.gitkeep !/data/processed/.gitkeep
reports/
reports/

View File

@ -13,6 +13,16 @@ rule all:
day_segment = config["COM_SMS"]["DAY_SEGMENTS"], day_segment = config["COM_SMS"]["DAY_SEGMENTS"],
metric = config["COM_SMS"]["METRICS"]), metric = config["COM_SMS"]["METRICS"]),
expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]), expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
expand("data/processed/{pid}/com_call_{call_type}_{segment}_{metric}.csv",
pid=config["PIDS"],
call_type = config["COM_CALL"]["CALL_TYPE_MISSED"],
segment = config["COM_CALL"]["DAY_SEGMENTS"],
metric = config["COM_CALL"]["METRICS_MISSED"]),
expand("data/processed/{pid}/com_call_{call_type}_{segment}_{metric}.csv",
pid=config["PIDS"],
call_type = config["COM_CALL"]["CALL_TYPE_TAKEN"],
segment = config["COM_CALL"]["DAY_SEGMENTS"],
metric = config["COM_CALL"]["METRICS_TAKEN"]),
# --- Packrat Rules --- # # --- Packrat Rules --- #
## Taken from https://github.com/lachlandeer/snakemake-econ-r ## Taken from https://github.com/lachlandeer/snakemake-econ-r

View File

@ -22,4 +22,13 @@ READABLE_DATETIME:
COM_SMS: COM_SMS:
SMS_TYPES : [received, sent] SMS_TYPES : [received, sent]
DAY_SEGMENTS: *day_segments DAY_SEGMENTS: *day_segments
METRICS: [count, distinctcontacts] METRICS: [count, distinctcontacts]
# Communication call features config
# Separate configurations for missed and taken calls
COM_CALL:
CALL_TYPE_MISSED : [missed]
CALL_TYPE_TAKEN : [incoming, outgoing]
DAY_SEGMENTS: *day_segments
METRICS_MISSED: [count, distinctcontacts]
METRICS_TAKEN: [count, distinctcontacts, meanduration, sumduration, hubermduration, varqnduration, entropyduration]

View File

@ -13,6 +13,11 @@ Source: CRAN
Version: 1.0.0 Version: 1.0.0
Hash: 6abedd7919c4457604c0aa44529a6683 Hash: 6abedd7919c4457604c0aa44529a6683
Package: DEoptimR
Source: CRAN
Version: 1.0-8
Hash: adc74e88e85eabe6c7d73db6a86fe6cf
Package: R6 Package: R6
Source: CRAN Source: CRAN
Version: 2.4.0 Version: 2.4.0
@ -125,6 +130,11 @@ Version: 0.3.0
Hash: 30b58109e4d7c6184a9c2e32f9ae38c6 Hash: 30b58109e4d7c6184a9c2e32f9ae38c6
Requires: rlang Requires: rlang
Package: entropy
Source: CRAN
Version: 1.2.1
Hash: ccff926ff232f7c19b4c84bab3d3d6d3
Package: evaluate Package: evaluate
Source: CRAN Source: CRAN
Version: 0.14 Version: 0.14
@ -362,6 +372,12 @@ Hash: 1f3014c40b12e8af0abf39fd78080237
Requires: base64enc, evaluate, htmltools, jsonlite, knitr, mime, Requires: base64enc, evaluate, htmltools, jsonlite, knitr, mime,
stringr, tinytex, xfun, yaml stringr, tinytex, xfun, yaml
Package: robustbase
Source: CRAN
Version: 0.93-5
Hash: 7b6672bf2b47c35d02a5b273393e49f5
Requires: DEoptimR
Package: rstudioapi Package: rstudioapi
Source: CRAN Source: CRAN
Version: 0.10 Version: 0.10

View File

@ -8,4 +8,16 @@ rule communication_sms_metrics:
output: output:
"data/processed/{pid}/com_sms_{sms_type}_{day_segment}_{metric}.csv" "data/processed/{pid}/com_sms_{sms_type}_{day_segment}_{metric}.csv"
script: script:
"../src/features/communication_sms_metrics.R" "../src/features/communication_sms_metrics.R"
# Produce one call-metric CSV per combination of participant ({pid}),
# call type, day segment, and metric by running the R feature script.
rule communication_call_metrics:
input:
# Per-participant calls CSV read by the script as its first input.
"data/raw/{pid}/calls_with_datetime.csv"
params:
# The wildcard values matched from the output path are forwarded to the
# script as named params so it knows which subset/metric to compute.
call_type = "{call_type}",
day_segment = "{day_segment}",
metric = "{metric}"
output:
# The wildcard names here ({call_type}, {day_segment}, {metric}) are the ones
# the params above refer to.
"data/processed/{pid}/com_call_{call_type}_{day_segment}_{metric}.csv"
script:
# R script that reads the input/params/output declared above.
"../src/features/communication_call_metrics.R"

View File

@ -0,0 +1,30 @@
source("packrat/init.R")
library(dplyr)
library(entropy)
library(robustbase)
# Compute a single communication-call metric per local date and write it to
# a CSV file.
#
# All inputs come from the `snakemake` object that Snakemake injects when it
# runs this file through a `script:` directive:
#   input[[1]]             path of the calls CSV to read
#   params$call_type       "incoming", "outgoing" or "missed"
#   params$day_segment     "daily", or a value of the `local_day_segment` column
#   params$metric          name of the metric to compute (see switch below)
#   output[[1]]            path of the CSV to write
calls <- read.csv(snakemake@input[[1]])
day_segment <- snakemake@params[["day_segment"]]
metric <- snakemake@params[["metric"]]
type <- snakemake@params[["call_type"]]
output_file <- snakemake@output[[1]]

# Translate the call-type label into the code stored in the `call_type`
# column. Unknown labels fail loudly instead of being treated as "missed".
type_code <- switch(type,
  "incoming" = "1",
  "outgoing" = "2",
  "missed" = "3",
  stop("Unknown call_type: ", type, call. = FALSE)
)

# Name of the single metric column in the output, e.g.
# "com_call_incoming_daily_count".
metric_column <- paste("com", "call", type, day_segment, metric, sep = "_")

metrics <- calls %>% filter(call_type == type_code)

# "daily" keeps every call of the day; any other segment keeps only the rows
# whose `local_day_segment` equals it. `!!` injects the script variable so it
# can never be shadowed by a column that happens to be called `day_segment`.
if (day_segment != "daily") {
  metrics <- metrics %>% filter(local_day_segment == !!day_segment)
}
metrics <- metrics %>% group_by(local_date)

# One summary row per local_date. Only the matching branch is evaluated; an
# unrecognised metric stops the script rather than silently producing NULL.
metrics <- switch(metric,
  "count" = metrics %>%
    summarise(!!metric_column := n()),
  "distinctcontacts" = metrics %>%
    summarise(!!metric_column := n_distinct(trace)),
  "meanduration" = metrics %>%
    summarise(!!metric_column := mean(call_duration)),
  "sumduration" = metrics %>%
    summarise(!!metric_column := sum(call_duration)),
  "hubermduration" = metrics %>%
    summarise(!!metric_column := huberM(call_duration)$mu),
  "varqnduration" = metrics %>%
    summarise(!!metric_column := Qn(call_duration)),
  # NOTE(review): entropy.MillerMadow() is documented as taking bin counts;
  # raw durations are passed here as in the original -- confirm this is intended.
  "entropyduration" = metrics %>%
    summarise(!!metric_column := entropy.MillerMadow(call_duration)),
  stop("Unknown metric: ", metric, call. = FALSE)
)

# Rows containing NA (e.g. a metric that could not be computed for a date)
# are dropped before writing.
write.csv(na.omit(metrics), output_file, row.names = FALSE)