replace/97bc2495d5d106eb09e40d0f6896e486c32ac210
Meng Li 2019-10-25 11:15:49 -04:00
commit eb1a307dc4
6 changed files with 80 additions and 4 deletions

1
.gitignore vendored
View File

@ -102,5 +102,4 @@ data/interim/*
!/data/interim/.gitkeep
data/processed/*
!/data/processed/.gitkeep
reports/

View File

@ -13,6 +13,16 @@ rule all:
day_segment = config["COM_SMS"]["DAY_SEGMENTS"],
metric = config["COM_SMS"]["METRICS"]),
expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
expand("data/processed/{pid}/com_call_{call_type}_{segment}_{metric}.csv",
pid=config["PIDS"],
call_type = config["COM_CALL"]["CALL_TYPE_MISSED"],
segment = config["COM_CALL"]["DAY_SEGMENTS"],
metric = config["COM_CALL"]["METRICS_MISSED"]),
expand("data/processed/{pid}/com_call_{call_type}_{segment}_{metric}.csv",
pid=config["PIDS"],
call_type = config["COM_CALL"]["CALL_TYPE_TAKEN"],
segment = config["COM_CALL"]["DAY_SEGMENTS"],
metric = config["COM_CALL"]["METRICS_TAKEN"]),
# --- Packrat Rules --- #
## Taken from https://github.com/lachlandeer/snakemake-econ-r

View File

@ -23,3 +23,12 @@ COM_SMS:
SMS_TYPES : [received, sent]
DAY_SEGMENTS: *day_segments
METRICS: [count, distinctcontacts]
# Communication call features config
# Missed and taken calls are configured separately: the Snakefile builds one
# set of targets from CALL_TYPE_MISSED x METRICS_MISSED and another from
# CALL_TYPE_TAKEN x METRICS_TAKEN, so each group gets its own metric list.
COM_CALL:
# Call types combined with METRICS_MISSED
CALL_TYPE_MISSED : [missed]
# Call types combined with METRICS_TAKEN
CALL_TYPE_TAKEN : [incoming, outgoing]
# Alias to the day_segments anchor (defined outside this excerpt)
DAY_SEGMENTS: *day_segments
# Missed calls only get count-style metrics
# (presumably because they carry no meaningful duration -- TODO confirm)
METRICS_MISSED: [count, distinctcontacts]
# Taken calls additionally get the call_duration-based metrics; each name must
# match a branch handled in src/features/communication_call_metrics.R
METRICS_TAKEN: [count, distinctcontacts, meanduration, sumduration, hubermduration, varqnduration, entropyduration]

View File

@ -13,6 +13,11 @@ Source: CRAN
Version: 1.0.0
Hash: 6abedd7919c4457604c0aa44529a6683
Package: DEoptimR
Source: CRAN
Version: 1.0-8
Hash: adc74e88e85eabe6c7d73db6a86fe6cf
Package: R6
Source: CRAN
Version: 2.4.0
@ -125,6 +130,11 @@ Version: 0.3.0
Hash: 30b58109e4d7c6184a9c2e32f9ae38c6
Requires: rlang
Package: entropy
Source: CRAN
Version: 1.2.1
Hash: ccff926ff232f7c19b4c84bab3d3d6d3
Package: evaluate
Source: CRAN
Version: 0.14
@ -362,6 +372,12 @@ Hash: 1f3014c40b12e8af0abf39fd78080237
Requires: base64enc, evaluate, htmltools, jsonlite, knitr, mime,
stringr, tinytex, xfun, yaml
Package: robustbase
Source: CRAN
Version: 0.93-5
Hash: 7b6672bf2b47c35d02a5b273393e49f5
Requires: DEoptimR
Package: rstudioapi
Source: CRAN
Version: 0.10

View File

@ -9,3 +9,15 @@ rule communication_sms_metrics:
"data/processed/{pid}/com_sms_{sms_type}_{day_segment}_{metric}.csv"
script:
"../src/features/communication_sms_metrics.R"
# Computes one call metric file per participant, call type, day segment and
# metric by running the communication_call_metrics.R script.
rule communication_call_metrics:
input:
"data/raw/{pid}/calls_with_datetime.csv"
# The wildcards matched in the output path are forwarded to the script,
# which reads them through snakemake@params.
params:
call_type = "{call_type}",
day_segment = "{day_segment}",
metric = "{metric}"
output:
"data/processed/{pid}/com_call_{call_type}_{day_segment}_{metric}.csv"
script:
"../src/features/communication_call_metrics.R"

View File

@ -0,0 +1,30 @@
# Compute one communication-call metric for one participant.
#
# Run by Snakemake, which injects the `snakemake` object used below:
#   input[[1]]          CSV of calls; the columns read here are call_type,
#                       local_date, local_day_segment, trace and call_duration
#   params$call_type    "incoming", "outgoing" or "missed"
#   params$day_segment  "daily", or a value to match against local_day_segment
#   params$metric       one of the metric names handled in the switch below
#   output[[1]]         CSV with local_date plus a single metric column named
#                       com_call_<call_type>_<day_segment>_<metric>
source("packrat/init.R")

library(dplyr)
library(entropy)
library(robustbase)

calls <- read.csv(snakemake@input[[1]])
day_segment <- snakemake@params[["day_segment"]]
metric <- snakemake@params[["metric"]]
type <- snakemake@params[["call_type"]]
output_file <- snakemake@output[[1]]

# Codes used in the call_type column of the input data.
call_type_codes <- c(incoming = "1", outgoing = "2", missed = "3")
if (!type %in% names(call_type_codes)) {
  # Fail loudly rather than silently treating an unknown type as "missed".
  stop(
    "Unknown call_type '", type, "'; expected one of: ",
    paste(names(call_type_codes), collapse = ", "),
    call. = FALSE
  )
}
type_code <- call_type_codes[[type]]

# `!!` injects the parameter values so that a same-named column in `calls`
# can never shadow them inside filter().
metrics <- calls %>% filter(call_type == !!type_code)

# "daily" keeps every call of the day; any other value restricts the calls to
# the matching local_day_segment before aggregating per date.
if (day_segment != "daily") {
  metrics <- metrics %>% filter(local_day_segment == !!day_segment)
}
metrics <- metrics %>% group_by(local_date)

# Name of the single metric column written to the output file.
metric_column <- paste("com", "call", type, day_segment, metric, sep = "_")

metrics <- switch(metric,
  "count" = metrics %>%
    summarise(!!metric_column := n()),
  "distinctcontacts" = metrics %>%
    summarise(!!metric_column := n_distinct(trace)),
  "meanduration" = metrics %>%
    summarise(!!metric_column := mean(call_duration)),
  "sumduration" = metrics %>%
    summarise(!!metric_column := sum(call_duration)),
  "hubermduration" = metrics %>%
    summarise(!!metric_column := huberM(call_duration)$mu),
  "varqnduration" = metrics %>%
    summarise(!!metric_column := Qn(call_duration)),
  "entropyduration" = metrics %>%
    summarise(!!metric_column := entropy.MillerMadow(call_duration)),
  # Without a default, switch() returns NULL and an empty file gets written.
  stop("Unknown metric '", metric, "'", call. = FALSE)
)

# Rows whose metric could not be computed (NA) are dropped from the output.
write.csv(na.omit(metrics), output_file, row.names = FALSE)