Add call features

replace/7fed14aa9e03dac5c1881804194019bbca17f078
JulioV 2019-10-25 10:21:09 -04:00
parent 51dac2fb18
commit 6287afcb4c
5 changed files with 80 additions and 3 deletions

View File

@ -11,6 +11,16 @@ rule all:
sms_type = config["COM_SMS"]["SMS_TYPES"],
day_segment = config["COM_SMS"]["DAY_SEGMENTS"],
metric = config["COM_SMS"]["METRICS"]),
expand("data/processed/{pid}/com_call_{call_type}_{segment}_{metric}.csv",
pid=config["PIDS"],
call_type = config["COM_CALL"]["CALL_TYPE_MISSED"],
segment = config["COM_CALL"]["DAY_SEGMENTS"],
metric = config["COM_CALL"]["METRICS_MISSED"]),
expand("data/processed/{pid}/com_call_{call_type}_{segment}_{metric}.csv",
pid=config["PIDS"],
call_type = config["COM_CALL"]["CALL_TYPE_TAKEN"],
segment = config["COM_CALL"]["DAY_SEGMENTS"],
metric = config["COM_CALL"]["METRICS_TAKEN"]),
# --- Packrat Rules --- #
## Taken from https://github.com/lachlandeer/snakemake-econ-r

View File

@ -1,5 +1,5 @@
# Valid database table names
SENSORS: [messages]
SENSORS: [messages, calls]
# Participants to include in the analysis
# You must create a file for each participant
@ -22,4 +22,13 @@ READABLE_DATETIME:
COM_SMS:
SMS_TYPES : [received, sent]
DAY_SEGMENTS: *day_segments
METRICS: [count, distinctcontacts]
METRICS: [count, distinctcontacts]
# Communication call features config
# Separate configurations for missed and taken calls
COM_CALL:
CALL_TYPE_MISSED : [missed]
CALL_TYPE_TAKEN : [incoming, outgoing]
DAY_SEGMENTS: *day_segments
METRICS_MISSED: [count, distinctcontacts]
METRICS_TAKEN: [count, distinctcontacts, meanduration, sumduration, hubermduration, varqnduration, entropyduration]

View File

@ -13,6 +13,11 @@ Source: CRAN
Version: 1.0.0
Hash: 6abedd7919c4457604c0aa44529a6683
Package: DEoptimR
Source: CRAN
Version: 1.0-8
Hash: adc74e88e85eabe6c7d73db6a86fe6cf
Package: R6
Source: CRAN
Version: 2.4.0
@ -125,6 +130,11 @@ Version: 0.3.0
Hash: 30b58109e4d7c6184a9c2e32f9ae38c6
Requires: rlang
Package: entropy
Source: CRAN
Version: 1.2.1
Hash: ccff926ff232f7c19b4c84bab3d3d6d3
Package: evaluate
Source: CRAN
Version: 0.14
@ -362,6 +372,12 @@ Hash: 1f3014c40b12e8af0abf39fd78080237
Requires: base64enc, evaluate, htmltools, jsonlite, knitr, mime,
stringr, tinytex, xfun, yaml
Package: robustbase
Source: CRAN
Version: 0.93-5
Hash: 7b6672bf2b47c35d02a5b273393e49f5
Requires: DEoptimR
Package: rstudioapi
Source: CRAN
Version: 0.10

View File

@ -8,4 +8,16 @@ rule communication_sms_metrics:
output:
"data/processed/{pid}/com_sms_{sms_type}_{day_segment}_{metric}.csv"
script:
"../src/features/communication_sms_metrics.R"
"../src/features/communication_sms_metrics.R"
# Builds a single call-feature CSV for one participant by running the R
# feature script. The output path carries four wildcards: pid, call_type,
# day_segment and metric.
rule communication_call_metrics:
input:
# Calls table for the participant identified by the {pid} wildcard.
"data/raw/{pid}/calls_with_datetime.csv"
params:
# The wildcard values are forwarded as params; the script reads them
# through snakemake@params.
call_type = "{call_type}",
day_segment = "{day_segment}",
metric = "{metric}"
output:
"data/processed/{pid}/com_call_{call_type}_{day_segment}_{metric}.csv"
script:
"../src/features/communication_call_metrics.R"

View File

@ -0,0 +1,30 @@
source("packrat/init.R")
library(dplyr)
library(entropy)
library(robustbase)
# Compute one communication-call feature for a single participant.
#
# Everything is supplied by the Snakemake rule that runs this script:
#   snakemake@input[[1]]               calls CSV to read
#   snakemake@params[["call_type"]]    "incoming", "outgoing" or "missed"
#   snakemake@params[["day_segment"]]  "daily" or a local_day_segment value
#   snakemake@params[["metric"]]       name of the feature to compute
#   snakemake@output[[1]]              CSV to write
#
# The result has one row per local_date and one feature column named
# com_call_<call_type>_<day_segment>_<metric>; rows containing NA are dropped.
calls <- read.csv(snakemake@input[[1]])
day_segment <- snakemake@params[["day_segment"]]
metric <- snakemake@params[["metric"]]
type <- snakemake@params[["call_type"]]
output_file <- snakemake@output[[1]]

# Map the readable call type to the code stored in the call_type column.
# Fail loudly on an unexpected value instead of treating it as "missed".
call_type_code <- switch(type,
  "incoming" = "1",
  "outgoing" = "2",
  "missed" = "3",
  stop("Unknown call_type: ", type, call. = FALSE)
)

# Name of the feature column, shared by every metric below.
feature_name <- paste("com", "call", type, day_segment, metric, sep = "_")

# `!!` injects the script variables by value so that a column with the same
# name in the input data can never shadow them inside filter().
metrics <- filter(calls, call_type == !!call_type_code)

# "daily" keeps every call; any other segment keeps only its own calls.
if (day_segment != "daily") {
  metrics <- filter(metrics, local_day_segment == !!day_segment)
}
metrics <- group_by(metrics, local_date)

# Summarise per day. The unnamed last argument is the switch() default, so an
# unknown metric stops the job instead of writing an empty output file.
metrics <- switch(metric,
  "count" = summarise(metrics, !!feature_name := n()),
  "distinctcontacts" = summarise(metrics, !!feature_name := n_distinct(trace)),
  "meanduration" = summarise(metrics, !!feature_name := mean(call_duration)),
  "sumduration" = summarise(metrics, !!feature_name := sum(call_duration)),
  "hubermduration" = summarise(
    metrics,
    !!feature_name := huberM(call_duration)$mu
  ),
  "varqnduration" = summarise(metrics, !!feature_name := Qn(call_duration)),
  "entropyduration" = summarise(
    metrics,
    !!feature_name := entropy.MillerMadow(call_duration)
  ),
  stop("Unknown metric: ", metric, call. = FALSE)
)

write.csv(na.omit(metrics), output_file, row.names = FALSE)