Add call features
parent
51dac2fb18
commit
6287afcb4c
10
Snakefile
10
Snakefile
|
@ -11,6 +11,16 @@ rule all:
|
|||
sms_type = config["COM_SMS"]["SMS_TYPES"],
|
||||
day_segment = config["COM_SMS"]["DAY_SEGMENTS"],
|
||||
metric = config["COM_SMS"]["METRICS"]),
|
||||
expand("data/processed/{pid}/com_call_{call_type}_{segment}_{metric}.csv",
|
||||
pid=config["PIDS"],
|
||||
call_type = config["COM_CALL"]["CALL_TYPE_MISSED"],
|
||||
segment = config["COM_CALL"]["DAY_SEGMENTS"],
|
||||
metric = config["COM_CALL"]["METRICS_MISSED"]),
|
||||
expand("data/processed/{pid}/com_call_{call_type}_{segment}_{metric}.csv",
|
||||
pid=config["PIDS"],
|
||||
call_type = config["COM_CALL"]["CALL_TYPE_TAKEN"],
|
||||
segment = config["COM_CALL"]["DAY_SEGMENTS"],
|
||||
metric = config["COM_CALL"]["METRICS_TAKEN"]),
|
||||
|
||||
# --- Packrat Rules --- #
|
||||
## Taken from https://github.com/lachlandeer/snakemake-econ-r
|
||||
|
|
13
config.yaml
13
config.yaml
|
@ -1,5 +1,5 @@
|
|||
# Valid database table names
|
||||
SENSORS: [messages]
|
||||
SENSORS: [messages, calls]
|
||||
|
||||
# Participants to include in the analysis
|
||||
# You must create a file for each participant
|
||||
|
@ -22,4 +22,13 @@ READABLE_DATETIME:
|
|||
COM_SMS:
|
||||
SMS_TYPES : [received, sent]
|
||||
DAY_SEGMENTS: *day_segments
|
||||
METRICS: [count, distinctcontacts]
|
||||
METRICS: [count, distinctcontacts]
|
||||
|
||||
# Call feature settings.
# Missed calls and taken (incoming/outgoing) calls are configured separately
# because each group has its own list of metrics.
COM_CALL:
  CALL_TYPE_MISSED:
    - missed
  CALL_TYPE_TAKEN:
    - incoming
    - outgoing
  DAY_SEGMENTS: *day_segments
  METRICS_MISSED:
    - count
    - distinctcontacts
  METRICS_TAKEN:
    - count
    - distinctcontacts
    - meanduration
    - sumduration
    - hubermduration
    - varqnduration
    - entropyduration
|
|
@ -13,6 +13,11 @@ Source: CRAN
|
|||
Version: 1.0.0
|
||||
Hash: 6abedd7919c4457604c0aa44529a6683
|
||||
|
||||
Package: DEoptimR
|
||||
Source: CRAN
|
||||
Version: 1.0-8
|
||||
Hash: adc74e88e85eabe6c7d73db6a86fe6cf
|
||||
|
||||
Package: R6
|
||||
Source: CRAN
|
||||
Version: 2.4.0
|
||||
|
@ -125,6 +130,11 @@ Version: 0.3.0
|
|||
Hash: 30b58109e4d7c6184a9c2e32f9ae38c6
|
||||
Requires: rlang
|
||||
|
||||
Package: entropy
|
||||
Source: CRAN
|
||||
Version: 1.2.1
|
||||
Hash: ccff926ff232f7c19b4c84bab3d3d6d3
|
||||
|
||||
Package: evaluate
|
||||
Source: CRAN
|
||||
Version: 0.14
|
||||
|
@ -362,6 +372,12 @@ Hash: 1f3014c40b12e8af0abf39fd78080237
|
|||
Requires: base64enc, evaluate, htmltools, jsonlite, knitr, mime,
|
||||
stringr, tinytex, xfun, yaml
|
||||
|
||||
Package: robustbase
|
||||
Source: CRAN
|
||||
Version: 0.93-5
|
||||
Hash: 7b6672bf2b47c35d02a5b273393e49f5
|
||||
Requires: DEoptimR
|
||||
|
||||
Package: rstudioapi
|
||||
Source: CRAN
|
||||
Version: 0.10
|
||||
|
|
|
@ -8,4 +8,16 @@ rule communication_sms_metrics:
|
|||
output:
|
||||
"data/processed/{pid}/com_sms_{sms_type}_{day_segment}_{metric}.csv"
|
||||
script:
|
||||
"../src/features/communication_sms_metrics.R"
|
||||
"../src/features/communication_sms_metrics.R"
|
||||
|
||||
# Build one call feature CSV for a participant ({pid}), a call type, a day
# segment and a metric. The wildcards of the output path are handed to the R
# script as params so it knows which feature to compute.
rule communication_call_metrics:
    input:
        "data/raw/{pid}/calls_with_datetime.csv"
    output:
        "data/processed/{pid}/com_call_{call_type}_{day_segment}_{metric}.csv"
    params:
        metric = "{metric}",
        day_segment = "{day_segment}",
        call_type = "{call_type}"
    script:
        "../src/features/communication_call_metrics.R"
|
|
@ -0,0 +1,30 @@
|
|||
# Compute a single call feature for one participant.
#
# Executed by Snakemake, which provides the `snakemake` object:
#   input[[1]]          CSV of calls; the columns used here are call_type,
#                       local_date, local_day_segment, trace and call_duration
#   params$call_type    "incoming", "outgoing" or "missed"
#   params$day_segment  "daily" (no segment filter) or a value matched against
#                       the local_day_segment column
#   params$metric       name of the summary to compute (see the switch below)
#   output[[1]]         CSV with one row per local_date and one feature column
#                       named com_call_<call_type>_<day_segment>_<metric>
source("packrat/init.R")

library(dplyr)
library(entropy)
library(robustbase)

calls <- read.csv(snakemake@input[[1]])
day_segment <- snakemake@params[["day_segment"]]
metric <- snakemake@params[["metric"]]
type <- snakemake@params[["call_type"]]
output_file <- snakemake@output[[1]]

# Value of the call_type column for each call type name. Unknown names are
# rejected instead of being silently treated as code 3.
type_code <- switch(type,
  "incoming" = 1L,
  "outgoing" = 2L,
  "missed" = 3L,
  stop("Unknown call type: ", type, call. = FALSE)
)

# Name of the feature column; built once and reused by every metric.
feature_name <- paste("com", "call", type, day_segment, metric, sep = "_")

# `!!` injects the script-level values so they can never be confused with a
# column of the same name in `calls`.
selected <- calls %>% filter(call_type == !!type_code)
if (day_segment != "daily") {
  selected <- selected %>% filter(local_day_segment == !!day_segment)
}
grouped <- selected %>% group_by(local_date)

# The final unnamed argument is the switch() default: without it an unknown
# metric would yield NULL and an empty output file.
features <- switch(metric,
  "count" = summarise(grouped, !!feature_name := n()),
  "distinctcontacts" = summarise(
    grouped, !!feature_name := n_distinct(trace)
  ),
  "meanduration" = summarise(
    grouped, !!feature_name := mean(call_duration)
  ),
  "sumduration" = summarise(
    grouped, !!feature_name := sum(call_duration)
  ),
  "hubermduration" = summarise(
    grouped, !!feature_name := huberM(call_duration)$mu
  ),
  "varqnduration" = summarise(
    grouped, !!feature_name := Qn(call_duration)
  ),
  "entropyduration" = summarise(
    grouped, !!feature_name := entropy.MillerMadow(call_duration)
  ),
  stop("Unknown metric: ", metric, call. = FALSE)
)

# Rows whose feature could not be computed (NA) are dropped before writing.
write.csv(na.omit(features), output_file, row.names = FALSE)
|
Loading…
Reference in New Issue