Add call features
parent
51dac2fb18
commit
6287afcb4c
10
Snakefile
10
Snakefile
|
@ -11,6 +11,16 @@ rule all:
|
||||||
sms_type = config["COM_SMS"]["SMS_TYPES"],
|
sms_type = config["COM_SMS"]["SMS_TYPES"],
|
||||||
day_segment = config["COM_SMS"]["DAY_SEGMENTS"],
|
day_segment = config["COM_SMS"]["DAY_SEGMENTS"],
|
||||||
metric = config["COM_SMS"]["METRICS"]),
|
metric = config["COM_SMS"]["METRICS"]),
|
||||||
|
expand("data/processed/{pid}/com_call_{call_type}_{segment}_{metric}.csv",
|
||||||
|
pid=config["PIDS"],
|
||||||
|
call_type = config["COM_CALL"]["CALL_TYPE_MISSED"],
|
||||||
|
segment = config["COM_CALL"]["DAY_SEGMENTS"],
|
||||||
|
metric = config["COM_CALL"]["METRICS_MISSED"]),
|
||||||
|
expand("data/processed/{pid}/com_call_{call_type}_{segment}_{metric}.csv",
|
||||||
|
pid=config["PIDS"],
|
||||||
|
call_type = config["COM_CALL"]["CALL_TYPE_TAKEN"],
|
||||||
|
segment = config["COM_CALL"]["DAY_SEGMENTS"],
|
||||||
|
metric = config["COM_CALL"]["METRICS_TAKEN"]),
|
||||||
|
|
||||||
# --- Packrat Rules --- #
|
# --- Packrat Rules --- #
|
||||||
## Taken from https://github.com/lachlandeer/snakemake-econ-r
|
## Taken from https://github.com/lachlandeer/snakemake-econ-r
|
||||||
|
|
11
config.yaml
11
config.yaml
|
@ -1,5 +1,5 @@
|
||||||
# Valid database table names
|
# Valid database table names
|
||||||
SENSORS: [messages]
|
SENSORS: [messages, calls]
|
||||||
|
|
||||||
# Participants to include in the analysis
|
# Participants to include in the analysis
|
||||||
# You must create a file for each participant
|
# You must create a file for each participant
|
||||||
|
@ -23,3 +23,12 @@ COM_SMS:
|
||||||
SMS_TYPES : [received, sent]
|
SMS_TYPES : [received, sent]
|
||||||
DAY_SEGMENTS: *day_segments
|
DAY_SEGMENTS: *day_segments
|
||||||
METRICS: [count, distinctcontacts]
|
METRICS: [count, distinctcontacts]
|
||||||
|
|
||||||
|
# Communication call features config
|
||||||
|
# Separate configurations for missed and taken calls
|
||||||
|
COM_CALL:
|
||||||
|
CALL_TYPE_MISSED : [missed]
|
||||||
|
CALL_TYPE_TAKEN : [incoming, outgoing]
|
||||||
|
DAY_SEGMENTS: *day_segments
|
||||||
|
METRICS_MISSED: [count, distinctcontacts]
|
||||||
|
METRICS_TAKEN: [count, distinctcontacts, meanduration, sumduration, hubermduration, varqnduration, entropyduration]
|
|
@ -13,6 +13,11 @@ Source: CRAN
|
||||||
Version: 1.0.0
|
Version: 1.0.0
|
||||||
Hash: 6abedd7919c4457604c0aa44529a6683
|
Hash: 6abedd7919c4457604c0aa44529a6683
|
||||||
|
|
||||||
|
Package: DEoptimR
|
||||||
|
Source: CRAN
|
||||||
|
Version: 1.0-8
|
||||||
|
Hash: adc74e88e85eabe6c7d73db6a86fe6cf
|
||||||
|
|
||||||
Package: R6
|
Package: R6
|
||||||
Source: CRAN
|
Source: CRAN
|
||||||
Version: 2.4.0
|
Version: 2.4.0
|
||||||
|
@ -125,6 +130,11 @@ Version: 0.3.0
|
||||||
Hash: 30b58109e4d7c6184a9c2e32f9ae38c6
|
Hash: 30b58109e4d7c6184a9c2e32f9ae38c6
|
||||||
Requires: rlang
|
Requires: rlang
|
||||||
|
|
||||||
|
Package: entropy
|
||||||
|
Source: CRAN
|
||||||
|
Version: 1.2.1
|
||||||
|
Hash: ccff926ff232f7c19b4c84bab3d3d6d3
|
||||||
|
|
||||||
Package: evaluate
|
Package: evaluate
|
||||||
Source: CRAN
|
Source: CRAN
|
||||||
Version: 0.14
|
Version: 0.14
|
||||||
|
@ -362,6 +372,12 @@ Hash: 1f3014c40b12e8af0abf39fd78080237
|
||||||
Requires: base64enc, evaluate, htmltools, jsonlite, knitr, mime,
|
Requires: base64enc, evaluate, htmltools, jsonlite, knitr, mime,
|
||||||
stringr, tinytex, xfun, yaml
|
stringr, tinytex, xfun, yaml
|
||||||
|
|
||||||
|
Package: robustbase
|
||||||
|
Source: CRAN
|
||||||
|
Version: 0.93-5
|
||||||
|
Hash: 7b6672bf2b47c35d02a5b273393e49f5
|
||||||
|
Requires: DEoptimR
|
||||||
|
|
||||||
Package: rstudioapi
|
Package: rstudioapi
|
||||||
Source: CRAN
|
Source: CRAN
|
||||||
Version: 0.10
|
Version: 0.10
|
||||||
|
|
|
@ -9,3 +9,15 @@ rule communication_sms_metrics:
|
||||||
"data/processed/{pid}/com_sms_{sms_type}_{day_segment}_{metric}.csv"
|
"data/processed/{pid}/com_sms_{sms_type}_{day_segment}_{metric}.csv"
|
||||||
script:
|
script:
|
||||||
"../src/features/communication_sms_metrics.R"
|
"../src/features/communication_sms_metrics.R"
|
||||||
|
|
||||||
|
# Builds one call-feature CSV per combination of the {pid}, {call_type},
# {day_segment} and {metric} wildcards found in the output path.
# Input is the participant's calls_with_datetime.csv; the three non-pid
# wildcards are forwarded unchanged as params to the R script, which reads
# them and writes the output file.
rule communication_call_metrics:
|
||||||
|
input:
|
||||||
|
"data/raw/{pid}/calls_with_datetime.csv"
|
||||||
|
params:
|
||||||
|
call_type = "{call_type}",
|
||||||
|
day_segment = "{day_segment}",
|
||||||
|
metric = "{metric}"
|
||||||
|
output:
|
||||||
|
"data/processed/{pid}/com_call_{call_type}_{day_segment}_{metric}.csv"
|
||||||
|
script:
|
||||||
|
"../src/features/communication_call_metrics.R"
|
|
@ -0,0 +1,30 @@
|
||||||
|
# Compute a single communication-call feature for one participant.
#
# Executed by Snakemake, which injects the `snakemake` object. The script:
#   1. reads the calls CSV given as the rule's first input,
#   2. keeps the rows matching the requested call type (and day segment,
#      unless the segment is "daily"),
#   3. summarises the remaining rows per `local_date` with the requested
#      metric, and
#   4. writes the per-date values (rows containing NA removed) to the rule's
#      first output.
#
# Params read: "day_segment", "metric", "call_type".
# Columns read from the CSV: call_type, local_date, local_day_segment, trace,
# call_duration.
source("packrat/init.R")

library(dplyr)
library(entropy)
library(robustbase)

calls <- read.csv(snakemake@input[[1]])
day_segment <- snakemake@params[["day_segment"]]
metric <- snakemake@params[["metric"]]
type <- snakemake@params[["call_type"]]
output_file <- snakemake@output[[1]]

# Codes stored in the `call_type` column for each supported call type.
# An unknown type is rejected instead of silently being treated as "missed".
call_type_codes <- c(incoming = "1", outgoing = "2", missed = "3")
if (!type %in% names(call_type_codes)) {
  stop(
    "Unknown call_type '", type, "'; expected one of: ",
    paste(names(call_type_codes), collapse = ", "),
    call. = FALSE
  )
}
type_code <- call_type_codes[[type]]

# Name of the single feature column written to the output CSV.
feature_name <- paste("com", "call", type, day_segment, metric, sep = "_")

# `!!` injects the script-level values so that a CSV column that happens to be
# called `type_code` or `day_segment` can never shadow them inside filter().
metrics <- calls %>%
  filter(call_type == !!type_code)

# "daily" aggregates over the whole day; any other value selects one segment.
if (day_segment != "daily") {
  metrics <- metrics %>%
    filter(local_day_segment == !!day_segment)
}

metrics <- metrics %>%
  group_by(local_date)

# The trailing unnamed argument is switch()'s default: fail loudly on an
# unsupported metric rather than writing an empty file.
metrics <- switch(metric,
  "count" = metrics %>%
    summarise(!!feature_name := n()),
  "distinctcontacts" = metrics %>%
    summarise(!!feature_name := n_distinct(trace)),
  "meanduration" = metrics %>%
    summarise(!!feature_name := mean(call_duration)),
  "sumduration" = metrics %>%
    summarise(!!feature_name := sum(call_duration)),
  "hubermduration" = metrics %>%
    summarise(!!feature_name := huberM(call_duration)$mu),
  "varqnduration" = metrics %>%
    summarise(!!feature_name := Qn(call_duration)),
  "entropyduration" = metrics %>%
    summarise(!!feature_name := entropy.MillerMadow(call_duration)),
  stop("Unknown metric '", metric, "'", call. = FALSE)
)

write.csv(na.omit(metrics), output_file, row.names = FALSE)
|
Loading…
Reference in New Issue