diff --git a/Snakefile b/Snakefile
index bd50a655..92e6baca 100644
--- a/Snakefile
+++ b/Snakefile
@@ -11,6 +11,16 @@ rule all:
             sms_type = config["COM_SMS"]["SMS_TYPES"],
             day_segment = config["COM_SMS"]["DAY_SEGMENTS"],
             metric = config["COM_SMS"]["METRICS"]),
+        expand("data/processed/{pid}/com_call_{call_type}_{segment}_{metric}.csv",
+            pid=config["PIDS"],
+            call_type = config["COM_CALL"]["CALL_TYPE_MISSED"],
+            segment = config["COM_CALL"]["DAY_SEGMENTS"],
+            metric = config["COM_CALL"]["METRICS_MISSED"]),
+        expand("data/processed/{pid}/com_call_{call_type}_{segment}_{metric}.csv",
+            pid=config["PIDS"],
+            call_type = config["COM_CALL"]["CALL_TYPE_TAKEN"],
+            segment = config["COM_CALL"]["DAY_SEGMENTS"],
+            metric = config["COM_CALL"]["METRICS_TAKEN"]),
 
 # --- Packrat Rules --- #
 ## Taken from https://github.com/lachlandeer/snakemake-econ-r
diff --git a/config.yaml b/config.yaml
index a2cc57c9..e4e3d9c8 100644
--- a/config.yaml
+++ b/config.yaml
@@ -1,5 +1,5 @@
 # Valid database table names
-SENSORS: [messages]
+SENSORS: [messages, calls]
 
 # Participants to include in the analysis
 # You must create a file for each participant
@@ -22,4 +22,13 @@ READABLE_DATETIME:
 COM_SMS:
   SMS_TYPES : [received, sent]
   DAY_SEGMENTS: *day_segments
-  METRICS: [count, distinctcontacts]
\ No newline at end of file
+  METRICS: [count, distinctcontacts]
+
+# Communication call features config
+# Separate configurations for missed and taken calls
+COM_CALL:
+  CALL_TYPE_MISSED : [missed]
+  CALL_TYPE_TAKEN : [incoming, outgoing]
+  DAY_SEGMENTS: *day_segments
+  METRICS_MISSED: [count, distinctcontacts]
+  METRICS_TAKEN: [count, distinctcontacts, meanduration, sumduration, hubermduration, varqnduration, entropyduration]
\ No newline at end of file
diff --git a/packrat/packrat.lock b/packrat/packrat.lock
index cbc81eec..07e4ce25 100644
--- a/packrat/packrat.lock
+++ b/packrat/packrat.lock
@@ -13,6 +13,11 @@ Source: CRAN
 Version: 1.0.0
 Hash: 6abedd7919c4457604c0aa44529a6683
 
+Package: DEoptimR
+Source: CRAN
+Version: 1.0-8
+Hash: adc74e88e85eabe6c7d73db6a86fe6cf
+
 Package: R6
 Source: CRAN
 Version: 2.4.0
@@ -125,6 +130,11 @@ Version: 0.3.0
 Hash: 30b58109e4d7c6184a9c2e32f9ae38c6
 Requires: rlang
 
+Package: entropy
+Source: CRAN
+Version: 1.2.1
+Hash: ccff926ff232f7c19b4c84bab3d3d6d3
+
 Package: evaluate
 Source: CRAN
 Version: 0.14
@@ -362,6 +372,12 @@ Hash: 1f3014c40b12e8af0abf39fd78080237
 Requires: base64enc, evaluate, htmltools, jsonlite, knitr, mime, stringr,
     tinytex, xfun, yaml
 
+Package: robustbase
+Source: CRAN
+Version: 0.93-5
+Hash: 7b6672bf2b47c35d02a5b273393e49f5
+Requires: DEoptimR
+
 Package: rstudioapi
 Source: CRAN
 Version: 0.10
diff --git a/rules/features.snakefile b/rules/features.snakefile
index 188513a7..bf5ddeef 100644
--- a/rules/features.snakefile
+++ b/rules/features.snakefile
@@ -8,4 +8,16 @@ rule communication_sms_metrics:
     output:
         "data/processed/{pid}/com_sms_{sms_type}_{day_segment}_{metric}.csv"
     script:
-        "../src/features/communication_sms_metrics.R"
\ No newline at end of file
+        "../src/features/communication_sms_metrics.R"
+
+rule communication_call_metrics:
+    input:
+        "data/raw/{pid}/calls_with_datetime.csv"
+    params:
+        call_type = "{call_type}",
+        day_segment = "{day_segment}",
+        metric = "{metric}"
+    output:
+        "data/processed/{pid}/com_call_{call_type}_{day_segment}_{metric}.csv"
+    script:
+        "../src/features/communication_call_metrics.R"
\ No newline at end of file
diff --git a/src/features/communication_call_metrics.R b/src/features/communication_call_metrics.R
new file mode 100644
index 00000000..5bc54bb5
--- /dev/null
+++ b/src/features/communication_call_metrics.R
@@ -0,0 +1,57 @@
+# Compute one per-day call metric for a single call type and day segment.
+#
+# Run by the Snakemake rule `communication_call_metrics`. Reads the calls CSV
+# passed as the rule input, keeps the rows of the requested call type (and of
+# the requested day segment unless it is "daily"), summarises them per
+# `local_date`, and writes a CSV whose metric column is named
+# com_call_{call_type}_{day_segment}_{metric}.
+
+source("packrat/init.R")
+
+library(dplyr)
+library(entropy)
+library(robustbase)
+
+calls <- read.csv(snakemake@input[[1]])
+day_segment <- snakemake@params[["day_segment"]]
+metric <- snakemake@params[["metric"]]
+type <- snakemake@params[["call_type"]]
+output_file <- snakemake@output[[1]]
+
+# Values of the `call_type` column matched for each call type name.
+call_type_codes <- c(incoming = "1", outgoing = "2", missed = "3")
+if (!type %in% names(call_type_codes)) {
+  stop("Unknown call type: ", type, call. = FALSE)
+}
+type_code <- call_type_codes[[type]]
+
+# `!!` injects the script-level values so that a column with the same name in
+# the calls data cannot shadow them inside filter().
+metrics <- calls %>% filter(call_type == !!type_code)
+
+# "daily" keeps every row of the day; other segments keep only their own rows.
+if (day_segment != "daily") {
+  metrics <- metrics %>% filter(local_day_segment == !!day_segment)
+}
+metrics <- metrics %>% group_by(local_date)
+
+# Name of the single metric column in the output file.
+feature <- paste("com", "call", type, day_segment, metric, sep = "_")
+
+# NOTE(review): entropy.MillerMadow() is documented as taking a vector of
+# counts; here it receives the raw call durations -- confirm this is intended.
+metrics <- switch(metric,
+  "count" = summarise(metrics, !!feature := n()),
+  "distinctcontacts" = summarise(metrics, !!feature := n_distinct(trace)),
+  "meanduration" = summarise(metrics, !!feature := mean(call_duration)),
+  "sumduration" = summarise(metrics, !!feature := sum(call_duration)),
+  "hubermduration" = summarise(metrics, !!feature := huberM(call_duration)$mu),
+  "varqnduration" = summarise(metrics, !!feature := Qn(call_duration)),
+  "entropyduration" = summarise(
+    metrics, !!feature := entropy.MillerMadow(call_duration)
+  ),
+  stop("Unknown metric: ", metric, call. = FALSE)
+)
+
+# na.omit() drops the dates for which the metric could not be computed.
+write.csv(na.omit(metrics), output_file, row.names = FALSE)