From ca5e8b6a34ddd944397a12319128dec855ddbce0 Mon Sep 17 00:00:00 2001 From: JulioV Date: Thu, 24 Oct 2019 16:27:43 -0400 Subject: [PATCH] Add communication sms metrics --- Snakefile | 8 +++++++- config.yaml | 12 +++++++++++- rules/features.snakefile | 11 +++++++++++ src/features/communication_sms_metrics.R | 23 +++++++++++++++++++++++ 4 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 rules/features.snakefile create mode 100644 src/features/communication_sms_metrics.R diff --git a/Snakefile b/Snakefile index 75719997..bd50a655 100644 --- a/Snakefile +++ b/Snakefile @@ -1,10 +1,16 @@ configfile: "config.yaml" include: "rules/preprocessing.snakefile" +include: "rules/features.snakefile" rule all: input: expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SENSORS"]), - expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"]) + expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"]), + expand("data/processed/{pid}/com_sms_{sms_type}_{day_segment}_{metric}.csv", + pid=config["PIDS"], + sms_type = config["COM_SMS"]["SMS_TYPES"], + day_segment = config["COM_SMS"]["DAY_SEGMENTS"], + metric = config["COM_SMS"]["METRICS"]), # --- Packrat Rules --- # ## Taken from https://github.com/lachlandeer/snakemake-econ-r diff --git a/config.yaml b/config.yaml index fff3a17b..15433dd1 100644 --- a/config.yaml +++ b/config.yaml @@ -4,4 +4,14 @@ SENSORS: [messages] # Participants to include in the analysis # You must create a file for each participant # named pXXX containing their device_id -PIDS: [p01, p02] \ No newline at end of file +PIDS: [p01, p02] + +# Global var with common day segments +DAY_SEGMENTS: &day_segments + [daily, morning, afternoon, evening, night] + +# Communication SMS features config +COM_SMS: + SMS_TYPES : [received, sent] + DAY_SEGMENTS: *day_segments + METRICS: [count, distinctcontacts] \ No newline at end of file diff --git 
a/rules/features.snakefile b/rules/features.snakefile
new file mode 100644
index 00000000..188513a7
--- /dev/null
+++ b/rules/features.snakefile
@@ -0,0 +1,14 @@
+# Compute a single SMS metric for one participant. The sms_type, day_segment
+# and metric wildcards are taken from the requested output path and handed to
+# the R script as params.
+rule communication_sms_metrics:
+    input:
+        "data/raw/{pid}/messages_with_datetime.csv"
+    params:
+        sms_type = "{sms_type}",
+        day_segment = "{day_segment}",
+        metric = "{metric}"
+    output:
+        "data/processed/{pid}/com_sms_{sms_type}_{day_segment}_{metric}.csv"
+    script:
+        "../src/features/communication_sms_metrics.R"
\ No newline at end of file
diff --git a/src/features/communication_sms_metrics.R b/src/features/communication_sms_metrics.R
new file mode 100644
index 00000000..9aac5a85
--- /dev/null
+++ b/src/features/communication_sms_metrics.R
@@ -0,0 +1,49 @@
+# Compute one per-day SMS communication metric for a single participant.
+#
+# Invoked by the Snakemake rule `communication_sms_metrics`, which supplies:
+#   input[[1]]          CSV of SMS records; the columns read here are
+#                       message_type, local_date, local_day_segment and trace
+#   params$sms_type     "received" or "sent"
+#   params$day_segment  "daily" (no filtering) or a value of local_day_segment
+#   params$metric       "count" or "distinctcontacts"
+#   output[[1]]         path of the CSV to write, one row per local_date
+
+source("packrat/init.R")
+
+library(dplyr)
+
+sms <- read.csv(snakemake@input[[1]])
+day_segment <- snakemake@params[["day_segment"]]
+metric <- snakemake@params[["metric"]]
+sms_type <- snakemake@params[["sms_type"]]
+output_file <- snakemake@output[[1]]
+
+# message_type "1" selects received messages and "2" sent ones. Fail loudly on
+# any other sms_type instead of silently treating it as "sent".
+type_code <- switch(sms_type,
+  "received" = "1",
+  "sent" = "2",
+  stop("Unknown sms_type: ", sms_type, call. = FALSE))
+
+metrics <- sms %>% filter(message_type == !!type_code)
+
+# "daily" keeps every row; any other segment keeps only the rows labelled with
+# it. `!!` forces the script-level value so that a data column named
+# day_segment could not shadow it inside filter().
+if (day_segment != "daily") {
+  metrics <- metrics %>% filter(local_day_segment == !!day_segment)
+}
+metrics <- metrics %>% group_by(local_date)
+
+# Name of the output column, e.g. com_sms_received_daily_count.
+metric_column <- paste("com", "sms", sms_type, day_segment, metric, sep = "_")
+
+# Without a default, switch() yields NULL for an unknown metric and a data-less
+# file would be written; stop with an explicit error instead.
+metrics <- switch(metric,
+  "count" = metrics %>% summarise(!!metric_column := n()),
+  "distinctcontacts" = metrics %>%
+    summarise(!!metric_column := n_distinct(trace)),
+  stop("Unknown metric: ", metric, call. = FALSE))
+
+write.csv(na.omit(metrics), output_file, row.names = FALSE)
\ No newline at end of file