Add communication sms metrics

replace/468af02e790336dc9e1db980ec40a97b250c7b3b
JulioV 2019-10-24 16:27:43 -04:00
parent 2fdf23e0af
commit ca5e8b6a34
4 changed files with 52 additions and 2 deletions

View File

@ -1,10 +1,16 @@
# Pipeline entry point: loads the configuration file and the included rule files,
# then declares every file the default target must produce.
configfile: "config.yaml"
include: "rules/preprocessing.snakefile"
include: "rules/features.snakefile"
# Default target: its inputs are the full set of files requested from the pipeline,
# expanded over the participant ids and sensors listed in the config.
rule all:
input:
# Raw sensor file for every participant/sensor pair.
expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
# NOTE(review): the next two lines differ only by the trailing comma; this looks like
# the removed and added sides of a diff shown without +/- markers. Confirm that only
# the comma-terminated line is meant to remain.
expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"])
expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
# One SMS feature file per participant, SMS type, day segment and metric,
# taken from the COM_SMS section of the config.
expand("data/processed/{pid}/com_sms_{sms_type}_{day_segment}_{metric}.csv",
pid=config["PIDS"],
sms_type = config["COM_SMS"]["SMS_TYPES"],
day_segment = config["COM_SMS"]["DAY_SEGMENTS"],
metric = config["COM_SMS"]["METRICS"]),
# --- Packrat Rules --- #
## Taken from https://github.com/lachlandeer/snakemake-econ-r

View File

@ -5,3 +5,13 @@ SENSORS: [messages]
# You must create a file for each participant
# named pXXX containing their device_id
PIDS: [p01, p02]
# Global var with common day segments
# The &day_segments anchor lets other sections reuse this list through the
# *day_segments alias instead of repeating it.
DAY_SEGMENTS: &day_segments
[daily, morning, afternoon, evening, night]
# Communication SMS features config
# Each key below is a list; the pipeline requests one output file for every
# combination of SMS type, day segment and metric.
COM_SMS:
SMS_TYPES : [received, sent]
# Alias to the global day-segment list defined above.
DAY_SEGMENTS: *day_segments
METRICS: [count, distinctcontacts]

View File

@ -0,0 +1,11 @@
# Builds one SMS metric file for a single participant, SMS type, day segment and
# metric by running the R script named under `script`.
rule communication_sms_metrics:
# Per-participant messages table with datetime columns already attached.
input:
"data/raw/{pid}/messages_with_datetime.csv"
# The wildcards matched in the output path are forwarded to the script as
# parameters so it knows which slice and metric to compute.
params:
sms_type = "{sms_type}",
day_segment = "{day_segment}",
metric = "{metric}"
# One CSV per (pid, sms_type, day_segment, metric) combination.
output:
"data/processed/{pid}/com_sms_{sms_type}_{day_segment}_{metric}.csv"
# NOTE(review): the path is presumably resolved relative to this rule file — confirm.
script:
"../src/features/communication_sms_metrics.R"

View File

@ -0,0 +1,23 @@
# Compute a single per-day SMS metric for one participant and write it as CSV.
#
# Inputs (all provided by Snakemake through the `snakemake` object):
#   input[[1]]            CSV of messages; this script reads the columns
#                         message_type, local_date, local_day_segment and trace.
#   params$sms_type       "received" selects message_type "1"; any other value
#                         selects message_type "2".
#   params$day_segment    "daily" keeps every message; any other value keeps only
#                         rows whose local_day_segment equals it.
#   params$metric         "count" (rows per day) or "distinctcontacts"
#                         (distinct `trace` values per day).
#   output[[1]]           Destination CSV path.
#
# Output: one row per local_date with a column named
#   com_sms_<sms_type>_<day_segment>_<metric>.
# An unknown metric stops with an error instead of silently writing nothing.
source("packrat/init.R")
library(dplyr)

sms <- read.csv(snakemake@input[[1]])
day_segment <- snakemake@params[["day_segment"]]
metric <- snakemake@params[["metric"]]
sms_type <- snakemake@params[["sms_type"]]
output_file <- snakemake@output[[1]]

# Resolve scalar settings once, outside of any dplyr verb, so they can never be
# shadowed by a column of the same name in `sms`.
message_code <- if (sms_type == "received") "1" else "2"
metric_column <- paste("com", "sms", sms_type, day_segment, metric, sep = "_")

# `!!` injects the script-level values, making it explicit which side of each
# comparison is a data column and which is a parameter.
metrics <- sms %>% filter(message_type == !!message_code)
if (day_segment != "daily") {
  metrics <- metrics %>% filter(local_day_segment == !!day_segment)
}
metrics <- metrics %>% group_by(local_date)

metrics <- switch(metric,
  "count" = metrics %>% summarise(!!metric_column := n()),
  "distinctcontacts" = metrics %>% summarise(!!metric_column := n_distinct(trace)),
  # Without a default, switch() returns NULL invisibly for an unmatched value.
  stop("Unknown SMS metric: ", metric, call. = FALSE)
)

# Rows with a missing date or metric value are dropped before writing.
write.csv(na.omit(metrics), output_file, row.names = FALSE)