From 6cf39ad9c44773a69b705ff767262c7d1293dbf4 Mon Sep 17 00:00:00 2001 From: JulioV Date: Tue, 12 Nov 2019 15:53:59 -0500 Subject: [PATCH] Add three more features to sms --- config.yaml | 4 ++-- src/features/sms_metrics.R | 24 +++++++++++++++++++----- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/config.yaml b/config.yaml index 74f94644..91c19d45 100644 --- a/config.yaml +++ b/config.yaml @@ -28,8 +28,8 @@ READABLE_DATETIME: SMS: TYPES : [received, sent] METRICS: - received: [count, distinctcontacts] - sent: [count, distinctcontacts] + received: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact] + sent: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact] DAY_SEGMENTS: *day_segments # Communication call features config, TYPES and METRICS keys need to match diff --git a/src/features/sms_metrics.R b/src/features/sms_metrics.R index fec0c2b6..8a27223d 100644 --- a/src/features/sms_metrics.R +++ b/src/features/sms_metrics.R @@ -10,11 +10,25 @@ filter_by_day_segment <- function(data, day_segment) { } compute_sms_feature <- function(sms, metric, day_segment){ - sms <- sms %>% filter_by_day_segment(day_segment) - feature <- switch(metric, - "count" = sms %>% summarise(!!paste("com", "sms", sms_type, day_segment, metric, sep = "_") := n()), - "distinctcontacts" = sms %>% summarise(!!paste("com", "sms", sms_type, day_segment, metric, sep = "_") := n_distinct(trace))) - return(feature) + if(metric == "countmostfrequentcontact"){ + # Get the most frequent contact + sms <- sms %>% group_by(trace) %>% + mutate(N=n()) %>% + ungroup() %>% + filter(N == max(N)) + + return(sms %>% + filter_by_day_segment(day_segment) %>% + summarise(!!paste("sms", sms_type, day_segment, metric, sep = "_") := n())) + } else { + sms <- sms %>% filter_by_day_segment(day_segment) + feature <- switch(metric, + "count" = sms %>% summarise(!!paste("sms", sms_type, day_segment, metric, sep = "_") := n()), + "distinctcontacts" = sms %>% summarise(!!paste("sms", sms_type, day_segment, metric, sep = "_") := n_distinct(trace)), + "timefirstsms" = sms %>% summarise(!!paste("sms", sms_type, day_segment, metric, sep = "_") := first(local_hour) + (first(local_minute)/60)), + "timelastsms" = sms %>% summarise(!!paste("sms", sms_type, day_segment, metric, sep = "_") := last(local_hour) + (last(local_minute)/60))) + return(feature) + } } sms <- read.csv(snakemake@input[[1]])