From 031ceb00e317ff04bb0c584f3c192034107b09d4 Mon Sep 17 00:00:00 2001 From: JulioV Date: Tue, 12 Nov 2019 15:40:48 -0500 Subject: [PATCH] Add three more features to calls --- config.yaml | 4 ++-- src/features/call_metrics.R | 40 +++++++++++++++++++++++++------------ 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/config.yaml b/config.yaml index b646f6f9..b4f16253 100644 --- a/config.yaml +++ b/config.yaml @@ -37,8 +37,8 @@ CALLS: TYPES: [missed, incoming, outgoing] METRICS: missed: [count, distinctcontacts] - incoming: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, hubermduration, varqnduration, entropyduration] - outgoing: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, hubermduration, varqnduration, entropyduration] + incoming: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, hubermduration, varqnduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact] + outgoing: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, hubermduration, varqnduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact] DAY_SEGMENTS: *day_segments PHONE_VALID_SENSED_DAYS: diff --git a/src/features/call_metrics.R b/src/features/call_metrics.R index 04123b9a..3b0bb81b 100644 --- a/src/features/call_metrics.R +++ b/src/features/call_metrics.R @@ -17,19 +17,33 @@ Mode <- function(v) { } compute_call_feature <- function(calls, metric, day_segment){ - calls <- calls %>% filter_by_day_segment(day_segment) - feature <- switch(metric, - "count" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := n()), - "distinctcontacts" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := n_distinct(trace)), - "meanduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := mean(call_duration)), - "sumduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := sum(call_duration)), - "minduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := min(call_duration)), - "maxduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := max(call_duration)), - "stdduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := sd(call_duration)), - "modeduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := Mode(call_duration)), - "hubermduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := huberM(call_duration)$mu), - "varqnduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := Qn(call_duration)), - "entropyduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := entropy.MillerMadow(call_duration))) + if(metric == "countmostfrequentcontact"){ + # Get the most frequent contact + calls <- calls %>% group_by(trace) %>% + mutate(N=n()) %>% + ungroup() %>% + filter(N == max(N)) + + return(calls %>% + filter_by_day_segment(day_segment) %>% + summarise(!!paste("call", type, day_segment, metric, sep = "_") := n())) + } else { + calls <- calls %>% filter_by_day_segment(day_segment) + feature <- switch(metric, + "count" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := n()), + "distinctcontacts" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := n_distinct(trace)), + "meanduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := mean(call_duration)), + "sumduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := sum(call_duration)), + "minduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := min(call_duration)), + "maxduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := max(call_duration)), + "stdduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := sd(call_duration)), + "modeduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := Mode(call_duration)), + "hubermduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := huberM(call_duration)$mu), + "varqnduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := Qn(call_duration)), + "entropyduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := entropy.MillerMadow(call_duration)), + "timefirstcall" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := first(local_hour) + (first(local_minute)/60)), + "timelastcall" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := last(local_hour) + (last(local_minute)/60))) + } return(feature) }