Add three more features to calls

replace/7572c4890bff7894df75d122a365f91be6d653e5
JulioV 2019-11-12 15:40:48 -05:00
parent e4008765d3
commit 031ceb00e3
2 changed files with 29 additions and 15 deletions

View File

@ -37,8 +37,8 @@ CALLS:
TYPES: [missed, incoming, outgoing]
METRICS:
missed: [count, distinctcontacts]
incoming: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, hubermduration, varqnduration, entropyduration]
outgoing: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, hubermduration, varqnduration, entropyduration]
incoming: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, hubermduration, varqnduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact]
outgoing: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, hubermduration, varqnduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact]
DAY_SEGMENTS: *day_segments
PHONE_VALID_SENSED_DAYS:

View File

@ -17,19 +17,33 @@ Mode <- function(v) {
}
# Compute a single call feature for one day segment.
#
# Arguments:
#   calls       Data frame of call records. Depending on `metric`, the columns
#               `trace`, `call_duration`, `local_hour` and `local_minute`
#               are read.
#   metric      Name of the feature to compute, e.g. "count", "meanduration",
#               "timefirstcall" or "countmostfrequentcontact".
#   day_segment Day segment handed to filter_by_day_segment() and embedded in
#               the name of the output column.
#
# Returns:
#   The result of summarise() with one column named
#   call_<type>_<day_segment>_<metric>, or NULL when `metric` is not one of
#   the recognised names.
#
# NOTE(review): `type` is not a parameter of this function; it is looked up in
# the enclosing environment and must be defined by the calling script.
compute_call_feature <- function(calls, metric, day_segment) {
  # Restrict the records to the requested day segment once, up front.
  calls <- calls %>% filter_by_day_segment(day_segment)
  feature_name <- paste("call", type, day_segment, metric, sep = "_")

  if (metric == "countmostfrequentcontact") {
    # Pick exactly one contact with the highest number of calls. which.max()
    # returns the first maximum, so ties no longer add up the calls of several
    # contacts, and an empty input yields a count of 0 without a max() warning.
    top_contact <- calls %>%
      group_by(trace) %>%
      summarise(N = n()) %>%
      slice(which.max(N)) %>%
      pull(trace)

    # %in% (rather than ==) keeps working when the contact is NA or when
    # there is no contact at all.
    return(
      calls %>%
        ungroup() %>%
        filter(trace %in% top_contact) %>%
        summarise(!!feature_name := n())
    )
  }

  # timefirstcall/timelastcall take the first/last row as it appears in
  # `calls`, expressed as fractional hours (hour + minute / 60).
  # NOTE(review): presumably `calls` is sorted chronologically - confirm.
  feature <- switch(metric,
    "count" = calls %>%
      summarise(!!feature_name := n()),
    "distinctcontacts" = calls %>%
      summarise(!!feature_name := n_distinct(trace)),
    "meanduration" = calls %>%
      summarise(!!feature_name := mean(call_duration)),
    "sumduration" = calls %>%
      summarise(!!feature_name := sum(call_duration)),
    "minduration" = calls %>%
      summarise(!!feature_name := min(call_duration)),
    "maxduration" = calls %>%
      summarise(!!feature_name := max(call_duration)),
    "stdduration" = calls %>%
      summarise(!!feature_name := sd(call_duration)),
    "modeduration" = calls %>%
      summarise(!!feature_name := Mode(call_duration)),
    "hubermduration" = calls %>%
      summarise(!!feature_name := huberM(call_duration)$mu),
    "varqnduration" = calls %>%
      summarise(!!feature_name := Qn(call_duration)),
    "entropyduration" = calls %>%
      summarise(!!feature_name := entropy.MillerMadow(call_duration)),
    "timefirstcall" = calls %>%
      summarise(!!feature_name := first(local_hour) + (first(local_minute) / 60)),
    "timelastcall" = calls %>%
      summarise(!!feature_name := last(local_hour) + (last(local_minute) / 60))
  )

  # switch() yields NULL for an unrecognised metric; that NULL is passed on.
  feature
}