Add three more features to calls

replace/7572c4890bff7894df75d122a365f91be6d653e5
JulioV 2019-11-12 15:40:48 -05:00
parent e4008765d3
commit 031ceb00e3
2 changed files with 29 additions and 15 deletions

View File

@@ -37,8 +37,8 @@ CALLS:
TYPES: [missed, incoming, outgoing] TYPES: [missed, incoming, outgoing]
METRICS: METRICS:
missed: [count, distinctcontacts] missed: [count, distinctcontacts]
incoming: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, hubermduration, varqnduration, entropyduration] incoming: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, hubermduration, varqnduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact]
outgoing: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, hubermduration, varqnduration, entropyduration] outgoing: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, hubermduration, varqnduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact]
DAY_SEGMENTS: *day_segments DAY_SEGMENTS: *day_segments
PHONE_VALID_SENSED_DAYS: PHONE_VALID_SENSED_DAYS:

View File

@@ -17,19 +17,33 @@ Mode <- function(v) {
} }
compute_call_feature <- function(calls, metric, day_segment){ compute_call_feature <- function(calls, metric, day_segment){
calls <- calls %>% filter_by_day_segment(day_segment) if(metric == "countmostfrequentcontact"){
feature <- switch(metric, # Get the most frequent contact
"count" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := n()), calls <- calls %>% group_by(trace) %>%
"distinctcontacts" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := n_distinct(trace)), mutate(N=n()) %>%
"meanduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := mean(call_duration)), ungroup() %>%
"sumduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := sum(call_duration)), filter(N == max(N))
"minduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := min(call_duration)),
"maxduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := max(call_duration)), return(calls %>%
"stdduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := sd(call_duration)), filter_by_day_segment(day_segment) %>%
"modeduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := Mode(call_duration)), summarise(!!paste("call", type, day_segment, metric, sep = "_") := n()))
"hubermduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := huberM(call_duration)$mu), } else {
"varqnduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := Qn(call_duration)), calls <- calls %>% filter_by_day_segment(day_segment)
"entropyduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := entropy.MillerMadow(call_duration))) feature <- switch(metric,
"count" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := n()),
"distinctcontacts" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := n_distinct(trace)),
"meanduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := mean(call_duration)),
"sumduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := sum(call_duration)),
"minduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := min(call_duration)),
"maxduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := max(call_duration)),
"stdduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := sd(call_duration)),
"modeduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := Mode(call_duration)),
"hubermduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := huberM(call_duration)$mu),
"varqnduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := Qn(call_duration)),
"entropyduration" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := entropy.MillerMadow(call_duration)),
"timefirstcall" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := first(local_hour) + (first(local_minute)/60)),
"timelastcall" = calls %>% summarise(!!paste("call", type, day_segment, metric, sep = "_") := last(local_hour) + (last(local_minute)/60)))
}
return(feature) return(feature)
} }