Add three more features to sms

replace/3656e8b149ad6fbea1811b73f32cbe519079ffa1
JulioV 2019-11-12 15:53:59 -05:00
parent 364200a304
commit 6cf39ad9c4
2 changed files with 21 additions and 7 deletions

View File

@ -28,8 +28,8 @@ READABLE_DATETIME:
SMS:
  TYPES : [received, sent]
  # One METRICS entry per value listed in TYPES; each lists the features to compute for that type
  METRICS:
    received: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact]
    sent: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact]
  DAY_SEGMENTS: *day_segments
# Communication call features config, TYPES and METRICS keys need to match

View File

@ -10,11 +10,25 @@ filter_by_day_segment <- function(data, day_segment) {
}
# Compute a single SMS feature for one day segment.
#
# Args:
#   sms: data frame of SMS records. Depending on `metric` this function reads
#        the columns `trace`, `local_hour` and `local_minute`.
#   metric: one of "count", "distinctcontacts", "timefirstsms", "timelastsms"
#           or "countmostfrequentcontact". Any other value raises an error.
#   day_segment: forwarded to filter_by_day_segment() and embedded in the
#                name of the output column.
#
# Returns:
#   The result of summarise() with one feature column named
#   sms_<sms_type>_<day_segment>_<metric>.
#
# Note: `sms_type` is not a parameter; it must be defined outside this
# function before it is called.
compute_sms_feature <- function(sms, metric, day_segment){
    # Every metric writes to the same dynamically named column
    feature_name <- paste("sms", sms_type, day_segment, metric, sep = "_")

    if(metric == "countmostfrequentcontact"){
        # The most frequent contact is chosen from all rows received, i.e.
        # before the day segment filter is applied
        contact_counts <- sms %>%
            group_by(trace) %>%
            mutate(N = n()) %>%
            ungroup()
        # which.max() returns the first maximum, so ties resolve to a single
        # contact, and it returns nothing (without warning) for empty input
        top_contact <- contact_counts %>%
            slice(which.max(N)) %>%
            pull(trace)
        # %in% (rather than ==) keeps this well defined for NA or empty values
        feature <- contact_counts %>%
            filter(trace %in% top_contact) %>%
            filter_by_day_segment(day_segment) %>%
            summarise(!!feature_name := n())
    } else {
        sms <- sms %>% filter_by_day_segment(day_segment)
        # NOTE(review): first()/last() use the current row order - assumes
        # sms is sorted chronologically; confirm against the caller
        feature <- switch(metric,
            "count" = sms %>% summarise(!!feature_name := n()),
            "distinctcontacts" = sms %>% summarise(!!feature_name := n_distinct(trace)),
            # Time of day expressed as decimal hours (hour + minute / 60)
            "timefirstsms" = sms %>% summarise(!!feature_name := first(local_hour) + (first(local_minute)/60)),
            "timelastsms" = sms %>% summarise(!!feature_name := last(local_hour) + (last(local_minute)/60)),
            # Fail loudly instead of silently returning NULL
            stop("Unknown sms metric: ", metric, call. = FALSE))
    }
    feature
}
# Load the SMS records from the first input file declared by the Snakemake rule
sms <- read.csv(snakemake@input[[1]])