Add three more features to sms

replace/3656e8b149ad6fbea1811b73f32cbe519079ffa1
JulioV 2019-11-12 15:53:59 -05:00
parent 364200a304
commit 6cf39ad9c4
2 changed files with 21 additions and 7 deletions

View File

@ -28,8 +28,8 @@ READABLE_DATETIME:
SMS: SMS:
TYPES : [received, sent] TYPES : [received, sent]
METRICS: METRICS:
received: [count, distinctcontacts] received: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact]
sent: [count, distinctcontacts] sent: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact]
DAY_SEGMENTS: *day_segments DAY_SEGMENTS: *day_segments
# Communication call features config, TYPES and METRICS keys need to match # Communication call features config, TYPES and METRICS keys need to match

View File

@ -10,11 +10,25 @@ filter_by_day_segment <- function(data, day_segment) {
} }
# Compute a single SMS feature for one metric and one day segment.
#
# Args:
#   sms:         data frame of SMS rows. This function reads the columns
#                `trace`, `local_hour` and `local_minute`.
#   metric:      one of "count", "distinctcontacts", "timefirstsms",
#                "timelastsms" or "countmostfrequentcontact".
#   day_segment: forwarded to filter_by_day_segment() and embedded in the
#                name of the output column.
#
# Returns:
#   The result of summarise() over the rows returned by
#   filter_by_day_segment(), holding one feature column named
#   sms_<sms_type>_<day_segment>_<metric>.
#
# NOTE(review): `sms_type` is NOT a parameter; it is looked up in the
# enclosing/global environment and must be defined before this is called.
compute_sms_feature <- function(sms, metric, day_segment){
    # Build the output column name once instead of repeating it per metric.
    feature_name <- paste("sms", sms_type, day_segment, metric, sep = "_")

    if(metric == "countmostfrequentcontact"){
        # The most frequent contact is picked over ALL rows, i.e. before the
        # day-segment filter is applied.
        contact_counts <- sms %>%
            group_by(trace) %>%
            summarise(N = n())

        # which.max() returns only the first maximum, so a tie between
        # contacts no longer adds up the messages of every tied contact.
        # It returns integer(0) on empty input (no warning, unlike max()).
        top_contact <- contact_counts$trace[which.max(contact_counts$N)]

        feature <- sms %>%
            ungroup() %>%
            filter(trace %in% top_contact) %>%
            filter_by_day_segment(day_segment) %>%
            summarise(!!feature_name := n())
    } else {
        segment_sms <- sms %>% filter_by_day_segment(day_segment)

        # NOTE(review): first()/last() depend on row order; this presumably
        # expects rows sorted chronologically -- confirm against the caller.
        feature <- switch(metric,
            "count" = segment_sms %>%
                summarise(!!feature_name := n()),
            "distinctcontacts" = segment_sms %>%
                summarise(!!feature_name := n_distinct(trace)),
            "timefirstsms" = segment_sms %>%
                summarise(!!feature_name := first(local_hour) + (first(local_minute)/60)),
            "timelastsms" = segment_sms %>%
                summarise(!!feature_name := last(local_hour) + (last(local_minute)/60)),
            # Fail loudly instead of silently returning NULL.
            stop("Unknown SMS metric: ", metric, call. = FALSE))
    }

    return(feature)
}
sms <- read.csv(snakemake@input[[1]]) sms <- read.csv(snakemake@input[[1]])