Fix bug in SMS features

2020-05-29 11:41:17 -04:00 · 2020-05-29 11:41:17 -04:00 · f344c14e92
parent cdb0670c85
commit f344c14e92
3 changed files with 3 additions and 2 deletions
--- a/docs/features/extracted.rst
+++ b/docs/features/extracted.rst
@ -110,7 +110,7 @@ count                       SMS           Number of SMS of type ``sms_type`` tha
 distinctcontacts            contacts      Number of distinct contacts that are associated with a particular ``sms_type`` during a particular ``day_segment``.
 timefirstsms                hours         Number of hours between 12:00am (midnight) and the first ``SMS`` of a particular ``sms_type``.
 timelastsms                 hours         Number of hours between 12:00am (midnight) and the last ``SMS`` of a particular ``sms_type``.
-countmostfrequentcontact    SMS           The count of the number of ``SMS`` messages of a particular ``sms_type`` for the most contacted contact for a particular ``day_segment``.
+countmostfrequentcontact    SMS           Number of ``SMS`` messages of a particular ``sms_type`` during a particular ``day_segment`` for the participant's most frequent contact, i.e. the contact with the most messages of that ``sms_type`` across the participant's whole dataset.
 =========================   =========     =============

 **Assumptions/Observations:** 
--- a/src/data/readable_datetime.R
+++ b/src/data/readable_datetime.R
@ -3,7 +3,7 @@ source("renv/activate.R")
 library("tidyverse")
 library(readr)

-input <- read.csv(snakemake@input[[1]])
+input <- read.csv(snakemake@input[[1]]) %>% arrange(timestamp)
 sensor_output <- snakemake@output[[1]]
 timezone_periods <- snakemake@params[["timezone_periods"]]
 fixed_timezone <- snakemake@params[["fixed_timezone"]]
--- a/src/features/sms/sms_base.R
+++ b/src/features/sms/sms_base.R
@ -31,6 +31,7 @@ base_sms_features <- function(sms, sms_type, day_segment, requested_features){
                mutate(N=n()) %>% 
                ungroup() %>%
                filter(N == max(N)) %>% 
+                head(1) %>% # if there are multiple contacts with the same amount of messages pick the first one only
                group_by(local_date) %>% 
                summarise(!!paste("sms", sms_type, day_segment, feature_name, sep = "_") := n())  %>% 
                replace(is.na(.), 0)