Fix bug in SMS features

pull/95/head
JulioV 2020-05-29 11:41:17 -04:00
parent cdb0670c85
commit f344c14e92
3 changed files with 3 additions and 2 deletions

View File

@ -110,7 +110,7 @@ count SMS Number of SMS of type ``sms_type`` tha
distinctcontacts contacts Number of distinct contacts that are associated with a particular ``sms_type`` during a particular ``day_segment``.
timefirstsms hours Number of hours between 12:00am (midnight) and the first ``SMS`` of a particular ``sms_type``.
timelastsms hours Number of hours between 12:00am (midnight) and the last ``SMS`` of a particular ``sms_type``.
countmostfrequentcontact SMS The count of the number of ``SMS`` messages of a particular ``sms_type`` for the most contacted contact for a particular ``day_segment``.
countmostfrequentcontact SMS Number of ``SMS`` messages of a particular ``sms_type`` during a ``day_segment`` that are associated with the most frequent contact, i.e. the contact with the most messages of that ``sms_type`` across the whole dataset of each participant.
========================= ========= =============
**Assumptions/Observations:**

View File

@ -3,7 +3,7 @@ source("renv/activate.R")
library("tidyverse")
library(readr)
input <- read.csv(snakemake@input[[1]])
input <- read.csv(snakemake@input[[1]]) %>% arrange(timestamp)
sensor_output <- snakemake@output[[1]]
timezone_periods <- snakemake@params[["timezone_periods"]]
fixed_timezone <- snakemake@params[["fixed_timezone"]]

View File

@ -31,6 +31,7 @@ base_sms_features <- function(sms, sms_type, day_segment, requested_features){
mutate(N=n()) %>%
ungroup() %>%
filter(N == max(N)) %>%
head(1) %>% # if there are multiple contacts with the same amount of messages pick the first one only
group_by(local_date) %>%
summarise(!!paste("sms", sms_type, day_segment, feature_name, sep = "_") := n()) %>%
replace(is.na(.), 0)