Fix bug in SMS features
parent
cdb0670c85
commit
f344c14e92
|
@ -110,7 +110,7 @@ count SMS Number of SMS of type ``sms_type`` tha
|
|||
distinctcontacts contacts Number of distinct contacts that are associated with a particular ``sms_type`` during a particular ``day_segment``.
|
||||
timefirstsms hours Number of hours between 12:00am (midnight) and the first ``SMS`` of a particular ``sms_type``.
|
||||
timelastsms hours Number of hours between 12:00am (midnight) and the last ``SMS`` of a particular ``sms_type``.
|
||||
countmostfrequentcontact SMS The count of the number of ``SMS`` messages of a particular ``sms_type`` for the most contacted contact for a particular ``day_segment``.
|
||||
countmostfrequentcontact SMS Number of ``SMS`` messages of a particular ``sms_type`` exchanged during a ``day_segment`` with the most frequent contact, i.e. the contact with the most messages of that ``sms_type`` throughout the whole dataset of each participant.
|
||||
========================= ========= =============
|
||||
|
||||
**Assumptions/Observations:**
|
||||
|
|
|
@ -3,7 +3,7 @@ source("renv/activate.R")
|
|||
library("tidyverse")
|
||||
library(readr)
|
||||
|
||||
input <- read.csv(snakemake@input[[1]])
|
||||
input <- read.csv(snakemake@input[[1]]) %>% arrange(timestamp)
|
||||
sensor_output <- snakemake@output[[1]]
|
||||
timezone_periods <- snakemake@params[["timezone_periods"]]
|
||||
fixed_timezone <- snakemake@params[["fixed_timezone"]]
|
||||
|
|
|
@ -31,6 +31,7 @@ base_sms_features <- function(sms, sms_type, day_segment, requested_features){
|
|||
mutate(N=n()) %>%
|
||||
ungroup() %>%
|
||||
filter(N == max(N)) %>%
|
||||
head(1) %>% # if multiple contacts have the same number of messages, pick only the first one
|
||||
group_by(local_date) %>%
|
||||
summarise(!!paste("sms", sms_type, day_segment, feature_name, sep = "_") := n()) %>%
|
||||
replace(is.na(.), 0)
|
||||
|
|
Loading…
Reference in New Issue