Add communication SMS metrics
parent
2fdf23e0af
commit
ca5e8b6a34
|
@ -1,10 +1,16 @@
|
||||||
configfile: "config.yaml"
|
configfile: "config.yaml"
|
||||||
include: "rules/preprocessing.snakefile"
|
include: "rules/preprocessing.snakefile"
|
||||||
|
include: "rules/features.snakefile"
|
||||||
|
|
||||||
rule all:
|
rule all:
|
||||||
input:
|
input:
|
||||||
expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
|
expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
|
||||||
expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"])
|
expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
|
||||||
|
expand("data/processed/{pid}/com_sms_{sms_type}_{day_segment}_{metric}.csv",
|
||||||
|
pid=config["PIDS"],
|
||||||
|
sms_type = config["COM_SMS"]["SMS_TYPES"],
|
||||||
|
day_segment = config["COM_SMS"]["DAY_SEGMENTS"],
|
||||||
|
metric = config["COM_SMS"]["METRICS"]),
|
||||||
|
|
||||||
# --- Packrat Rules --- #
|
# --- Packrat Rules --- #
|
||||||
## Taken from https://github.com/lachlandeer/snakemake-econ-r
|
## Taken from https://github.com/lachlandeer/snakemake-econ-r
|
||||||
|
|
12
config.yaml
12
config.yaml
|
@ -4,4 +4,14 @@ SENSORS: [messages]
|
||||||
# Participants to include in the analysis
|
# Participants to include in the analysis
|
||||||
# You must create a file for each participant
|
# You must create a file for each participant
|
||||||
# named pXXX containing their device_id
|
# named pXXX containing their device_id
|
||||||
PIDS: [p01, p02]
|
PIDS: [p01, p02]
|
||||||
|
|
||||||
|
# Global var with common day segments
|
||||||
|
DAY_SEGMENTS: &day_segments
|
||||||
|
[daily, morning, afternoon, evening, night]
|
||||||
|
|
||||||
|
# Communication SMS features config
|
||||||
|
COM_SMS:
|
||||||
|
SMS_TYPES : [received, sent]
|
||||||
|
DAY_SEGMENTS: *day_segments
|
||||||
|
METRICS: [count, distinctcontacts]
|
|
@ -0,0 +1,11 @@
|
||||||
|
rule communication_sms_metrics:
|
||||||
|
input:
|
||||||
|
"data/raw/{pid}/messages_with_datetime.csv"
|
||||||
|
params:
|
||||||
|
sms_type = "{sms_type}",
|
||||||
|
day_segment = "{day_segment}",
|
||||||
|
metric = "{metric}"
|
||||||
|
output:
|
||||||
|
"data/processed/{pid}/com_sms_{sms_type}_{day_segment}_{metric}.csv"
|
||||||
|
script:
|
||||||
|
"../src/features/communication_sms_metrics.R"
|
|
@ -0,0 +1,23 @@
|
||||||
|
# Compute one SMS communication metric per local date.
#
# Executed through Snakemake's `script:` directive, which provides the
# `snakemake` object read below:
#   input[[1]]           CSV of messages to read
#   params$sms_type      "received" selects message_type 1; any other value
#                        selects message_type 2
#   params$day_segment   "daily" keeps every row; any other value keeps only
#                        rows whose local_day_segment equals it
#   params$metric        "count" or "distinctcontacts"
#   output[[1]]          CSV to write (one row per local_date)
source("packrat/init.R")

library(dplyr)

sms <- read.csv(snakemake@input[[1]])
day_segment <- snakemake@params[["day_segment"]]
metric <- snakemake@params[["metric"]]
sms_type <- snakemake@params[["sms_type"]]
output_file <- snakemake@output[[1]]

# Name of the single metric column, e.g. com_sms_received_daily_count.
metric_column <- paste("com", "sms", sms_type, day_segment, metric, sep = "_")

# sms_type is a scalar, so a plain if/else replaces the vectorised ifelse().
message_type_code <- if (sms_type == "received") "1" else "2"

# `!!` injects the script-level values so that a data column with the same
# name as one of the parameters can never shadow them inside filter().
metrics <- sms %>% filter(message_type == !!message_type_code)

if (day_segment != "daily") {
  metrics <- metrics %>% filter(local_day_segment == !!day_segment)
}

metrics <- metrics %>% group_by(local_date)

# Fail loudly on an unsupported metric instead of writing an empty file.
# NOTE(review): `trace` presumably identifies the contact -- confirm.
metrics <- switch(metric,
  "count" = metrics %>%
    summarise(!!metric_column := n()),
  "distinctcontacts" = metrics %>%
    summarise(!!metric_column := n_distinct(trace)),
  stop("Unknown SMS metric: ", metric, call. = FALSE)
)

write.csv(na.omit(metrics), output_file, row.names = FALSE)
|
Loading…
Reference in New Issue