From aba9f133320ddc30faabf546e0e220e54d8ce705 Mon Sep 17 00:00:00 2001
From: Meng Li <34143965+Meng6@users.noreply.github.com>
Date: Mon, 9 Mar 2020 13:32:14 -0400
Subject: [PATCH] Add merge metrics module for analysis rules

---
 .gitignore                                |  4 ++-
 Snakefile                                 |  9 ++++++
 config.yaml                               |  7 +++++
 rules/models.snakefile                    | 29 +++++++++++++++++++
 .../merge_metrics_of_all_participants.R   | 22 ++++++++++++++
 .../merge_metrics_of_single_participant.R | 17 +++++++++++
 6 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 rules/models.snakefile
 create mode 100644 src/models/merge_metrics_of_all_participants.R
 create mode 100644 src/models/merge_metrics_of_single_participant.R

diff --git a/.gitignore b/.gitignore
index 52de3caa..c8dbef8b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -92,7 +92,6 @@ packrat/*
 !packrat/packrat.opts
 
 .snakemake/
-# exclude data from source control by default
 # exclude data from source control by default
 data/external/*
 !/data/external/.gitkeep
@@ -108,3 +107,6 @@ reports/
 *.Rproj
 .RData
 .Rhistory
+
+# analysis part
+models/*
\ No newline at end of file
diff --git a/Snakefile b/Snakefile
index 310886ab..c2bc5d62 100644
--- a/Snakefile
+++ b/Snakefile
@@ -2,6 +2,7 @@ configfile: "config.yaml"
 include: "rules/packrat.snakefile"
 include: "rules/preprocessing.snakefile"
 include: "rules/features.snakefile"
+include: "rules/models.snakefile"
 include: "rules/reports.snakefile"
 
 rule all:
@@ -55,6 +56,14 @@ rule all:
         expand("data/processed/{pid}/fitbit_step_{day_segment}.csv",
                 pid = config["PIDS"],
                 day_segment = config["STEP"]["DAY_SEGMENTS"]),
+        # Models
+        expand("models/input/merged_single_participant/{pid}/{source}_{day_segment}.csv",
+                pid = config["PIDS"],
+                source = config["METRICS_FOR_ANALYSIS"]["SOURCES"],
+                day_segment = config["METRICS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
+        expand("models/input/merged_all_participants/{source}_{day_segment}.csv",
+                source = config["METRICS_FOR_ANALYSIS"]["SOURCES"],
+                day_segment = config["METRICS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
         # Reports
         expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
         expand("reports/figures/{pid}/compliance_heatmap.html", pid=config["PIDS"]),
diff --git a/config.yaml b/config.yaml
index 63022413..11465b42 100644
--- a/config.yaml
+++ b/config.yaml
@@ -122,3 +122,10 @@ STEP:
     SEDENTARY_BOUT: ["countsedentarybout", "maxdurationsedentarybout", "mindurationsedentarybout", "avgdurationsedentarybout", "stddurationsedentarybout"]
     ACTIVE_BOUT: ["countactivebout", "maxdurationactivebout", "mindurationactivebout", "avgdurationactivebout", "stddurationactivebout"]
   THRESHOLD_ACTIVE_BOUT: 10 # steps
+
+METRICS_FOR_ANALYSIS:
+  SOURCES: &sources ["phone_metrics", "fitbit_metrics", "phone_fitbit_metrics"]
+  DAY_SEGMENTS: *day_segments
+  PHONE_METRICS: [accelerometer, applications_foreground, battery, call_incoming, call_missed, call_outgoing, google_activity_recognition, light, location_barnett, screen, sms_received, sms_sent]
+  FITBIT_METRICS: [fitbit_heartrate, fitbit_step]
+  PHONE_FITBIT_METRICS: "" # Built from PHONE_METRICS + FITBIT_METRICS in the input_merge_metrics_of_single_participant function in models.snakefile
diff --git a/rules/models.snakefile b/rules/models.snakefile
new file mode 100644
index 00000000..1fea93f1
--- /dev/null
+++ b/rules/models.snakefile
@@ -0,0 +1,29 @@
+def input_merge_metrics_of_single_participant(wildcards):
+    """Return the per-sensor metric files to merge for one participant.
+
+    "phone_fitbit_metrics" has no usable list of its own in config.yaml, so it
+    is built by concatenating PHONE_METRICS and FITBIT_METRICS. Any other
+    source is looked up under its upper-cased name.
+    """
+    metrics_config = config["METRICS_FOR_ANALYSIS"]
+    if wildcards.source == "phone_fitbit_metrics":
+        metrics = metrics_config["PHONE_METRICS"] + metrics_config["FITBIT_METRICS"]
+    else:
+        metrics = metrics_config[wildcards.source.upper()]
+    return expand("data/processed/{pid}/{metrics}_{day_segment}.csv", pid=wildcards.pid, metrics=metrics, day_segment=wildcards.day_segment)
+
+rule merge_metrics_of_single_participant:
+    input:
+        metric_files = input_merge_metrics_of_single_participant
+    output:
+        "models/input/merged_single_participant/{pid}/{source}_{day_segment}.csv"
+    script:
+        "../src/models/merge_metrics_of_single_participant.R"
+
+rule merge_metrics_of_all_participants:
+    input:
+        metric_files = expand("models/input/merged_single_participant/{pid}/{{source}}_{{day_segment}}.csv", pid=config["PIDS"])
+    output:
+        "models/input/merged_all_participants/{source}_{day_segment}.csv"
+    script:
+        "../src/models/merge_metrics_of_all_participants.R"
\ No newline at end of file
diff --git a/src/models/merge_metrics_of_all_participants.R b/src/models/merge_metrics_of_all_participants.R
new file mode 100644
index 00000000..c08958dc
--- /dev/null
+++ b/src/models/merge_metrics_of_all_participants.R
@@ -0,0 +1,22 @@
+source("packrat/init.R")
+
+library(tidyr)
+library(purrr)
+library(dplyr)
+library(stringr)
+
+# One merged metrics file per participant, supplied by the Snakemake rule.
+metric_files <- snakemake@input[["metric_files"]]
+
+# Read every participant's file, tag its rows with the participant id and
+# stack all participants into a single long table.
+metrics_of_all_participants <- tibble(filename = metric_files) %>%
+  mutate(file_contents = map(filename, ~ read.csv(., stringsAsFactors = FALSE, colClasses = c(local_date = "character"))),
+         # The pid is the directory that holds the file; taking it from the
+         # path structure avoids silently producing NA for ids that are not
+         # exactly "p" plus two digits.
+         pid = basename(dirname(filename))) %>%
+  unnest(file_contents) %>%
+  select(-filename)
+
+write.csv(metrics_of_all_participants, snakemake@output[[1]], row.names = FALSE)
\ No newline at end of file
diff --git a/src/models/merge_metrics_of_single_participant.R b/src/models/merge_metrics_of_single_participant.R
new file mode 100644
index 00000000..60605298
--- /dev/null
+++ b/src/models/merge_metrics_of_single_participant.R
@@ -0,0 +1,17 @@
+source("packrat/init.R")
+
+library(tidyr)
+library(purrr)
+library(dplyr)
+
+# Per-sensor metric files of one participant, supplied by the Snakemake rule.
+metric_files <- snakemake@input[["metric_files"]]
+
+# Full-join all sensors on local_date so a day is kept when at least one
+# sensor has data for it; local_date is read as character so the join keys
+# have the same type in every file.
+metrics_of_single_participant <- metric_files %>%
+  map(read.csv, stringsAsFactors = FALSE, colClasses = c(local_date = "character")) %>%
+  reduce(full_join, by = "local_date")
+
+write.csv(metrics_of_single_participant, snakemake@output[[1]], row.names = FALSE)
\ No newline at end of file