From 3d4c26754ec6c5433bf5150268473914351b975f Mon Sep 17 00:00:00 2001 From: JulioV Date: Thu, 12 Mar 2020 17:31:46 -0400 Subject: [PATCH] Rename merge metrics for models and add filter valid sensed days Co-authored-by: Meng Li --- .gitignore | 3 --- Snakefile | 4 ++-- config.yaml | 1 + rules/models.snakefile | 20 +++++++++++-------- .../merge_metrics_for_individual_model.R | 20 +++++++++++++++++++ ...R => merge_metrics_for_population_model.R} | 0 .../merge_metrics_of_single_participant.R | 13 ------------ 7 files changed, 35 insertions(+), 26 deletions(-) create mode 100644 src/models/merge_metrics_for_individual_model.R rename src/models/{merge_metrics_of_all_participants.R => merge_metrics_for_population_model.R} (100%) delete mode 100644 src/models/merge_metrics_of_single_participant.R diff --git a/.gitignore b/.gitignore index c8dbef8b..a053f461 100644 --- a/.gitignore +++ b/.gitignore @@ -107,6 +107,3 @@ reports/ *.Rproj .RData .Rhistory - -# analysis part -models/* \ No newline at end of file diff --git a/Snakefile b/Snakefile index c2bc5d62..9797b5ea 100644 --- a/Snakefile +++ b/Snakefile @@ -57,11 +57,11 @@ rule all: pid = config["PIDS"], day_segment = config["STEP"]["DAY_SEGMENTS"]), # Models - expand("models/input/merged_single_participant/{pid}/{source}_{day_segment}.csv", + expand("data/processed/{pid}/metrics_for_individual_model/{source}_{day_segment}.csv", pid = config["PIDS"], source = config["METRICS_FOR_ANALYSIS"]["SOURCES"], day_segment = config["METRICS_FOR_ANALYSIS"]["DAY_SEGMENTS"]), - expand("models/input/merged_all_participants/{source}_{day_segment}.csv", + expand("data/processed/metrics_for_population_model/{source}_{day_segment}.csv", source = config["METRICS_FOR_ANALYSIS"]["SOURCES"], day_segment = config["METRICS_FOR_ANALYSIS"]["DAY_SEGMENTS"]), # Reports diff --git a/config.yaml b/config.yaml index 60bdae5f..98d97d40 100644 --- a/config.yaml +++ b/config.yaml @@ -130,3 +130,4 @@ METRICS_FOR_ANALYSIS: PHONE_METRICS: [accelerometer, applications_foreground, battery, call_incoming, call_missed, call_outgoing, google_activity_recognition, light, location_barnett, screen, sms_received, sms_sent] FITBIT_METRICS: [fitbit_heartrate, fitbit_step] PHONE_FITBIT_METRICS: "" # This array is merged in the input_merge_features_of_single_participant function in models.snakefile + DROP_VALID_SENSED_DAYS: True \ No newline at end of file diff --git a/rules/models.snakefile b/rules/models.snakefile index 1fea93f1..78dc3f60 100644 --- a/rules/models.snakefile +++ b/rules/models.snakefile @@ -4,18 +4,22 @@ def input_merge_metrics_of_single_participant(wildcards): else: return expand("data/processed/{pid}/{metrics}_{day_segment}.csv", pid=wildcards.pid, metrics=config["METRICS_FOR_ANALYSIS"][wildcards.source.upper()], day_segment=wildcards.day_segment) -rule merge_metrics_of_single_participant: +rule merge_metrics_for_individual_model: input: - metric_files = input_merge_metrics_of_single_participant + metric_files = input_merge_metrics_of_single_participant, + phone_valid_sensed_days = "data/interim/{pid}/phone_valid_sensed_days.csv" + params: + drop_valid_sensed_days = config["METRICS_FOR_ANALYSIS"]["DROP_VALID_SENSED_DAYS"], + source = "{source}" output: - "models/input/merged_single_participant/{pid}/{source}_{day_segment}.csv" + "data/processed/{pid}/metrics_for_individual_model/{source}_{day_segment}.csv" script: - "../src/models/merge_metrics_of_single_participant.R" + "../src/models/merge_metrics_for_individual_model.R" -rule merge_metrics_of_all_participants: +rule merge_metrics_for_population_model: input: - metric_files = expand("models/input/merged_single_participant/{pid}/{{source}}_{{day_segment}}.csv", pid=config["PIDS"]) + metric_files = expand("data/processed/{pid}/metrics_for_individual_model/{{source}}_{{day_segment}}.csv", pid=config["PIDS"]) output: - "models/input/merged_all_participants/{source}_{day_segment}.csv" + "data/processed/metrics_for_population_model/{source}_{day_segment}.csv" script: - "../src/models/merge_metrics_of_all_participants.R" \ No newline at end of file + "../src/models/merge_metrics_for_population_model.R" \ No newline at end of file diff --git a/src/models/merge_metrics_for_individual_model.R b/src/models/merge_metrics_for_individual_model.R new file mode 100644 index 00000000..03730d6c --- /dev/null +++ b/src/models/merge_metrics_for_individual_model.R @@ -0,0 +1,20 @@ +source("packrat/init.R") + +library(tidyr) +library(purrr) +library(dplyr) + +metric_files <- snakemake@input[["metric_files"]] +phone_valid_sensed_days <- read.csv(snakemake@input[["phone_valid_sensed_days"]]) +drop_valid_sensed_days <- snakemake@params[["drop_valid_sensed_days"]] +source <- snakemake@params[["source"]] + +metrics_for_individual_model <- metric_files %>% + map(read.csv, stringsAsFactors = F, colClasses = c(local_date = "character")) %>% + reduce(full_join, by="local_date") + +if(drop_valid_sensed_days && source == "phone_metrics"){ + metrics_for_individual_model <- merge(metrics_for_individual_model, phone_valid_sensed_days, by="local_date") %>% select(-valid_hours) + } + +write.csv(metrics_for_individual_model, snakemake@output[[1]], row.names = FALSE) \ No newline at end of file diff --git a/src/models/merge_metrics_of_all_participants.R b/src/models/merge_metrics_for_population_model.R similarity index 100% rename from src/models/merge_metrics_of_all_participants.R rename to src/models/merge_metrics_for_population_model.R diff --git a/src/models/merge_metrics_of_single_participant.R b/src/models/merge_metrics_of_single_participant.R deleted file mode 100644 index 60605298..00000000 --- a/src/models/merge_metrics_of_single_participant.R +++ /dev/null @@ -1,13 +0,0 @@ -source("packrat/init.R") - -library(tidyr) -library(purrr) -library(dplyr) - -metric_files <- snakemake@input[["metric_files"]] - -metrics_of_single_participant <- metric_files %>% - map(read.csv, stringsAsFactors = F, colClasses = c(local_date = "character")) %>% - reduce(full_join, by="local_date") - -write.csv(metrics_of_single_participant, snakemake@output[[1]], row.names = FALSE) \ No newline at end of file