From b3aa4d82e1084d2c3139fd615c5f152768fff492 Mon Sep 17 00:00:00 2001 From: Meng Li <34143965+Meng6@users.noreply.github.com> Date: Fri, 24 Jul 2020 12:58:48 -0400 Subject: [PATCH] Overall compliance heatmap: shows all dates for all participants (only supports last certain dates previously) --- Snakefile | 7 +- config.yaml | 5 +- rules/preprocessing.snakefile | 4 +- rules/reports.snakefile | 7 +- src/data/phone_valid_sensed_days.R | 2 +- .../overall_compliance_heatmap.py | 80 +++++++++++++------ 6 files changed, 71 insertions(+), 34 deletions(-) diff --git a/Snakefile b/Snakefile index 2a0944d3..45bc64f2 100644 --- a/Snakefile +++ b/Snakefile @@ -22,7 +22,10 @@ if config["PHONE_VALID_SENSED_DAYS"]["COMPUTE"]: if len(config["PHONE_VALID_SENSED_BINS"]["TABLES"]) == 0: raise ValueError("If you want to compute PHONE_VALID_SENSED_DAYS, you need to add at least one table to [PHONE_VALID_SENSED_BINS][TABLES] in config.yaml") files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}h.csv", pid=config["PIDS"], min_valid_hours_per_day=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_HOURS_PER_DAY"])) + files_to_compute.extend(expand("data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins.csv", + pid=config["PIDS"], + min_valid_hours_per_day=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_HOURS_PER_DAY"], + min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"])) if config["MESSAGES"]["COMPUTE"]: files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"])) @@ -143,7 +146,7 @@ if config["HEATMAP_SENSED_BINS"]["PLOT"]: files_to_compute.extend(["reports/data_exploration/heatmap_sensed_bins_all_participants.html"]) if config["OVERALL_COMPLIANCE_HEATMAP"]["PLOT"]: - files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}h/overall_compliance_heatmap.html", min_valid_hours_per_day=config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_HOURS_PER_DAY"])) + files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/overall_compliance_heatmap.html", min_valid_hours_per_day=config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"])) # analysis example if config["PARAMS_FOR_ANALYSIS"]["COMPUTE"]: diff --git a/config.yaml b/config.yaml index 5e2892c3..476235e3 100644 --- a/config.yaml +++ b/config.yaml @@ -37,7 +37,7 @@ PHONE_VALID_SENSED_BINS: PHONE_VALID_SENSED_DAYS: COMPUTE: False MIN_VALID_HOURS_PER_DAY: &min_valid_hours_per_day [16] # (out of 24) MIN_HOURS_PER_DAY - MIN_VALID_BINS_PER_HOUR: &min_valid_bins_per_hour 6 # (out of 60min/BIN_SIZE bins) + MIN_VALID_BINS_PER_HOUR: &min_valid_bins_per_hour [6] # (out of 60min/BIN_SIZE bins) # Communication SMS features config, TYPES and FEATURES keys need to match MESSAGES: @@ -229,8 +229,9 @@ HEATMAP_SENSED_BINS: OVERALL_COMPLIANCE_HEATMAP: PLOT: False + ONLY_SHOW_VALID_DAYS: False + EXPECTED_NUM_OF_DAYS: -1 BIN_SIZE: *bin_size - EXPECTED_NUM_OF_DAYS: 7 MIN_VALID_BINS_PER_HOUR: *min_valid_bins_per_hour MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day diff --git a/rules/preprocessing.snakefile b/rules/preprocessing.snakefile index 2fb4c70e..97e34781 100644 --- a/rules/preprocessing.snakefile +++ b/rules/preprocessing.snakefile @@ -52,9 +52,9 @@ rule phone_valid_sensed_days: phone_sensed_bins = "data/interim/{pid}/phone_sensed_bins.csv" params: min_valid_hours_per_day = "{min_valid_hours_per_day}", - min_valid_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"] + min_valid_bins_per_hour = "{min_valid_bins_per_hour}" output: - "data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}h.csv" + "data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins.csv" script: "../src/data/phone_valid_sensed_days.R" diff --git a/rules/reports.snakefile b/rules/reports.snakefile index da9a0809..31f7b439 100644 --- a/rules/reports.snakefile +++ b/rules/reports.snakefile @@ -62,15 +62,16 @@ rule heatmap_sensed_bins_all_participants: rule overall_compliance_heatmap: input: phone_sensed_bins = expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]), - phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}h.csv", pid=config["PIDS"]), + phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}hours_{{min_valid_bins_per_hour}}bins.csv", pid=config["PIDS"]), pid_files = expand("data/external/{pid}", pid=config["PIDS"]) params: + only_show_valid_days = config["OVERALL_COMPLIANCE_HEATMAP"]["ONLY_SHOW_VALID_DAYS"], local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"], expected_num_of_days = config["OVERALL_COMPLIANCE_HEATMAP"]["EXPECTED_NUM_OF_DAYS"], bin_size = config["OVERALL_COMPLIANCE_HEATMAP"]["BIN_SIZE"], - min_bins_per_hour = config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_BINS_PER_HOUR"] + min_bins_per_hour = "{min_valid_bins_per_hour}" output: - "reports/data_exploration/{min_valid_hours_per_day}h/overall_compliance_heatmap.html" + "reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/overall_compliance_heatmap.html" script: "../src/visualization/overall_compliance_heatmap.py" diff --git a/src/data/phone_valid_sensed_days.R b/src/data/phone_valid_sensed_days.R index a3046ae9..12daf6b7 100644 --- a/src/data/phone_valid_sensed_days.R +++ b/src/data/phone_valid_sensed_days.R @@ -4,7 +4,7 @@ library("tidyr") phone_sensed_bins <- read.csv(snakemake@input[["phone_sensed_bins"]]) min_valid_hours_per_day <- as.integer(snakemake@params[["min_valid_hours_per_day"]]) -min_valid_bins_per_hour <- snakemake@params[["min_valid_bins_per_hour"]] +min_valid_bins_per_hour <- as.integer(snakemake@params[["min_valid_bins_per_hour"]]) output_file <- snakemake@output[[1]] phone_valid_sensed_days <- phone_sensed_bins %>% diff --git a/src/visualization/overall_compliance_heatmap.py b/src/visualization/overall_compliance_heatmap.py index 40ca1783..5f2b879f 100644 --- a/src/visualization/overall_compliance_heatmap.py +++ b/src/visualization/overall_compliance_heatmap.py @@ -1,31 +1,54 @@ import pandas as pd import numpy as np import plotly.io as pio -import plotly.figure_factory as ff +import plotly.graph_objects as go from dateutil import tz import datetime -def getOneRow(data_per_participant, last_seven_dates, col_name, row): +def getOneRow(data_per_participant, last_certain_dates, col_name, row, expected_num_of_days, only_show_valid_days): + data = pd.read_csv(data_per_participant, index_col=["local_date"]) + if col_name == "num_sensors": data["num_sensors"] = data.max(axis=1) - for date in last_seven_dates: - if date in data.index: - row.append(data.loc[date][col_name]) - else: - row.append(0) + + if only_show_valid_days and col_name == "valid_sensed_hours": + # replace invalid days' valid sensed hours with np.nan to let our heatmap only shows valid days + data.loc[data[data["is_valid_sensed_day"] == False].index, "valid_sensed_hours"] = np.nan + + if expected_num_of_days == -1: + # show all days + data.index = pd.to_datetime(data.index) + start_date = data.index.min() + # upsample data into one day bins + data = data.resample("1D").sum() + data["date_idx"] = (data.index - start_date).days + data.set_index("date_idx", inplace=True, drop=True) + row = row + data[col_name].tolist() + else: + # only show last certain days + for date in last_certain_dates: + if date in data.index: + row.append(data.loc[date][col_name]) + else: + row.append(0) + return row -def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, expected_num_of_days, output_path): - plot = ff.create_annotated_heatmap(z=sensors_with_data[last_seven_dates].values, - x=[date.replace("-", "/") for date in last_seven_dates], +def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_certain_dates, bin_size, min_bins_per_hour, expected_num_of_days, output_path): + plot = go.Figure(data=go.Heatmap(z=valid_sensed_hours[last_certain_dates].values, + x=[date.replace("-", "/") for date in last_certain_dates] if expected_num_of_days != -1 else last_certain_dates, y=[pid + "." + label for pid, label in zip(sensors_with_data["pid"].to_list(), sensors_with_data["label"].to_list())], - annotation_text=valid_sensed_hours[last_seven_dates].values, - hovertemplate='Date: %{x}
Participant: %{y}
Number of sensors with data: %{z}', + text=sensors_with_data[last_certain_dates].values, + hovertemplate="Date: %{x}
Participant: %{y}
Valid sensed hours: %{z}
Number of sensors with data: %{text}" if expected_num_of_days != -1 else "Date_idx: %{x}
Participant: %{y}
Valid sensed hours: %{z}
Number of sensors with data: %{text}", colorscale="Viridis", colorbar={"tick0": 0,"dtick": 1}, - showscale=True) - plot.update_layout(title="Overall compliance heatmap for last " + str(expected_num_of_days) + " days.
Bin's color shows how many sensors logged at least one row of data for that day.
Bin's text shows the valid hours of that day.(A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes)") + showscale=True)) + if expected_num_of_days != -1: + plot.update_layout(title="Overall compliance heatmap for last " + str(expected_num_of_days) + " days.
Bin's color shows valid sensed hours for that day.
A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes") + else: + plot.update_layout(title="Overall compliance heatmap for all days.
Bin's color shows valid sensed hours for that day.
A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes") + plot["layout"]["xaxis"].update(side="bottom") pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn") @@ -33,17 +56,21 @@ def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seve phone_sensed_bins = snakemake.input["phone_sensed_bins"] phone_valid_sensed_days = snakemake.input["phone_valid_sensed_days"] pid_files = snakemake.input["pid_files"] +only_show_valid_days = snakemake.params["only_show_valid_days"] local_timezone = snakemake.params["local_timezone"] bin_size = snakemake.params["bin_size"] min_bins_per_hour = snakemake.params["min_bins_per_hour"] expected_num_of_days = int(snakemake.params["expected_num_of_days"]) +if expected_num_of_days < -1: + raise ValueError("EXPECTED_NUM_OF_DAYS of OVERALL_COMPLIANCE_HEATMAP section in config.yaml must be larger or equal to -1.") -cur_date = datetime.datetime.now().astimezone(tz.gettz(local_timezone)).date() -last_seven_dates = [] -for date_offset in range(expected_num_of_days-1, -1, -1): - last_seven_dates.append((cur_date - datetime.timedelta(days=date_offset)).strftime("%Y-%m-%d")) - +last_certain_dates = [] +if expected_num_of_days != -1: + # get the list of dates to show + cur_date = datetime.datetime.now().astimezone(tz.gettz(local_timezone)).date() + for date_offset in range(expected_num_of_days-1, -1, -1): + last_certain_dates.append((cur_date - datetime.timedelta(days=date_offset)).strftime("%Y-%m-%d")) sensors_with_data_records, valid_sensed_hours_records = [], [] for sensors_with_data_individual, valid_sensed_hours_individual, pid_file in zip(phone_sensed_bins, phone_valid_sensed_days, pid_files): @@ -54,15 +81,20 @@ for sensors_with_data_individual, valid_sensed_hours_individual, pid_file in zip label = external_file_content[2].strip() pid = pid_file.split("/")[-1] - sensors_with_data_records.append(getOneRow(sensors_with_data_individual, last_seven_dates, "num_sensors", [pid, label, device_id])) - valid_sensed_hours_records.append(getOneRow(valid_sensed_hours_individual, last_seven_dates, "valid_hours", [pid, label, device_id])) + sensors_with_data_records.append(getOneRow(sensors_with_data_individual, last_certain_dates, "num_sensors", [pid, label, device_id], expected_num_of_days, only_show_valid_days)) + valid_sensed_hours_records.append(getOneRow(valid_sensed_hours_individual, last_certain_dates, "valid_sensed_hours", [pid, label, device_id], expected_num_of_days, only_show_valid_days)) -sensors_with_data = pd.DataFrame(data=sensors_with_data_records, columns=["pid", "label", "device_id"] + last_seven_dates) -valid_sensed_hours = pd.DataFrame(data=valid_sensed_hours_records, columns=["pid", "label", "device_id"] + last_seven_dates) +if expected_num_of_days == -1: + # get the date_idx of all days + total_num_of_days = max([len(x) for x in sensors_with_data_records]) - 3 + last_certain_dates = [date_idx for date_idx in range(total_num_of_days)] + +sensors_with_data = pd.DataFrame(data=sensors_with_data_records, columns=["pid", "label", "device_id"] + last_certain_dates).replace(0, np.nan) +valid_sensed_hours = pd.DataFrame(data=valid_sensed_hours_records, columns=["pid", "label", "device_id"] + last_certain_dates).replace(0, np.nan) if sensors_with_data.empty: empty_html = open(snakemake.output[0], "w") empty_html.write("There is no sensor data for all participants") empty_html.close() else: - getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, expected_num_of_days, snakemake.output[0]) \ No newline at end of file + getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_certain_dates, bin_size, min_bins_per_hour, expected_num_of_days, snakemake.output[0])