From c177b393b9b419ce4f68ba51833634622c09e432 Mon Sep 17 00:00:00 2001 From: Meng Li <34143965+Meng6@users.noreply.github.com> Date: Fri, 1 Nov 2019 14:26:51 -0400 Subject: [PATCH] Refactor row heatmap and add all sensors compliance --- Snakefile | 4 +- rules/reports.snakefile | 12 ++++- src/visualization/compliance_heatmap.py | 65 +++++++++++++++++++++++++ src/visualization/heatmap_rows.py | 59 ++++++++++++++-------- 4 files changed, 118 insertions(+), 22 deletions(-) create mode 100644 src/visualization/compliance_heatmap.py diff --git a/Snakefile b/Snakefile index 8ed8a647..647388aa 100644 --- a/Snakefile +++ b/Snakefile @@ -12,7 +12,6 @@ rule all: sms_type = config["COM_SMS"]["SMS_TYPES"], day_segment = config["COM_SMS"]["DAY_SEGMENTS"], metric = config["COM_SMS"]["METRICS"]), - expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]), expand("data/processed/{pid}/com_call_{call_type}_{segment}_{metric}.csv", pid=config["PIDS"], call_type = config["COM_CALL"]["CALL_TYPE_MISSED"], @@ -23,6 +22,9 @@ rule all: call_type = config["COM_CALL"]["CALL_TYPE_TAKEN"], segment = config["COM_CALL"]["DAY_SEGMENTS"], metric = config["COM_CALL"]["METRICS_TAKEN"]), + # Reports + expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]), + expand("reports/figures/{pid}/compliance_heatmap.html", pid=config["PIDS"], sensor=config["SENSORS"]), # --- Packrat Rules --- # ## Taken from https://github.com/lachlandeer/snakemake-econ-r diff --git a/rules/reports.snakefile b/rules/reports.snakefile index 112d5302..75df3f72 100644 --- a/rules/reports.snakefile +++ b/rules/reports.snakefile @@ -7,4 +7,14 @@ rule heatmap_rows: output: "reports/figures/{pid}/{sensor}_heatmap_rows.html" script: - "../src/visualization/heatmap_rows.py" \ No newline at end of file + "../src/visualization/heatmap_rows.py" + +rule compliance_heatmap: + input: + expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["SENSORS"]) + params: + pid = "{pid}" + output: + "reports/figures/{pid}/compliance_heatmap.html" + script: + "../src/visualization/compliance_heatmap.py" diff --git a/src/visualization/compliance_heatmap.py b/src/visualization/compliance_heatmap.py new file mode 100644 index 00000000..b1d8595c --- /dev/null +++ b/src/visualization/compliance_heatmap.py @@ -0,0 +1,65 @@ +import pandas as pd +import numpy as np +import plotly.io as pio +import plotly.graph_objects as go +import datetime + +def getComplianceMatrix(dates, compliance_bins): + compliance_matrix = [] + for date in dates: + date_bins = compliance_bins[compliance_bins["local_date"] == date] + compliance_matrix.append(((date_bins["has_row"]>0).astype(int)).tolist()) + return compliance_matrix + +def getComplianceHeatmap(dates, compliance_matrix, pid, output_path, bin_size): + bins_per_hour = int(60 / bin_size) + x_axis_labels = ["{0:0=2d}".format(x // bins_per_hour) + ":" + \ + "{0:0=2d}".format(x % bins_per_hour * bin_size) for x in range(24 * bins_per_hour)] + plot = go.Figure(data=go.Heatmap(z=compliance_matrix, + x=x_axis_labels, + y=dates, + colorscale=[[0, "rgb(255, 255, 255)"],[1, "rgb(120, 120, 120)"]])) + plot.update_layout(title="Five minutes has_row heatmap for " + pid) + pio.write_html(plot, file=output_path, auto_open=False) + +# get current patient id +pid = snakemake.params["pid"] +sensors_dates = [] +sensors_five_minutes_row_is = pd.DataFrame() +for sensor_path in snakemake.input: + sensor_data = pd.read_csv(sensor_path) + + # create a dataframe contains 2 columns: local_date_time, has_row + sensor_data["has_row"] = [1]*sensor_data.shape[0] + sensor_data["local_date_time"] = pd.to_datetime(sensor_data["local_date_time"]) + sensed_bins = sensor_data[["local_date_time", "has_row"]] + + # get the first date and the last date of current sensor + start_date = datetime.datetime.combine(sensed_bins["local_date_time"][0].date(), datetime.time(0,0,0)) + end_date = datetime.datetime.combine(sensed_bins["local_date_time"][sensed_bins.shape[0]-1].date(), datetime.time(23,59,59)) + + # add the above datetime with has_row=0 to our dataframe + sensed_bins.loc[sensed_bins.shape[0], :] = [start_date, 0] + sensed_bins.loc[sensed_bins.shape[0], :] = [end_date, 0] + # get bins with 5 min + sensor_five_minutes_row_is = pd.DataFrame(sensed_bins.resample("5T", on="local_date_time")["has_row"].sum()) + # merge current sensor with previous sensors + if sensors_five_minutes_row_is.empty: + sensors_five_minutes_row_is = sensor_five_minutes_row_is + else: + sensors_five_minutes_row_is = pd.concat([sensors_five_minutes_row_is, sensor_five_minutes_row_is]).groupby("local_date_time").sum() + + +sensors_five_minutes_row_is.reset_index(inplace=True) +# resample again to impute missing dates +sensors_five_minutes_row_is_successive = pd.DataFrame(sensors_five_minutes_row_is.resample("5T", on="local_date_time")["has_row"].sum()) + +# get sorted date list +sensors_five_minutes_row_is_successive.reset_index(inplace=True) +sensors_five_minutes_row_is_successive["local_date"] = sensors_five_minutes_row_is_successive["local_date_time"].apply(lambda x: x.date()) +dates = list(set(sensors_five_minutes_row_is_successive["local_date"])) +dates.sort() +compliance_matrix = getComplianceMatrix(dates, sensors_five_minutes_row_is_successive) + +# get heatmap +getComplianceHeatmap(dates, compliance_matrix, pid, snakemake.output[0], 5) \ No newline at end of file diff --git a/src/visualization/heatmap_rows.py b/src/visualization/heatmap_rows.py index 56084f78..956351ed 100644 --- a/src/visualization/heatmap_rows.py +++ b/src/visualization/heatmap_rows.py @@ -1,35 +1,54 @@ import pandas as pd -import numpy as np import plotly.io as pio import plotly.graph_objects as go +import datetime -def getHourlyRowCount(dates, sensor_data): - hourly_row_count = [] +def getComplianceMatrix(dates, compliance_bins): + compliance_matrix = [] for date in dates: - num_rows = [] - daily_rows = sensor_data[sensor_data["local_date"] == date] - for hour in range(24): - hourly_rows = daily_rows[daily_rows["local_hour"] == hour] - num_rows.append(hourly_rows.shape[0]) - hourly_row_count.append(num_rows) - return hourly_row_count + date_bins = compliance_bins[compliance_bins["local_date"] == date]["count"].tolist() + compliance_matrix.append(date_bins) + return compliance_matrix + def getHourlyRowCountHeatmap(dates, hourly_row_count, sensor_name, pid, output_path): - plot = go.Figure(data=go.Heatmap(z=hourly_row_count,x=[x for x in range(24)],y=dates,colorscale='Viridis')) - plot.update_layout(title="Hourly row count heatmap for " + pid + " for sensor " + sensor_name) + plot = go.Figure(data=go.Heatmap(z=hourly_row_count, + x=[x for x in range(24)], + y=[datetime.datetime.strftime(date, '%Y/%m/%d') for date in dates], + colorscale='Viridis')) + plot.update_layout(title="Hourly row count heatmap for " + pid + " and sensor " + sensor_name) pio.write_html(plot, file=output_path, auto_open=False) sensor_data = pd.read_csv(snakemake.input[0]) -# get current sensor name sensor_name = snakemake.params["table"] -# get current patient id pid = snakemake.params["pid"] -# get sorted date list -dates = list(set(sensor_data["local_date"])) -dates.sort() -# get num of rows per hour per day -hourly_row_count = getHourlyRowCount(dates, sensor_data) -# get heatmap + +start_date = sensor_data["local_date"][0] +end_date = sensor_data.at[sensor_data.index[-1],"local_date"] + +# Make local hour double digit +sensor_data["local_hour"] = sensor_data["local_hour"].map("{0:0=2d}".format) + +# Group and count by local_date and local_hour +sensor_data_hourly_bins = sensor_data.groupby(["local_date","local_hour"]).agg(count=("timestamp","count")).reset_index() + +# Add first and last day boundaries for resampling +sensor_data_hourly_bins = sensor_data_hourly_bins.append([pd.Series([start_date, "00", 0], sensor_data_hourly_bins.columns), + pd.Series([end_date, "23", 0], sensor_data_hourly_bins.columns)]) + +# Rebuild local date hour for resampling +sensor_data_hourly_bins["local_date_hour"] = pd.to_datetime(sensor_data_hourly_bins["local_date"] + \ + " " + sensor_data_hourly_bins["local_hour"] + ":00:00") + +resampled_hourly_bins = pd.DataFrame(sensor_data_hourly_bins.resample("1H", on="local_date_hour")["count"].sum()) + +# Extract list of dates for creating the heatmap +resampled_hourly_bins.reset_index(inplace=True) +resampled_hourly_bins["local_date"] = resampled_hourly_bins["local_date_hour"].dt.date +dates = resampled_hourly_bins["local_date"].drop_duplicates().tolist() + +# Create heatmap +hourly_row_count = getComplianceMatrix(dates, resampled_hourly_bins) getHourlyRowCountHeatmap(dates, hourly_row_count, sensor_name, pid, snakemake.output[0])