Refactor row heatmap and add all-sensors compliance heatmap
parent
2cc73985aa
commit
c177b393b9
|
@ -12,7 +12,6 @@ rule all:
|
||||||
sms_type = config["COM_SMS"]["SMS_TYPES"],
|
sms_type = config["COM_SMS"]["SMS_TYPES"],
|
||||||
day_segment = config["COM_SMS"]["DAY_SEGMENTS"],
|
day_segment = config["COM_SMS"]["DAY_SEGMENTS"],
|
||||||
metric = config["COM_SMS"]["METRICS"]),
|
metric = config["COM_SMS"]["METRICS"]),
|
||||||
expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
|
|
||||||
expand("data/processed/{pid}/com_call_{call_type}_{segment}_{metric}.csv",
|
expand("data/processed/{pid}/com_call_{call_type}_{segment}_{metric}.csv",
|
||||||
pid=config["PIDS"],
|
pid=config["PIDS"],
|
||||||
call_type = config["COM_CALL"]["CALL_TYPE_MISSED"],
|
call_type = config["COM_CALL"]["CALL_TYPE_MISSED"],
|
||||||
|
@ -23,6 +22,9 @@ rule all:
|
||||||
call_type = config["COM_CALL"]["CALL_TYPE_TAKEN"],
|
call_type = config["COM_CALL"]["CALL_TYPE_TAKEN"],
|
||||||
segment = config["COM_CALL"]["DAY_SEGMENTS"],
|
segment = config["COM_CALL"]["DAY_SEGMENTS"],
|
||||||
metric = config["COM_CALL"]["METRICS_TAKEN"]),
|
metric = config["COM_CALL"]["METRICS_TAKEN"]),
|
||||||
|
# Reports
|
||||||
|
expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
|
||||||
|
expand("reports/figures/{pid}/compliance_heatmap.html", pid=config["PIDS"], sensor=config["SENSORS"]),
|
||||||
|
|
||||||
# --- Packrat Rules --- #
|
# --- Packrat Rules --- #
|
||||||
## Taken from https://github.com/lachlandeer/snakemake-econ-r
|
## Taken from https://github.com/lachlandeer/snakemake-econ-r
|
||||||
|
|
|
@ -7,4 +7,14 @@ rule heatmap_rows:
|
||||||
output:
|
output:
|
||||||
"reports/figures/{pid}/{sensor}_heatmap_rows.html"
|
"reports/figures/{pid}/{sensor}_heatmap_rows.html"
|
||||||
script:
|
script:
|
||||||
"../src/visualization/heatmap_rows.py"
|
"../src/visualization/heatmap_rows.py"
|
||||||
|
|
||||||
|
rule compliance_heatmap:
    # One compliance heatmap per participant, built from every configured sensor.
    input:
        # {{pid}} is escaped so it stays a wildcard; only {sensor} is expanded here.
        expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["SENSORS"])
    output:
        "reports/figures/{pid}/compliance_heatmap.html"
    params:
        # The script reads this as snakemake.params["pid"].
        pid = "{pid}"
    script:
        "../src/visualization/compliance_heatmap.py"
|
||||||
|
|
|
@ -0,0 +1,65 @@
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import plotly.io as pio
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
def getComplianceMatrix(dates, compliance_bins):
    """Build the per-day rows of the compliance heatmap.

    Parameters
    ----------
    dates : iterable
        Dates to emit, in the order the rows should appear. Values are
        matched against the ``local_date`` column of ``compliance_bins``.
    compliance_bins : pandas.DataFrame
        Frame with a ``local_date`` column and a ``has_row`` column holding
        the number of rows sensed in each time bin.

    Returns
    -------
    list of list of int
        One inner list per entry of ``dates``; each element is 1 when the
        bin had at least one row and 0 otherwise. A date that does not
        occur in ``compliance_bins`` yields an empty list.
    """
    # Group once instead of masking the whole frame for every date;
    # sort=False keeps the original row order inside each group.
    flags_by_date = {
        date: (counts > 0).astype(int).tolist()
        for date, counts in compliance_bins.groupby("local_date", sort=False)["has_row"]
    }
    return [flags_by_date.get(date, []) for date in dates]
|
||||||
|
|
||||||
|
def getComplianceHeatmap(dates, compliance_matrix, pid, output_path, bin_size):
    """Write the compliance matrix to ``output_path`` as an HTML heatmap.

    Parameters
    ----------
    dates : list
        Values for the y axis, one per row of ``compliance_matrix``.
    compliance_matrix : list of list of int
        Heatmap values; 0 is drawn white and 1 grey.
    pid : str
        Participant id, used in the plot title.
    output_path : str
        Destination of the generated HTML file.
    bin_size : int
        Width of one time bin in minutes.
    """
    bins_per_hour = 60 // bin_size
    # One "HH:MM" label per bin of the day, e.g. 00:00, 00:05, ... 23:55.
    x_axis_labels = [
        "{:02d}:{:02d}".format(bin_index // bins_per_hour,
                               bin_index % bins_per_hour * bin_size)
        for bin_index in range(24 * bins_per_hour)
    ]

    heatmap = go.Heatmap(
        z=compliance_matrix,
        x=x_axis_labels,
        y=dates,
        colorscale=[[0, "rgb(255, 255, 255)"], [1, "rgb(120, 120, 120)"]],
    )
    plot = go.Figure(data=heatmap)

    # The title used to say "Five minutes" regardless of bin_size; keep that
    # exact wording for 5-minute bins and spell out any other width.
    bin_label = "Five" if bin_size == 5 else str(bin_size)
    plot.update_layout(title=bin_label + " minutes has_row heatmap for " + pid)
    pio.write_html(plot, file=output_path, auto_open=False)
|
||||||
|
|
||||||
|
# get current patient id
pid = snakemake.params["pid"]

# One 5-minute row-count series per sensor that actually has data.
per_sensor_bins = []
for sensor_path in snakemake.input:
    sensor_data = pd.read_csv(sensor_path)
    # A sensor file without rows has no first/last day to anchor on.
    if sensor_data.empty:
        continue

    # Two columns: local_date_time and has_row (1 for every sensed row).
    timestamps = pd.to_datetime(sensor_data["local_date_time"])
    sensed_bins = pd.DataFrame({"local_date_time": timestamps, "has_row": 1})

    # Pad with has_row=0 markers at 00:00:00 of the first day and 23:59:59
    # of the last day so resampling always covers whole days. min()/max()
    # do not rely on the file being sorted or on a 0..n-1 index.
    first_day_start = timestamps.min().normalize()
    last_day_end = timestamps.max().normalize() + pd.Timedelta(hours=23, minutes=59, seconds=59)
    day_boundaries = pd.DataFrame({"local_date_time": [first_day_start, last_day_end],
                                   "has_row": [0, 0]})
    sensed_bins = pd.concat([sensed_bins, day_boundaries], ignore_index=True)

    # get bins with 5 min ("5min" is the non-deprecated spelling of "5T")
    per_sensor_bins.append(sensed_bins.resample("5min", on="local_date_time")["has_row"].sum())

if per_sensor_bins:
    # Add up the counts of all sensors bin by bin, in one pass.
    all_sensors_bins = pd.concat(per_sensor_bins).groupby(level=0).sum()

    # resample again to fill the gaps between sensors with empty bins
    successive_bins = all_sensors_bins.resample("5min").sum().reset_index()

    # get sorted date list
    successive_bins["local_date"] = successive_bins["local_date_time"].dt.date
    dates = sorted(set(successive_bins["local_date"]))
    compliance_matrix = getComplianceMatrix(dates, successive_bins)
else:
    # No sensor produced any row: still write an (empty) heatmap so the
    # declared output file exists.
    dates, compliance_matrix = [], []

# get heatmap
getComplianceHeatmap(dates, compliance_matrix, pid, snakemake.output[0], 5)
|
|
@ -1,35 +1,54 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
|
||||||
import plotly.io as pio
|
import plotly.io as pio
|
||||||
import plotly.graph_objects as go
|
import plotly.graph_objects as go
|
||||||
|
import datetime
|
||||||
|
|
||||||
def getHourlyRowCount(dates, sensor_data):
|
def getComplianceMatrix(dates, compliance_bins):
|
||||||
hourly_row_count = []
|
compliance_matrix = []
|
||||||
for date in dates:
|
for date in dates:
|
||||||
num_rows = []
|
date_bins = compliance_bins[compliance_bins["local_date"] == date]["count"].tolist()
|
||||||
daily_rows = sensor_data[sensor_data["local_date"] == date]
|
compliance_matrix.append(date_bins)
|
||||||
for hour in range(24):
|
return compliance_matrix
|
||||||
hourly_rows = daily_rows[daily_rows["local_hour"] == hour]
|
|
||||||
num_rows.append(hourly_rows.shape[0])
|
|
||||||
hourly_row_count.append(num_rows)
|
|
||||||
return hourly_row_count
|
|
||||||
|
|
||||||
def getHourlyRowCountHeatmap(dates, hourly_row_count, sensor_name, pid, output_path):
|
def getHourlyRowCountHeatmap(dates, hourly_row_count, sensor_name, pid, output_path):
|
||||||
plot = go.Figure(data=go.Heatmap(z=hourly_row_count,x=[x for x in range(24)],y=dates,colorscale='Viridis'))
|
plot = go.Figure(data=go.Heatmap(z=hourly_row_count,
|
||||||
plot.update_layout(title="Hourly row count heatmap for " + pid + " for sensor " + sensor_name)
|
x=[x for x in range(24)],
|
||||||
|
y=[datetime.datetime.strftime(date, '%Y/%m/%d') for date in dates],
|
||||||
|
colorscale='Viridis'))
|
||||||
|
plot.update_layout(title="Hourly row count heatmap for " + pid + " and sensor " + sensor_name)
|
||||||
pio.write_html(plot, file=output_path, auto_open=False)
|
pio.write_html(plot, file=output_path, auto_open=False)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
sensor_data = pd.read_csv(snakemake.input[0])
|
sensor_data = pd.read_csv(snakemake.input[0])
|
||||||
# get current sensor name
|
|
||||||
sensor_name = snakemake.params["table"]
|
sensor_name = snakemake.params["table"]
|
||||||
# get current patient id
|
|
||||||
pid = snakemake.params["pid"]
|
pid = snakemake.params["pid"]
|
||||||
# get sorted date list
|
|
||||||
dates = list(set(sensor_data["local_date"]))
|
start_date = sensor_data["local_date"][0]
|
||||||
dates.sort()
|
end_date = sensor_data.at[sensor_data.index[-1],"local_date"]
|
||||||
# get num of rows per hour per day
|
|
||||||
hourly_row_count = getHourlyRowCount(dates, sensor_data)
|
# Make local hour double digit
|
||||||
# get heatmap
|
sensor_data["local_hour"] = sensor_data["local_hour"].map("{0:0=2d}".format)
|
||||||
|
|
||||||
|
# Group and count by local_date and local_hour
|
||||||
|
sensor_data_hourly_bins = sensor_data.groupby(["local_date","local_hour"]).agg(count=("timestamp","count")).reset_index()
|
||||||
|
|
||||||
|
# Add first and last day boundaries for resampling
|
||||||
|
sensor_data_hourly_bins = sensor_data_hourly_bins.append([pd.Series([start_date, "00", 0], sensor_data_hourly_bins.columns),
|
||||||
|
pd.Series([end_date, "23", 0], sensor_data_hourly_bins.columns)])
|
||||||
|
|
||||||
|
# Rebuild local date hour for resampling
|
||||||
|
sensor_data_hourly_bins["local_date_hour"] = pd.to_datetime(sensor_data_hourly_bins["local_date"] + \
|
||||||
|
" " + sensor_data_hourly_bins["local_hour"] + ":00:00")
|
||||||
|
|
||||||
|
resampled_hourly_bins = pd.DataFrame(sensor_data_hourly_bins.resample("1H", on="local_date_hour")["count"].sum())
|
||||||
|
|
||||||
|
# Extract list of dates for creating the heatmap
|
||||||
|
resampled_hourly_bins.reset_index(inplace=True)
|
||||||
|
resampled_hourly_bins["local_date"] = resampled_hourly_bins["local_date_hour"].dt.date
|
||||||
|
dates = resampled_hourly_bins["local_date"].drop_duplicates().tolist()
|
||||||
|
|
||||||
|
# Create heatmap
|
||||||
|
hourly_row_count = getComplianceMatrix(dates, resampled_hourly_bins)
|
||||||
getHourlyRowCountHeatmap(dates, hourly_row_count, sensor_name, pid, snakemake.output[0])
|
getHourlyRowCountHeatmap(dates, hourly_row_count, sensor_name, pid, snakemake.output[0])
|
||||||
|
|
Loading…
Reference in New Issue