From 70f3c336b59a2ed9091927f6fd5bc4d3b4c91fad Mon Sep 17 00:00:00 2001 From: Meng Li <34143965+Meng6@users.noreply.github.com> Date: Tue, 5 Nov 2019 16:18:02 -0500 Subject: [PATCH] Fix empty df bug and add color code to compliance heatmap --- src/visualization/compliance_heatmap.py | 9 +++-- src/visualization/heatmap_rows.py | 46 ++++++++++++++----------- 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/src/visualization/compliance_heatmap.py b/src/visualization/compliance_heatmap.py index b1d8595c..9e1f413e 100644 --- a/src/visualization/compliance_heatmap.py +++ b/src/visualization/compliance_heatmap.py @@ -8,7 +8,7 @@ def getComplianceMatrix(dates, compliance_bins): compliance_matrix = [] for date in dates: date_bins = compliance_bins[compliance_bins["local_date"] == date] - compliance_matrix.append(((date_bins["has_row"]>0).astype(int)).tolist()) + compliance_matrix.append(date_bins["has_row"].tolist()) return compliance_matrix def getComplianceHeatmap(dates, compliance_matrix, pid, output_path, bin_size): @@ -18,7 +18,7 @@ def getComplianceHeatmap(dates, compliance_matrix, pid, output_path, bin_size): plot = go.Figure(data=go.Heatmap(z=compliance_matrix, x=x_axis_labels, y=dates, - colorscale=[[0, "rgb(255, 255, 255)"],[1, "rgb(120, 120, 120)"]])) + colorscale='Viridis')) plot.update_layout(title="Five minutes has_row heatmap for " + pid) pio.write_html(plot, file=output_path, auto_open=False) @@ -28,6 +28,10 @@ sensors_dates = [] sensors_five_minutes_row_is = pd.DataFrame() for sensor_path in snakemake.input: sensor_data = pd.read_csv(sensor_path) + + # check if the sensor is off + if sensor_data.empty: + continue # create a dataframe contains 2 columns: local_date_time, has_row sensor_data["has_row"] = [1]*sensor_data.shape[0] @@ -43,6 +47,7 @@ for sensor_path in snakemake.input: sensed_bins.loc[sensed_bins.shape[0], :] = [end_date, 0] # get bins with 5 min sensor_five_minutes_row_is = pd.DataFrame(sensed_bins.resample("5T", on="local_date_time")["has_row"].sum()) + sensor_five_minutes_row_is["has_row"] = (sensor_five_minutes_row_is["has_row"]>0).astype(int) # merge current sensor with previous sensors if sensors_five_minutes_row_is.empty: sensors_five_minutes_row_is = sensor_five_minutes_row_is diff --git a/src/visualization/heatmap_rows.py b/src/visualization/heatmap_rows.py index 956351ed..2e8b8e0b 100644 --- a/src/visualization/heatmap_rows.py +++ b/src/visualization/heatmap_rows.py @@ -25,30 +25,36 @@ sensor_data = pd.read_csv(snakemake.input[0]) sensor_name = snakemake.params["table"] pid = snakemake.params["pid"] -start_date = sensor_data["local_date"][0] -end_date = sensor_data.at[sensor_data.index[-1],"local_date"] +# check if we have sensor data +if sensor_data.empty: + empty_html = open(snakemake.output[0], "w") + empty_html.write("There is no "+ sensor_name + " data for "+pid) + empty_html.close() +else: + start_date = sensor_data["local_date"][0] + end_date = sensor_data.at[sensor_data.index[-1],"local_date"] -# Make local hour double digit -sensor_data["local_hour"] = sensor_data["local_hour"].map("{0:0=2d}".format) + # Make local hour double digit + sensor_data["local_hour"] = sensor_data["local_hour"].map("{0:0=2d}".format) -# Group and count by local_date and local_hour -sensor_data_hourly_bins = sensor_data.groupby(["local_date","local_hour"]).agg(count=("timestamp","count")).reset_index() + # Group and count by local_date and local_hour + sensor_data_hourly_bins = sensor_data.groupby(["local_date","local_hour"]).agg(count=("timestamp","count")).reset_index() -# Add first and last day boundaries for resampling -sensor_data_hourly_bins = sensor_data_hourly_bins.append([pd.Series([start_date, "00", 0], sensor_data_hourly_bins.columns), - pd.Series([end_date, "23", 0], sensor_data_hourly_bins.columns)]) + # Add first and last day boundaries for resampling + sensor_data_hourly_bins = sensor_data_hourly_bins.append([pd.Series([start_date, "00", 0], sensor_data_hourly_bins.columns), + pd.Series([end_date, "23", 0], sensor_data_hourly_bins.columns)]) -# Rebuild local date hour for resampling -sensor_data_hourly_bins["local_date_hour"] = pd.to_datetime(sensor_data_hourly_bins["local_date"] + \ - " " + sensor_data_hourly_bins["local_hour"] + ":00:00") + # Rebuild local date hour for resampling + sensor_data_hourly_bins["local_date_hour"] = pd.to_datetime(sensor_data_hourly_bins["local_date"] + \ + " " + sensor_data_hourly_bins["local_hour"] + ":00:00") -resampled_hourly_bins = pd.DataFrame(sensor_data_hourly_bins.resample("1H", on="local_date_hour")["count"].sum()) + resampled_hourly_bins = pd.DataFrame(sensor_data_hourly_bins.resample("1H", on="local_date_hour")["count"].sum()) -# Extract list of dates for creating the heatmap -resampled_hourly_bins.reset_index(inplace=True) -resampled_hourly_bins["local_date"] = resampled_hourly_bins["local_date_hour"].dt.date -dates = resampled_hourly_bins["local_date"].drop_duplicates().tolist() + # Extract list of dates for creating the heatmap + resampled_hourly_bins.reset_index(inplace=True) + resampled_hourly_bins["local_date"] = resampled_hourly_bins["local_date_hour"].dt.date + dates = resampled_hourly_bins["local_date"].drop_duplicates().tolist() -# Create heatmap -hourly_row_count = getComplianceMatrix(dates, resampled_hourly_bins) -getHourlyRowCountHeatmap(dates, hourly_row_count, sensor_name, pid, snakemake.output[0]) + # Create heatmap + hourly_row_count = getComplianceMatrix(dates, resampled_hourly_bins) + getHourlyRowCountHeatmap(dates, hourly_row_count, sensor_name, pid, snakemake.output[0])