replace/3121abc9100beb40bdbd774004618c991a5528d1
JulioV 2019-11-05 16:22:55 -05:00
commit 58299a8869
2 changed files with 33 additions and 22 deletions

View File

@@ -8,7 +8,7 @@ def getComplianceMatrix(dates, compliance_bins):
compliance_matrix = [] compliance_matrix = []
for date in dates: for date in dates:
date_bins = compliance_bins[compliance_bins["local_date"] == date] date_bins = compliance_bins[compliance_bins["local_date"] == date]
compliance_matrix.append(((date_bins["has_row"]>0).astype(int)).tolist()) compliance_matrix.append(date_bins["has_row"].tolist())
return compliance_matrix return compliance_matrix
def getComplianceHeatmap(dates, compliance_matrix, pid, output_path, bin_size): def getComplianceHeatmap(dates, compliance_matrix, pid, output_path, bin_size):
@@ -18,7 +18,7 @@ def getComplianceHeatmap(dates, compliance_matrix, pid, output_path, bin_size):
plot = go.Figure(data=go.Heatmap(z=compliance_matrix, plot = go.Figure(data=go.Heatmap(z=compliance_matrix,
x=x_axis_labels, x=x_axis_labels,
y=dates, y=dates,
colorscale=[[0, "rgb(255, 255, 255)"],[1, "rgb(120, 120, 120)"]])) colorscale='Viridis'))
plot.update_layout(title="Five minutes has_row heatmap for " + pid) plot.update_layout(title="Five minutes has_row heatmap for " + pid)
pio.write_html(plot, file=output_path, auto_open=False) pio.write_html(plot, file=output_path, auto_open=False)
@@ -28,6 +28,10 @@ sensors_dates = []
sensors_five_minutes_row_is = pd.DataFrame() sensors_five_minutes_row_is = pd.DataFrame()
for sensor_path in snakemake.input: for sensor_path in snakemake.input:
sensor_data = pd.read_csv(sensor_path) sensor_data = pd.read_csv(sensor_path)
# check if the sensor is off
if sensor_data.empty:
continue
# create a dataframe contains 2 columns: local_date_time, has_row # create a dataframe contains 2 columns: local_date_time, has_row
sensor_data["has_row"] = [1]*sensor_data.shape[0] sensor_data["has_row"] = [1]*sensor_data.shape[0]
@@ -43,6 +47,7 @@ for sensor_path in snakemake.input:
sensed_bins.loc[sensed_bins.shape[0], :] = [end_date, 0] sensed_bins.loc[sensed_bins.shape[0], :] = [end_date, 0]
# get bins with 5 min # get bins with 5 min
sensor_five_minutes_row_is = pd.DataFrame(sensed_bins.resample("5T", on="local_date_time")["has_row"].sum()) sensor_five_minutes_row_is = pd.DataFrame(sensed_bins.resample("5T", on="local_date_time")["has_row"].sum())
sensor_five_minutes_row_is["has_row"] = (sensor_five_minutes_row_is["has_row"]>0).astype(int)
# merge current sensor with previous sensors # merge current sensor with previous sensors
if sensors_five_minutes_row_is.empty: if sensors_five_minutes_row_is.empty:
sensors_five_minutes_row_is = sensor_five_minutes_row_is sensors_five_minutes_row_is = sensor_five_minutes_row_is

View File

@@ -25,30 +25,36 @@ sensor_data = pd.read_csv(snakemake.input[0])
sensor_name = snakemake.params["table"] sensor_name = snakemake.params["table"]
pid = snakemake.params["pid"] pid = snakemake.params["pid"]
start_date = sensor_data["local_date"][0] # check if we have sensor data
end_date = sensor_data.at[sensor_data.index[-1],"local_date"] if sensor_data.empty:
empty_html = open(snakemake.output[0], "w")
empty_html.write("There is no "+ sensor_name + " data for "+pid)
empty_html.close()
else:
start_date = sensor_data["local_date"][0]
end_date = sensor_data.at[sensor_data.index[-1],"local_date"]
# Make local hour double digit # Make local hour double digit
sensor_data["local_hour"] = sensor_data["local_hour"].map("{0:0=2d}".format) sensor_data["local_hour"] = sensor_data["local_hour"].map("{0:0=2d}".format)
# Group and count by local_date and local_hour # Group and count by local_date and local_hour
sensor_data_hourly_bins = sensor_data.groupby(["local_date","local_hour"]).agg(count=("timestamp","count")).reset_index() sensor_data_hourly_bins = sensor_data.groupby(["local_date","local_hour"]).agg(count=("timestamp","count")).reset_index()
# Add first and last day boundaries for resampling # Add first and last day boundaries for resampling
sensor_data_hourly_bins = sensor_data_hourly_bins.append([pd.Series([start_date, "00", 0], sensor_data_hourly_bins.columns), sensor_data_hourly_bins = sensor_data_hourly_bins.append([pd.Series([start_date, "00", 0], sensor_data_hourly_bins.columns),
pd.Series([end_date, "23", 0], sensor_data_hourly_bins.columns)]) pd.Series([end_date, "23", 0], sensor_data_hourly_bins.columns)])
# Rebuild local date hour for resampling # Rebuild local date hour for resampling
sensor_data_hourly_bins["local_date_hour"] = pd.to_datetime(sensor_data_hourly_bins["local_date"] + \ sensor_data_hourly_bins["local_date_hour"] = pd.to_datetime(sensor_data_hourly_bins["local_date"] + \
" " + sensor_data_hourly_bins["local_hour"] + ":00:00") " " + sensor_data_hourly_bins["local_hour"] + ":00:00")
resampled_hourly_bins = pd.DataFrame(sensor_data_hourly_bins.resample("1H", on="local_date_hour")["count"].sum()) resampled_hourly_bins = pd.DataFrame(sensor_data_hourly_bins.resample("1H", on="local_date_hour")["count"].sum())
# Extract list of dates for creating the heatmap # Extract list of dates for creating the heatmap
resampled_hourly_bins.reset_index(inplace=True) resampled_hourly_bins.reset_index(inplace=True)
resampled_hourly_bins["local_date"] = resampled_hourly_bins["local_date_hour"].dt.date resampled_hourly_bins["local_date"] = resampled_hourly_bins["local_date_hour"].dt.date
dates = resampled_hourly_bins["local_date"].drop_duplicates().tolist() dates = resampled_hourly_bins["local_date"].drop_duplicates().tolist()
# Create heatmap # Create heatmap
hourly_row_count = getComplianceMatrix(dates, resampled_hourly_bins) hourly_row_count = getComplianceMatrix(dates, resampled_hourly_bins)
getHourlyRowCountHeatmap(dates, hourly_row_count, sensor_name, pid, snakemake.output[0]) getHourlyRowCountHeatmap(dates, hourly_row_count, sensor_name, pid, snakemake.output[0])