Set color range and avoid SettingWithCopyWarning

pull/147/head
Meng Li 2021-06-28 17:31:35 -04:00
parent bb3c614135
commit 97ef8a8368
4 changed files with 10 additions and 5 deletions

View File

@@ -10,7 +10,8 @@ def getCorrMatrixHeatmap(corr_matrix, time_segment, html_file):
fig = go.Figure(data=go.Heatmap(z=corr_matrix.values.tolist(), fig = go.Figure(data=go.Heatmap(z=corr_matrix.values.tolist(),
x=feature_names, x=feature_names,
y=feature_names, y=feature_names,
colorscale="Viridis")) colorscale="Viridis",
zmin=-1, zmax=1))
fig.update_layout(title="Correlation matrix of features of " + time_segment + " segments.") fig.update_layout(title="Correlation matrix of features of " + time_segment + " segments.")

View File

@@ -33,6 +33,7 @@ def getPhoneDataYieldHeatmap(phone_data_yield, time, time_segment, html_file):
y="y_axis_label", y="y_axis_label",
color=column_name, color=column_name,
color_continuous_scale="Viridis", color_continuous_scale="Viridis",
range_color=[0, 1],
opacity=0.7, opacity=0.7,
hover_data={'local_segment_start_datetime':False, 'local_segment_end_datetime':False, 'local_segment':True}) hover_data={'local_segment_start_datetime':False, 'local_segment_end_datetime':False, 'local_segment':True})
@@ -68,7 +69,7 @@ else:
if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns): if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].") raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")
phone_data_yield[["phone_data_yield_rapids_ratiovalidyieldedminutes", "phone_data_yield_rapids_ratiovalidyieldedhours"]] = phone_data_yield[["phone_data_yield_rapids_ratiovalidyieldedminutes", "phone_data_yield_rapids_ratiovalidyieldedhours"]].round(3).clip(upper=1) phone_data_yield.loc[:, ["phone_data_yield_rapids_ratiovalidyieldedminutes", "phone_data_yield_rapids_ratiovalidyieldedhours"]] = phone_data_yield.loc[:, ["phone_data_yield_rapids_ratiovalidyieldedminutes", "phone_data_yield_rapids_ratiovalidyieldedhours"]].round(3).clip(upper=1)
phone_data_yield["y_axis_label"] = phone_data_yield["pid"].apply(lambda pid: pid + "." + str(pid2label[pid])) phone_data_yield["y_axis_label"] = phone_data_yield["pid"].apply(lambda pid: pid + "." + str(pid2label[pid]))
if time_segments_type == "EVENT": if time_segments_type == "EVENT":
@@ -76,7 +77,7 @@ else:
else: # FREQUENCY or PERIODIC else: # FREQUENCY or PERIODIC
for time_segment in time_segments: for time_segment in time_segments:
phone_data_yield_per_segment = phone_data_yield[phone_data_yield["local_segment_label"] == time_segment] phone_data_yield_per_segment = phone_data_yield[phone_data_yield["local_segment_label"] == time_segment].copy()
if not phone_data_yield_per_segment.empty: if not phone_data_yield_per_segment.empty:

View File

@@ -38,6 +38,7 @@ def getRowCountHeatmap(data_for_plot, pid, time_segment, html_file):
y="sensor", y="sensor",
color="scaled_value", color="scaled_value",
color_continuous_scale="Peach", color_continuous_scale="Peach",
range_color=[0, 1],
opacity=0.7, opacity=0.7,
hover_data={"local_segment_start_datetime":False, "local_segment_end_datetime":False, "local_segment":True, "value":True, "scaled_value":True}) hover_data={"local_segment_start_datetime":False, "local_segment_end_datetime":False, "local_segment":True, "value":True, "scaled_value":True})
@@ -66,6 +67,7 @@ phone_data_yield = pd.read_csv(snakemake.input["phone_data_yield"], index_col=["
# make sure the phone_data_yield file contains "phone_data_yield_rapids_ratiovalidyieldedminutes" and "phone_data_yield_rapids_ratiovalidyieldedhours" columns # make sure the phone_data_yield file contains "phone_data_yield_rapids_ratiovalidyieldedminutes" and "phone_data_yield_rapids_ratiovalidyieldedhours" columns
if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns): if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].") raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")
phone_data_yield.loc[:, ["phone_data_yield_rapids_ratiovalidyieldedminutes", "phone_data_yield_rapids_ratiovalidyieldedhours"]] = phone_data_yield.loc[:, ["phone_data_yield_rapids_ratiovalidyieldedminutes", "phone_data_yield_rapids_ratiovalidyieldedhours"]].round(3).clip(upper=1)
sensors_row_count = getRowCount(snakemake.input["all_sensors"], sensor_names, time_segments_labels) sensors_row_count = getRowCount(snakemake.input["all_sensors"], sensor_names, time_segments_labels)
data_for_plot = phone_data_yield.rename(columns={"phone_data_yield_rapids_ratiovalidyieldedminutes": "ratiovalidyieldedminutes","phone_data_yield_rapids_ratiovalidyieldedhours": "ratiovalidyieldedhours"}).merge(sensors_row_count, how="left", left_index=True, right_index=True).reset_index() data_for_plot = phone_data_yield.rename(columns={"phone_data_yield_rapids_ratiovalidyieldedminutes": "ratiovalidyieldedminutes","phone_data_yield_rapids_ratiovalidyieldedhours": "ratiovalidyieldedhours"}).merge(sensors_row_count, how="left", left_index=True, right_index=True).reset_index()
@@ -88,8 +90,7 @@ for time_segment in set(data_for_plot["local_segment_label"]):
# except for phone data yield sensor, scale each sensor (row) to the range of [0, 1] # except for phone data yield sensor, scale each sensor (row) to the range of [0, 1]
scaled_data_for_plot_per_segment = data_for_plot_per_segment.copy() scaled_data_for_plot_per_segment = data_for_plot_per_segment.copy()
scaled_data_for_plot_per_segment[sensor_names[:-2]] = scaled_data_for_plot_per_segment.fillna(np.nan)[sensor_names[:-2]].apply(lambda x: (x - np.nanmin(x)) / (np.nanmax(x) - np.nanmin(x)) if np.nanmax(x) != np.nanmin(x) else (x / np.nanmin(x)), axis=0) scaled_data_for_plot_per_segment[sensor_names[:-2]] = scaled_data_for_plot_per_segment.fillna(np.nan)[sensor_names[:-2]].apply(lambda x: (x - np.nanmin(x)) / (np.nanmax(x) - np.nanmin(x)) if np.nanmax(x) != np.nanmin(x) else (x / np.nanmin(x)), axis=0)
data_for_plot_processed = pd.concat([data_for_plot_per_segment.stack(dropna=False).to_frame("value"), scaled_data_for_plot_per_segment.stack(dropna=False).to_frame("scaled_value")], axis=1).reset_index().rename(columns={"level_3": "sensor"}) data_for_plot_processed = pd.concat([data_for_plot_per_segment.stack(dropna=False).to_frame("value"), scaled_data_for_plot_per_segment.stack(dropna=False).round(3).to_frame("scaled_value")], axis=1).reset_index().rename(columns={"level_3": "sensor"})
data_for_plot_processed[["value", "scaled_value"]] = data_for_plot_processed[["value", "scaled_value"]].round(3).clip(upper=1)
getRowCountHeatmap(data_for_plot_processed, pid, time_segment, html_file) getRowCountHeatmap(data_for_plot_processed, pid, time_segment, html_file)
html_file.close() html_file.close()

View File

@@ -18,6 +18,8 @@ html_file = open(snakemake.output[0], "a", encoding="utf-8")
if phone_data_yield.empty: if phone_data_yield.empty:
html_file.write("There is no sensor data for the sensors in [PHONE_DATA_YIELD][SENSORS].") html_file.write("There is no sensor data for the sensors in [PHONE_DATA_YIELD][SENSORS].")
else: else:
phone_data_yield.loc[:, ["phone_data_yield_rapids_ratiovalidyieldedminutes", "phone_data_yield_rapids_ratiovalidyieldedhours"]] = phone_data_yield.loc[:, ["phone_data_yield_rapids_ratiovalidyieldedminutes", "phone_data_yield_rapids_ratiovalidyieldedhours"]].round(3).clip(upper=1)
# plot ratio valid yielded minutes histogram # plot ratio valid yielded minutes histogram
fig_ratiovalidyieldedminutes = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedminutes", color="local_segment_label") fig_ratiovalidyieldedminutes = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedminutes", color="local_segment_label")
fig_ratiovalidyieldedminutes.update_layout(title="Histogram of valid yielded minutes ratio per time segment.") fig_ratiovalidyieldedminutes.update_layout(title="Histogram of valid yielded minutes ratio per time segment.")