rapids/src/visualization/heatmap_phone_data_yield_pe...

88 lines
4.8 KiB
Python
Raw Permalink Normal View History

2020-12-04 03:00:32 +01:00
import pandas as pd
2021-06-05 04:58:27 +02:00
import plotly.express as px
2020-12-04 03:00:32 +01:00
import yaml
2021-06-05 04:58:27 +02:00
def getPidAndLabel(participant_file_paths, pids):
    """Read each participant's phone label from its YAML file.

    The two arguments are walked in lockstep with ``zip``, so the i-th path
    is taken to describe the i-th pid and any surplus entries in the longer
    one are ignored.

    Args:
        participant_file_paths: Iterable of paths to participant YAML files.
            Each file must contain a ``PHONE`` mapping with a ``LABEL`` key.
        pids: Iterable of participant ids, in the same order as the paths.

    Returns:
        A tuple ``(pid2label, y_axis_labels)`` where ``pid2label`` maps each
        pid to its label (converted with ``str``) and ``y_axis_labels`` is the
        list of ``"<pid>.<label>"`` strings in input order.
    """
    pid2label = {}
    y_axis_labels = []

    for yaml_path, participant_id in zip(participant_file_paths, pids):
        with open(yaml_path, "r", encoding="utf-8") as handle:
            participant_config = yaml.safe_load(handle)

        # The label is always stored as text, whatever type the YAML held.
        phone_label = str(participant_config["PHONE"]["LABEL"])

        pid2label[participant_id] = phone_label
        y_axis_labels.append(participant_id + "." + phone_label)

    return pid2label, y_axis_labels
2020-12-04 03:00:32 +01:00
2021-06-05 04:58:27 +02:00
def getPhoneDataYieldHeatmap(phone_data_yield, time, time_segment, html_file):
    """Write one timeline heatmap per yield ratio (minutes, hours) to html_file.

    For each of the two ratio columns
    ``phone_data_yield_rapids_ratiovalidyieldedminutes`` and
    ``phone_data_yield_rapids_ratiovalidyieldedhours`` a plotly timeline is
    built (one bar per row, coloured by the ratio on a fixed 0-1 scale) and
    its HTML fragment is written to ``html_file``.

    Args:
        phone_data_yield: DataFrame with the columns
            ``local_segment_start_datetime``, ``local_segment_end_datetime``,
            ``local_segment``, ``y_axis_label`` and the two ratio columns.
            It is not modified by this function.
        time: ``"RELATIVE_TIME"`` to plot segments relative to the earliest
            segment start in ``phone_data_yield``; any other value plots the
            datetimes as they are.
        time_segment: Segment name, used only in the figure title.
        html_file: Writable text file object that receives the figure HTML.

    Returns:
        The same ``html_file`` object that was passed in.
    """
    is_relative = time == "RELATIVE_TIME"

    if is_relative:
        # Re-base every segment on an arbitrary origin (2000-01-01) so the
        # x-axis shows the time elapsed since the earliest segment start in
        # this frame. The reference minimum is taken once, before either
        # column is shifted.
        first_start = phone_data_yield["local_segment_start_datetime"].min()
        origin = pd.Timestamp(2000, 1, 1)
        # assign() returns a new frame, so the caller's DataFrame keeps its
        # original datetimes.
        phone_data_yield = phone_data_yield.assign(
            local_segment_end_datetime=(phone_data_yield["local_segment_end_datetime"] - first_start) + origin,
            local_segment_start_datetime=(phone_data_yield["local_segment_start_datetime"] - first_start) + origin,
        )

    # "unit" rather than "type", to avoid shadowing the builtin.
    for unit in ["minutes", "hours"]:

        column_name = "phone_data_yield_rapids_ratiovalidyielded" + unit

        fig = px.timeline(phone_data_yield,
                          x_start="local_segment_start_datetime",
                          x_end="local_segment_end_datetime",
                          y="y_axis_label",
                          color=column_name,
                          color_continuous_scale="Viridis",
                          range_color=[0, 1],
                          opacity=0.7,
                          hover_data={'local_segment_start_datetime':False, 'local_segment_end_datetime':False, 'local_segment':True})

        title = ("Heatmap of valid yielded " + unit + " ratio for " + time_segment + " segments and " + time.lower().replace("_", " ")
                 + ".<br>y-axis shows participant information (format: pid.label).<br>x-axis shows the time"
                 + (" since their first segment" if is_relative else "")
                 + ".<br>z-axis (color) shows valid yielded " + unit + " ratio during a segment instance.")

        fig.update_layout(title=title,
                          xaxis=dict(side="bottom", title="Time Since First Segment" if is_relative else "Time"),
                          yaxis=dict(side="left", title="Participant information"),
                          margin=dict(t=160))

        if is_relative:
            # Ticks are rendered from the re-based datetimes above.
            # NOTE(review): %j looks like day-of-year, which would start at
            # 001 rather than 0 for the first day - confirm this is intended.
            fig.update_layout(xaxis_tickformat="%y years %j days<br>%X")

        html_file.write(fig.to_html(full_html=False, include_plotlyjs="cdn"))

    return html_file
2020-12-04 03:00:32 +01:00
2021-06-05 04:58:27 +02:00
# Script body: inputs, params and output path come from the `snakemake` object.
pid2label, y_axis_labels = getPidAndLabel(snakemake.input["participant_files"], snakemake.params["pids"])
time_segments_type = snakemake.params["time_segments_type"] # FREQUENCY or PERIODIC or EVENT
time = snakemake.params["time"] # ABSOLUTE_TIME or RELATIVE_TIME
time_segments = pd.read_csv(snakemake.input["time_segments_file"])["label"].unique()
phone_data_yield = pd.read_csv(snakemake.input["phone_data_yield"], parse_dates=["local_segment_start_datetime", "local_segment_end_datetime"]).sort_values(by=["pid", "local_segment_start_datetime"])
if time_segments_type == "FREQUENCY":
    # Drop the last four characters of each label so it can be compared with
    # the labels read from the time segments file.
    # NOTE(review): presumably a fixed-width suffix added to frequency segment labels - confirm.
    phone_data_yield["local_segment_label"] = phone_data_yield["local_segment_label"].str[:-4]

yield_ratio_columns = ["phone_data_yield_rapids_ratiovalidyieldedminutes", "phone_data_yield_rapids_ratiovalidyieldedhours"]

# The context manager closes the output file even when the column check below raises.
with open(snakemake.output[0], "w", encoding="utf-8") as html_file:
    if phone_data_yield.empty:
        html_file.write("There is no sensor data for the sensors in [PHONE_DATA_YIELD][SENSORS].")
    else:
        # Make sure the phone_data_yield file contains both "phone_data_yield_rapids_ratiovalidyieldedminutes" and "phone_data_yield_rapids_ratiovalidyieldedhours" columns
        if any(column not in phone_data_yield.columns for column in yield_ratio_columns):
            raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")
        # Round to three decimals and cap at 1 to match the fixed 0-1 colour range of the heatmaps.
        phone_data_yield.loc[:, yield_ratio_columns] = phone_data_yield.loc[:, yield_ratio_columns].round(3).clip(upper=1)
        phone_data_yield["y_axis_label"] = phone_data_yield["pid"].apply(lambda pid: pid + "." + str(pid2label[pid]))

        # getPhoneDataYieldHeatmap returns the file object it was given, so
        # the return value is not rebound here.
        if time_segments_type == "EVENT":
            getPhoneDataYieldHeatmap(phone_data_yield, time, "event", html_file)
        else: # FREQUENCY or PERIODIC
            for time_segment in time_segments:
                phone_data_yield_per_segment = phone_data_yield[phone_data_yield["local_segment_label"] == time_segment].copy()
                if not phone_data_yield_per_segment.empty:
                    getPhoneDataYieldHeatmap(phone_data_yield_per_segment, time, time_segment, html_file)