Overall compliance heatmap: show all dates for all participants (previously only the last N days could be shown)
parent
5f771618ae
commit
b3aa4d82e1
|
@ -22,7 +22,10 @@ if config["PHONE_VALID_SENSED_DAYS"]["COMPUTE"]:
|
||||||
if len(config["PHONE_VALID_SENSED_BINS"]["TABLES"]) == 0:
|
if len(config["PHONE_VALID_SENSED_BINS"]["TABLES"]) == 0:
|
||||||
raise ValueError("If you want to compute PHONE_VALID_SENSED_DAYS, you need to add at least one table to [PHONE_VALID_SENSED_BINS][TABLES] in config.yaml")
|
raise ValueError("If you want to compute PHONE_VALID_SENSED_DAYS, you need to add at least one table to [PHONE_VALID_SENSED_BINS][TABLES] in config.yaml")
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}h.csv", pid=config["PIDS"], min_valid_hours_per_day=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_HOURS_PER_DAY"]))
|
files_to_compute.extend(expand("data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins.csv",
|
||||||
|
pid=config["PIDS"],
|
||||||
|
min_valid_hours_per_day=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_HOURS_PER_DAY"],
|
||||||
|
min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||||
|
|
||||||
if config["MESSAGES"]["COMPUTE"]:
|
if config["MESSAGES"]["COMPUTE"]:
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
|
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
|
||||||
|
@ -143,7 +146,7 @@ if config["HEATMAP_SENSED_BINS"]["PLOT"]:
|
||||||
files_to_compute.extend(["reports/data_exploration/heatmap_sensed_bins_all_participants.html"])
|
files_to_compute.extend(["reports/data_exploration/heatmap_sensed_bins_all_participants.html"])
|
||||||
|
|
||||||
if config["OVERALL_COMPLIANCE_HEATMAP"]["PLOT"]:
|
if config["OVERALL_COMPLIANCE_HEATMAP"]["PLOT"]:
|
||||||
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}h/overall_compliance_heatmap.html", min_valid_hours_per_day=config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_HOURS_PER_DAY"]))
|
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/overall_compliance_heatmap.html", min_valid_hours_per_day=config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||||
|
|
||||||
# analysis example
|
# analysis example
|
||||||
if config["PARAMS_FOR_ANALYSIS"]["COMPUTE"]:
|
if config["PARAMS_FOR_ANALYSIS"]["COMPUTE"]:
|
||||||
|
|
|
@ -37,7 +37,7 @@ PHONE_VALID_SENSED_BINS:
|
||||||
PHONE_VALID_SENSED_DAYS:
|
PHONE_VALID_SENSED_DAYS:
|
||||||
COMPUTE: False
|
COMPUTE: False
|
||||||
MIN_VALID_HOURS_PER_DAY: &min_valid_hours_per_day [16] # (out of 24) MIN_HOURS_PER_DAY
|
MIN_VALID_HOURS_PER_DAY: &min_valid_hours_per_day [16] # (out of 24) MIN_HOURS_PER_DAY
|
||||||
MIN_VALID_BINS_PER_HOUR: &min_valid_bins_per_hour 6 # (out of 60min/BIN_SIZE bins)
|
MIN_VALID_BINS_PER_HOUR: &min_valid_bins_per_hour [6] # (out of 60min/BIN_SIZE bins)
|
||||||
|
|
||||||
# Communication SMS features config, TYPES and FEATURES keys need to match
|
# Communication SMS features config, TYPES and FEATURES keys need to match
|
||||||
MESSAGES:
|
MESSAGES:
|
||||||
|
@ -229,8 +229,9 @@ HEATMAP_SENSED_BINS:
|
||||||
|
|
||||||
OVERALL_COMPLIANCE_HEATMAP:
|
OVERALL_COMPLIANCE_HEATMAP:
|
||||||
PLOT: False
|
PLOT: False
|
||||||
|
ONLY_SHOW_VALID_DAYS: False
|
||||||
|
EXPECTED_NUM_OF_DAYS: -1
|
||||||
BIN_SIZE: *bin_size
|
BIN_SIZE: *bin_size
|
||||||
EXPECTED_NUM_OF_DAYS: 7
|
|
||||||
MIN_VALID_BINS_PER_HOUR: *min_valid_bins_per_hour
|
MIN_VALID_BINS_PER_HOUR: *min_valid_bins_per_hour
|
||||||
MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
|
MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
|
||||||
|
|
||||||
|
|
|
@ -52,9 +52,9 @@ rule phone_valid_sensed_days:
|
||||||
phone_sensed_bins = "data/interim/{pid}/phone_sensed_bins.csv"
|
phone_sensed_bins = "data/interim/{pid}/phone_sensed_bins.csv"
|
||||||
params:
|
params:
|
||||||
min_valid_hours_per_day = "{min_valid_hours_per_day}",
|
min_valid_hours_per_day = "{min_valid_hours_per_day}",
|
||||||
min_valid_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]
|
min_valid_bins_per_hour = "{min_valid_bins_per_hour}"
|
||||||
output:
|
output:
|
||||||
"data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}h.csv"
|
"data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins.csv"
|
||||||
script:
|
script:
|
||||||
"../src/data/phone_valid_sensed_days.R"
|
"../src/data/phone_valid_sensed_days.R"
|
||||||
|
|
||||||
|
|
|
@ -62,15 +62,16 @@ rule heatmap_sensed_bins_all_participants:
|
||||||
rule overall_compliance_heatmap:
|
rule overall_compliance_heatmap:
|
||||||
input:
|
input:
|
||||||
phone_sensed_bins = expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]),
|
phone_sensed_bins = expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]),
|
||||||
phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}h.csv", pid=config["PIDS"]),
|
phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}hours_{{min_valid_bins_per_hour}}bins.csv", pid=config["PIDS"]),
|
||||||
pid_files = expand("data/external/{pid}", pid=config["PIDS"])
|
pid_files = expand("data/external/{pid}", pid=config["PIDS"])
|
||||||
params:
|
params:
|
||||||
|
only_show_valid_days = config["OVERALL_COMPLIANCE_HEATMAP"]["ONLY_SHOW_VALID_DAYS"],
|
||||||
local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
|
local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
|
||||||
expected_num_of_days = config["OVERALL_COMPLIANCE_HEATMAP"]["EXPECTED_NUM_OF_DAYS"],
|
expected_num_of_days = config["OVERALL_COMPLIANCE_HEATMAP"]["EXPECTED_NUM_OF_DAYS"],
|
||||||
bin_size = config["OVERALL_COMPLIANCE_HEATMAP"]["BIN_SIZE"],
|
bin_size = config["OVERALL_COMPLIANCE_HEATMAP"]["BIN_SIZE"],
|
||||||
min_bins_per_hour = config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_BINS_PER_HOUR"]
|
min_bins_per_hour = "{min_valid_bins_per_hour}"
|
||||||
output:
|
output:
|
||||||
"reports/data_exploration/{min_valid_hours_per_day}h/overall_compliance_heatmap.html"
|
"reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/overall_compliance_heatmap.html"
|
||||||
script:
|
script:
|
||||||
"../src/visualization/overall_compliance_heatmap.py"
|
"../src/visualization/overall_compliance_heatmap.py"
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@ library("tidyr")
|
||||||
|
|
||||||
phone_sensed_bins <- read.csv(snakemake@input[["phone_sensed_bins"]])
|
phone_sensed_bins <- read.csv(snakemake@input[["phone_sensed_bins"]])
|
||||||
min_valid_hours_per_day <- as.integer(snakemake@params[["min_valid_hours_per_day"]])
|
min_valid_hours_per_day <- as.integer(snakemake@params[["min_valid_hours_per_day"]])
|
||||||
min_valid_bins_per_hour <- snakemake@params[["min_valid_bins_per_hour"]]
|
min_valid_bins_per_hour <- as.integer(snakemake@params[["min_valid_bins_per_hour"]])
|
||||||
output_file <- snakemake@output[[1]]
|
output_file <- snakemake@output[[1]]
|
||||||
|
|
||||||
phone_valid_sensed_days <- phone_sensed_bins %>%
|
phone_valid_sensed_days <- phone_sensed_bins %>%
|
||||||
|
|
|
@ -1,31 +1,54 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import plotly.io as pio
|
import plotly.io as pio
|
||||||
import plotly.figure_factory as ff
|
import plotly.graph_objects as go
|
||||||
from dateutil import tz
|
from dateutil import tz
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
def getOneRow(data_per_participant, last_seven_dates, col_name, row):
|
def getOneRow(data_per_participant, last_certain_dates, col_name, row, expected_num_of_days, only_show_valid_days):
|
||||||
|
|
||||||
data = pd.read_csv(data_per_participant, index_col=["local_date"])
|
data = pd.read_csv(data_per_participant, index_col=["local_date"])
|
||||||
|
|
||||||
if col_name == "num_sensors":
|
if col_name == "num_sensors":
|
||||||
data["num_sensors"] = data.max(axis=1)
|
data["num_sensors"] = data.max(axis=1)
|
||||||
for date in last_seven_dates:
|
|
||||||
if date in data.index:
|
if only_show_valid_days and col_name == "valid_sensed_hours":
|
||||||
row.append(data.loc[date][col_name])
|
# replace invalid days' valid sensed hours with np.nan to let our heatmap only shows valid days
|
||||||
else:
|
data.loc[data[data["is_valid_sensed_day"] == False].index, "valid_sensed_hours"] = np.nan
|
||||||
row.append(0)
|
|
||||||
|
if expected_num_of_days == -1:
|
||||||
|
# show all days
|
||||||
|
data.index = pd.to_datetime(data.index)
|
||||||
|
start_date = data.index.min()
|
||||||
|
# upsample data into one day bins
|
||||||
|
data = data.resample("1D").sum()
|
||||||
|
data["date_idx"] = (data.index - start_date).days
|
||||||
|
data.set_index("date_idx", inplace=True, drop=True)
|
||||||
|
row = row + data[col_name].tolist()
|
||||||
|
else:
|
||||||
|
# only show last certain days
|
||||||
|
for date in last_certain_dates:
|
||||||
|
if date in data.index:
|
||||||
|
row.append(data.loc[date][col_name])
|
||||||
|
else:
|
||||||
|
row.append(0)
|
||||||
|
|
||||||
return row
|
return row
|
||||||
|
|
||||||
def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, expected_num_of_days, output_path):
|
def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_certain_dates, bin_size, min_bins_per_hour, expected_num_of_days, output_path):
|
||||||
plot = ff.create_annotated_heatmap(z=sensors_with_data[last_seven_dates].values,
|
plot = go.Figure(data=go.Heatmap(z=valid_sensed_hours[last_certain_dates].values,
|
||||||
x=[date.replace("-", "/") for date in last_seven_dates],
|
x=[date.replace("-", "/") for date in last_certain_dates] if expected_num_of_days != -1 else last_certain_dates,
|
||||||
y=[pid + "." + label for pid, label in zip(sensors_with_data["pid"].to_list(), sensors_with_data["label"].to_list())],
|
y=[pid + "." + label for pid, label in zip(sensors_with_data["pid"].to_list(), sensors_with_data["label"].to_list())],
|
||||||
annotation_text=valid_sensed_hours[last_seven_dates].values,
|
text=sensors_with_data[last_certain_dates].values,
|
||||||
hovertemplate='Date: %{x}<br>Participant: %{y}<br>Number of sensors with data: %{z}<extra></extra>',
|
hovertemplate="Date: %{x}<br>Participant: %{y}<br>Valid sensed hours: %{z}<br>Number of sensors with data: %{text}<extra></extra>" if expected_num_of_days != -1 else "Date_idx: %{x}<br>Participant: %{y}<br>Valid sensed hours: %{z}<br>Number of sensors with data: %{text}<extra></extra>",
|
||||||
colorscale="Viridis",
|
colorscale="Viridis",
|
||||||
colorbar={"tick0": 0,"dtick": 1},
|
colorbar={"tick0": 0,"dtick": 1},
|
||||||
showscale=True)
|
showscale=True))
|
||||||
plot.update_layout(title="Overall compliance heatmap for last " + str(expected_num_of_days) + " days.<br>Bin's color shows how many sensors logged at least one row of data for that day.<br>Bin's text shows the valid hours of that day.(A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes)")
|
if expected_num_of_days != -1:
|
||||||
|
plot.update_layout(title="Overall compliance heatmap for last " + str(expected_num_of_days) + " days.<br>Bin's color shows valid sensed hours for that day.<br>A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes")
|
||||||
|
else:
|
||||||
|
plot.update_layout(title="Overall compliance heatmap for all days.<br>Bin's color shows valid sensed hours for that day.<br>A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes")
|
||||||
|
|
||||||
plot["layout"]["xaxis"].update(side="bottom")
|
plot["layout"]["xaxis"].update(side="bottom")
|
||||||
pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
|
pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
|
||||||
|
|
||||||
|
@ -33,17 +56,21 @@ def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seve
|
||||||
phone_sensed_bins = snakemake.input["phone_sensed_bins"]
|
phone_sensed_bins = snakemake.input["phone_sensed_bins"]
|
||||||
phone_valid_sensed_days = snakemake.input["phone_valid_sensed_days"]
|
phone_valid_sensed_days = snakemake.input["phone_valid_sensed_days"]
|
||||||
pid_files = snakemake.input["pid_files"]
|
pid_files = snakemake.input["pid_files"]
|
||||||
|
only_show_valid_days = snakemake.params["only_show_valid_days"]
|
||||||
local_timezone = snakemake.params["local_timezone"]
|
local_timezone = snakemake.params["local_timezone"]
|
||||||
bin_size = snakemake.params["bin_size"]
|
bin_size = snakemake.params["bin_size"]
|
||||||
min_bins_per_hour = snakemake.params["min_bins_per_hour"]
|
min_bins_per_hour = snakemake.params["min_bins_per_hour"]
|
||||||
expected_num_of_days = int(snakemake.params["expected_num_of_days"])
|
expected_num_of_days = int(snakemake.params["expected_num_of_days"])
|
||||||
|
|
||||||
|
if expected_num_of_days < -1:
|
||||||
|
raise ValueError("EXPECTED_NUM_OF_DAYS of OVERALL_COMPLIANCE_HEATMAP section in config.yaml must be larger or equal to -1.")
|
||||||
|
|
||||||
cur_date = datetime.datetime.now().astimezone(tz.gettz(local_timezone)).date()
|
last_certain_dates = []
|
||||||
last_seven_dates = []
|
if expected_num_of_days != -1:
|
||||||
for date_offset in range(expected_num_of_days-1, -1, -1):
|
# get the list of dates to show
|
||||||
last_seven_dates.append((cur_date - datetime.timedelta(days=date_offset)).strftime("%Y-%m-%d"))
|
cur_date = datetime.datetime.now().astimezone(tz.gettz(local_timezone)).date()
|
||||||
|
for date_offset in range(expected_num_of_days-1, -1, -1):
|
||||||
|
last_certain_dates.append((cur_date - datetime.timedelta(days=date_offset)).strftime("%Y-%m-%d"))
|
||||||
|
|
||||||
sensors_with_data_records, valid_sensed_hours_records = [], []
|
sensors_with_data_records, valid_sensed_hours_records = [], []
|
||||||
for sensors_with_data_individual, valid_sensed_hours_individual, pid_file in zip(phone_sensed_bins, phone_valid_sensed_days, pid_files):
|
for sensors_with_data_individual, valid_sensed_hours_individual, pid_file in zip(phone_sensed_bins, phone_valid_sensed_days, pid_files):
|
||||||
|
@ -54,15 +81,20 @@ for sensors_with_data_individual, valid_sensed_hours_individual, pid_file in zip
|
||||||
label = external_file_content[2].strip()
|
label = external_file_content[2].strip()
|
||||||
pid = pid_file.split("/")[-1]
|
pid = pid_file.split("/")[-1]
|
||||||
|
|
||||||
sensors_with_data_records.append(getOneRow(sensors_with_data_individual, last_seven_dates, "num_sensors", [pid, label, device_id]))
|
sensors_with_data_records.append(getOneRow(sensors_with_data_individual, last_certain_dates, "num_sensors", [pid, label, device_id], expected_num_of_days, only_show_valid_days))
|
||||||
valid_sensed_hours_records.append(getOneRow(valid_sensed_hours_individual, last_seven_dates, "valid_hours", [pid, label, device_id]))
|
valid_sensed_hours_records.append(getOneRow(valid_sensed_hours_individual, last_certain_dates, "valid_sensed_hours", [pid, label, device_id], expected_num_of_days, only_show_valid_days))
|
||||||
|
|
||||||
sensors_with_data = pd.DataFrame(data=sensors_with_data_records, columns=["pid", "label", "device_id"] + last_seven_dates)
|
if expected_num_of_days == -1:
|
||||||
valid_sensed_hours = pd.DataFrame(data=valid_sensed_hours_records, columns=["pid", "label", "device_id"] + last_seven_dates)
|
# get the date_idx of all days
|
||||||
|
total_num_of_days = max([len(x) for x in sensors_with_data_records]) - 3
|
||||||
|
last_certain_dates = [date_idx for date_idx in range(total_num_of_days)]
|
||||||
|
|
||||||
|
sensors_with_data = pd.DataFrame(data=sensors_with_data_records, columns=["pid", "label", "device_id"] + last_certain_dates).replace(0, np.nan)
|
||||||
|
valid_sensed_hours = pd.DataFrame(data=valid_sensed_hours_records, columns=["pid", "label", "device_id"] + last_certain_dates).replace(0, np.nan)
|
||||||
|
|
||||||
if sensors_with_data.empty:
|
if sensors_with_data.empty:
|
||||||
empty_html = open(snakemake.output[0], "w")
|
empty_html = open(snakemake.output[0], "w")
|
||||||
empty_html.write("There is no sensor data for all participants")
|
empty_html.write("There is no sensor data for all participants")
|
||||||
empty_html.close()
|
empty_html.close()
|
||||||
else:
|
else:
|
||||||
getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, expected_num_of_days, snakemake.output[0])
|
getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_certain_dates, bin_size, min_bins_per_hour, expected_num_of_days, snakemake.output[0])
|
||||||
|
|
Loading…
Reference in New Issue