Add visualization for data exploration

pull/95/head
Meng Li 2020-07-22 16:47:32 -04:00
parent 124843f96e
commit c135b6ef78
10 changed files with 340 additions and 46 deletions

View File

@ -127,6 +127,25 @@ if config["DORYAB_LOCATION"]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["DORYAB_LOCATION"]["DB_TABLE"])) files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["DORYAB_LOCATION"]["DB_TABLE"]))
files_to_compute.extend(expand("data/processed/{pid}/location_doryab_{segment}.csv", pid=config["PIDS"], segment = config["DORYAB_LOCATION"]["DAY_SEGMENTS"])) files_to_compute.extend(expand("data/processed/{pid}/location_doryab_{segment}.csv", pid=config["PIDS"], segment = config["DORYAB_LOCATION"]["DAY_SEGMENTS"]))
# visualization for data exploration
if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}h/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"]))
if config["HISTOGRAM_VALID_SENSED_HOURS"]["PLOT"]:
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}h/histogram_valid_sensed_hours.html", min_valid_hours_per_day=config["HISTOGRAM_VALID_SENSED_HOURS"]["MIN_VALID_HOURS_PER_DAY"]))
if config["HEATMAP_DAYS_BY_SENSORS"]["PLOT"]:
files_to_compute.extend(expand("reports/interim/{min_valid_hours_per_day}h/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"], min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"]))
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}h/heatmap_days_by_sensors_all_participants.html", min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"]))
if config["HEATMAP_SENSED_BINS"]["PLOT"]:
files_to_compute.extend(expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"]))
files_to_compute.extend(["reports/data_exploration/heatmap_sensed_bins_all_participants.html"])
if config["OVERALL_COMPLIANCE_HEATMAP"]["PLOT"]:
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}h/overall_compliance_heatmap.html", min_valid_hours_per_day=config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_HOURS_PER_DAY"]))
# analysis example
if config["PARAMS_FOR_ANALYSIS"]["COMPUTE"]: if config["PARAMS_FOR_ANALYSIS"]["COMPUTE"]:
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"] rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"]
cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"] cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"]

View File

@ -29,15 +29,15 @@ READABLE_DATETIME:
PHONE_VALID_SENSED_BINS: PHONE_VALID_SENSED_BINS:
COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features
BIN_SIZE: 5 # (in minutes) BIN_SIZE: &bin_size 5 # (in minutes)
# Add as many sensor tables as you have, they all improve the computation of PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS. # Add as many sensor tables as you have, they all improve the computation of PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS.
# If you are extracting screen or Barnett's location features, screen and locations tables are mandatory. # If you are extracting screen or Barnett's location features, screen and locations tables are mandatory.
TABLES: [] TABLES: []
PHONE_VALID_SENSED_DAYS: PHONE_VALID_SENSED_DAYS:
COMPUTE: False COMPUTE: False
MIN_VALID_HOURS_PER_DAY: [16, 18, 20] # (out of 24) MIN_HOURS_PER_DAY MIN_VALID_HOURS_PER_DAY: &min_valid_hours_per_day [16] # (out of 24) MIN_HOURS_PER_DAY
MIN_VALID_BINS_PER_HOUR: 6 # (out of 60min/BIN_SIZE bins) MIN_VALID_BINS_PER_HOUR: &min_valid_bins_per_hour 6 # (out of 60min/BIN_SIZE bins)
# Communication SMS features config, TYPES and FEATURES keys need to match # Communication SMS features config, TYPES and FEATURES keys need to match
MESSAGES: MESSAGES:
@ -200,14 +200,43 @@ CONVERSATION:
RECORDINGMINUTES: 1 RECORDINGMINUTES: 1
PAUSEDMINUTES : 3 PAUSEDMINUTES : 3
### Visualizations ################################################################
HEATMAP_FEATURES_CORRELATIONS:
PLOT: False
MIN_ROWS_RATIO: 0.5
MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
PHONE_FEATURES: [accelerometer, activity_recognition, applications_foreground, battery, calls_incoming, calls_missed, calls_outgoing, conversation, light, location_doryab, messages_received, messages_sent, screen]
FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep]
CORR_THRESHOLD: 0.1
### Analysis ################################################################ HISTOGRAM_VALID_SENSED_HOURS:
PLOT: False
MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
HEATMAP_DAYS_BY_SENSORS:
PLOT: False
MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
EXPECTED_NUM_OF_DAYS: -1
PHONE_SENSORS_TABLES: ["accelerometer", "applications_foreground", "battery", "calls", "light", "locations", "messages", "screen", "plugin_google_activity_recognition", "plugin_studentlife_audio_android"]
HEATMAP_SENSED_BINS:
PLOT: False
BIN_SIZE: *bin_size
OVERALL_COMPLIANCE_HEATMAP:
PLOT: False
BIN_SIZE: *bin_size
EXPECTED_NUM_OF_DAYS: 7
MIN_VALID_BINS_PER_HOUR: *min_valid_bins_per_hour
MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
### Example Analysis ################################################################
PARAMS_FOR_ANALYSIS: PARAMS_FOR_ANALYSIS:
COMPUTE: False COMPUTE: False
GROUNDTRUTH_TABLE: participant_info GROUNDTRUTH_TABLE: participant_info
SOURCES: &sources ["phone_features", "fitbit_features", "phone_fitbit_features"] SOURCES: &sources ["phone_features", "fitbit_features", "phone_fitbit_features"]
DAY_SEGMENTS: *day_segments DAY_SEGMENTS: *day_segments
PHONE_FEATURES: [accelerometer, applications_foreground, battery, call_incoming, call_missed, call_outgoing, activity_recognition, light, location_barnett, screen, sms_received, sms_sent] PHONE_FEATURES: [accelerometer, activity_recognition, applications_foreground, battery, calls_incoming, calls_missed, calls_outgoing, conversation, light, location_doryab, messages_received, messages_sent, screen]
FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep] FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep]
PHONE_FITBIT_FEATURES: "" # This array is merged in the input_merge_features_of_single_participant function in models.snakefile PHONE_FITBIT_FEATURES: "" # This array is merged in the input_merge_features_of_single_participant function in models.snakefile
DEMOGRAPHIC_FEATURES: [age, gender, inpatientdays] DEMOGRAPHIC_FEATURES: [age, gender, inpatientdays]

View File

@ -1,61 +1,107 @@
rule heatmap_rows: rule heatmap_features_correlations:
input: input:
sensor = "data/raw/{pid}/{sensor}_with_datetime.csv", features = expand("data/processed/{pid}/{sensor}_{day_segment}.csv", pid=config["PIDS"], sensor=config["HEATMAP_FEATURES_CORRELATIONS"]["PHONE_FEATURES"]+config["HEATMAP_FEATURES_CORRELATIONS"]["FITBIT_FEATURES"], day_segment=config["DAY_SEGMENTS"]),
pid_file = "data/external/{pid}" phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}h.csv", pid=config["PIDS"])
params: params:
table = "{sensor}", min_rows_ratio = config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_ROWS_RATIO"],
pid = "{pid}", corr_threshold = config["HEATMAP_FEATURES_CORRELATIONS"]["CORR_THRESHOLD"] #0.75
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
output: output:
"reports/figures/{pid}/{sensor}_heatmap_rows.html" "reports/data_exploration/{min_valid_hours_per_day}h/heatmap_features_correlations.html"
script: script:
"../src/visualization/heatmap_rows.py" "../src/visualization/heatmap_features_correlations.py"
rule compliance_heatmap: rule histogram_valid_sensed_hours:
input:
phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}h.csv", pid=config["PIDS"])
output:
"reports/data_exploration/{min_valid_hours_per_day}h/histogram_valid_sensed_hours.html"
script:
"../src/visualization/histogram_valid_sensed_hours.py"
rule heatmap_days_by_sensors:
input:
sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["HEATMAP_DAYS_BY_SENSORS"]["PHONE_SENSORS_TABLES"]),
phone_valid_sensed_days = "data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}h.csv"
params:
pid = "{pid}",
expected_num_of_days = config["HEATMAP_DAYS_BY_SENSORS"]["EXPECTED_NUM_OF_DAYS"]
output:
"reports/interim/{min_valid_hours_per_day}h/{pid}/heatmap_days_by_sensors.html"
script:
"../src/visualization/heatmap_days_by_sensors.py"
rule heatmap_days_by_sensors_all_participants:
input:
heatmap_rows = expand("reports/interim/{{min_valid_hours_per_day}}h/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"])
output:
"reports/data_exploration/{min_valid_hours_per_day}h/heatmap_days_by_sensors_all_participants.html"
script:
"../src/visualization/heatmap_days_by_sensors_all_participants.Rmd"
rule heatmap_sensed_bins:
input: input:
sensor = "data/interim/{pid}/phone_sensed_bins.csv", sensor = "data/interim/{pid}/phone_sensed_bins.csv",
pid_file = "data/external/{pid}" pid_file = "data/external/{pid}"
params: params:
pid = "{pid}", pid = "{pid}",
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"] bin_size = config["HEATMAP_SENSED_BINS"]["BIN_SIZE"]
output: output:
"reports/figures/{pid}/compliance_heatmap.html" "reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html"
script: script:
"../src/visualization/compliance_heatmap.py" "../src/visualization/heatmap_sensed_bins.py"
rule heatmap_sensed_bins_all_participants:
input:
heatmap_sensed_bins = expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"])
output:
"reports/data_exploration/heatmap_sensed_bins_all_participants.html"
script:
"../src/visualization/heatmap_sensed_bins_all_participants.Rmd"
rule overall_compliance_heatmap: rule overall_compliance_heatmap:
input: input:
phone_sensed_bins = expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]), phone_sensed_bins = expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]),
phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days.csv", pid=config["PIDS"]), phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}h.csv", pid=config["PIDS"]),
pid_files = expand("data/external/{pid}", pid=config["PIDS"]) pid_files = expand("data/external/{pid}", pid=config["PIDS"])
params: params:
local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"], local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"], expected_num_of_days = config["OVERALL_COMPLIANCE_HEATMAP"]["EXPECTED_NUM_OF_DAYS"],
min_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"] bin_size = config["OVERALL_COMPLIANCE_HEATMAP"]["BIN_SIZE"],
min_bins_per_hour = config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_BINS_PER_HOUR"]
output: output:
"reports/figures/overall_compliance_heatmap.html" "reports/data_exploration/{min_valid_hours_per_day}h/overall_compliance_heatmap.html"
script: script:
"../src/visualization/overall_compliance_heatmap.py" "../src/visualization/overall_compliance_heatmap.py"
rule battery_consumption_rates_barchart: # rule heatmap_rows:
input: # input:
sensor = "data/processed/{pid}/battery_daily.csv", # sensor = "data/raw/{pid}/{sensor}_with_datetime.csv",
pid_file = "data/external/{pid}" # pid_file = "data/external/{pid}"
params: # params:
pid = "{pid}" # table = "{sensor}",
output: # pid = "{pid}",
"reports/figures/{pid}/battery_consumption_rates_barchart.html" # bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
script: # output:
"../src/visualization/battery_consumption_rates_barchart.py" # "reports/figures/{pid}/{sensor}_heatmap_rows.html"
# script:
# "../src/visualization/heatmap_rows.py"
PHONE_SENSORS = [] # rule battery_consumption_rates_barchart:
PHONE_SENSORS.extend([config["MESSAGES"]["DB_TABLE"], config["CALLS"]["DB_TABLE"], config["BARNETT_LOCATION"]["DB_TABLE"], config["BLUETOOTH"]["DB_TABLE"], config["BATTERY"]["DB_TABLE"], config["SCREEN"]["DB_TABLE"], config["LIGHT"]["DB_TABLE"], config["ACCELEROMETER"]["DB_TABLE"], config["APPLICATIONS_FOREGROUND"]["DB_TABLE"],config["WIFI"]["DB_TABLE"]]) # input:
# sensor = "data/processed/{pid}/battery_daily.csv",
# pid_file = "data/external/{pid}"
# params:
# pid = "{pid}"
# output:
# "reports/figures/{pid}/battery_consumption_rates_barchart.html"
# script:
# "../src/visualization/battery_consumption_rates_barchart.py"
rule compliance_report: # rule compliance_report:
input: # input:
sensor_heatmaps = expand("reports/figures/{{pid}}/{sensor}_heatmap_rows.html", sensor=PHONE_SENSORS), # sensor_heatmaps = expand("reports/figures/{{pid}}/{sensor}_heatmap_rows.html", sensor=PHONE_SENSORS),
compliance_heatmap = rules.compliance_heatmap.output # compliance_heatmap = rules.compliance_heatmap.output
output: # output:
"reports/compliance/{pid}/compliance_report.html", # "reports/compliance/{pid}/compliance_report.html",
script: # script:
"../src/visualization/compliance_report.Rmd" # "../src/visualization/compliance_report.Rmd"

View File

@ -0,0 +1,56 @@
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
from datetime import datetime, timedelta
def getRowCountHeatmap(row_count_sensors_normalized, row_count_sensors, pid, output_path):
    """Write an HTML heatmap of per-day row counts for one participant.

    Cell color comes from the normalized counts; hover text shows the raw
    counts. The x labels combine the calendar date with its date_idx, taken
    from the frame's (local_date, date_idx) MultiIndex.
    """
    day_labels = [
        datetime.strftime(idx[0], "%Y/%m/%d") + "(" + str(idx[1]) + ")"
        for idx in row_count_sensors.index
    ]
    heatmap = go.Heatmap(
        z=row_count_sensors_normalized.T.values.tolist(),
        x=day_labels,
        y=row_count_sensors.columns.tolist(),
        hovertext=row_count_sensors.T.values.tolist(),
        hovertemplate="Date: %{x}<br>Sensor: %{y}<br>Row count: %{hovertext}<extra></extra>",
        colorscale="Viridis")
    figure = go.Figure(data=heatmap)
    figure.update_layout(title="Row count heatmap for " + pid)
    pio.write_html(figure, file=output_path, auto_open=False, include_plotlyjs="cdn")
# Snakemake-driven script: builds one participant's "days by sensors" heatmap.
# Reads snakemake.input["phone_valid_sensed_days"] / ["sensors"],
# snakemake.params["expected_num_of_days"], writes snakemake.output[0].

phone_valid_sensed_days = pd.read_csv(
    snakemake.input["phone_valid_sensed_days"],
    parse_dates=["local_date"],
    index_col=["local_date"])
# keep only the days that passed the validity criteria
phone_valid_sensed_days = phone_valid_sensed_days[phone_valid_sensed_days["is_valid_sensed_day"] == True]

row_count_sensors = pd.DataFrame()
for sensor_path in snakemake.input["sensors"]:
    # Shorten table names for display.
    # NOTE(review): "conversion" looks like a typo for "conversation"
    # (studentlife audio plugin) — confirm before renaming the label.
    sensor_name = (sensor_path.split("/")[-1]
                   .replace("_with_datetime.csv", "")
                   .replace("plugin_studentlife_audio_android", "conversion")
                   .replace("plugin_google_activity_recognition", "AR")
                   .replace("applications_foreground", "apps"))
    sensor_data = pd.read_csv(sensor_path, encoding="ISO-8859-1",
                              parse_dates=["local_date"], dtype={"label": str})
    if sensor_data.empty:
        daily_counts = pd.DataFrame(columns=[sensor_name])
    else:
        daily_counts = (sensor_data[["timestamp", "local_date"]]
                        .groupby(["local_date"])
                        .count()
                        .rename(columns={"timestamp": sensor_name}))
    row_count_sensors = row_count_sensors.join(daily_counts, how="outer")

row_count_sensors.index = pd.to_datetime(row_count_sensors.index)
row_count_sensors = row_count_sensors.join(phone_valid_sensed_days[["valid_sensed_hours"]], how="outer")

# Re-index rows by (local_date, date_idx), where date_idx counts days since the
# first observed date, so different participants can later be aligned.
reference_date = row_count_sensors.index.min()
row_count_sensors["date_idx"] = (row_count_sensors.index - reference_date).days
row_count_sensors["local_date"] = row_count_sensors.index
row_count_sensors.set_index(["local_date", "date_idx"], inplace=True)

# Right-merge against the full expected date range so every participant covers
# the same date_idx span (missing days become empty rows).
# NOTE(review): expected_num_of_days of -1 makes this range empty and empties
# the frame — presumably a positive value is supplied upstream; confirm.
date_idx_range = list(range(int(snakemake.params["expected_num_of_days"])))
date_range = [reference_date + timedelta(days=idx) for idx in date_idx_range]
all_dates = pd.DataFrame({"local_date": date_range, "date_idx": date_idx_range})
all_dates.set_index(["local_date", "date_idx"], inplace=True)
row_count_sensors = row_count_sensors.merge(all_dates, left_index=True, right_index=True, how="right")

# Min-max normalize each column so colors are comparable across sensors.
if row_count_sensors.count().max() > 1:
    row_count_sensors_normalized = (row_count_sensors - row_count_sensors.min()) / (row_count_sensors.max() - row_count_sensors.min())
else:
    row_count_sensors_normalized = row_count_sensors

# sensor_path still holds the last iterated path; every path shares one pid
pid = sensor_path.split("/")[2]
getRowCountHeatmap(row_count_sensors_normalized, row_count_sensors, pid, snakemake.output[0])

View File

@ -0,0 +1,34 @@
---
title: "Heatmap Days by Sensors Report"
author:
- "MoSHI Pipeline"
date: "`r format(Sys.time(), '%d %B, %Y')`"
params:
rmd: "heatmap_days_by_sensors_all_participants.Rmd"
output:
html_document:
highlight: tango
number_sections: no
theme: default
toc: yes
toc_depth: 3
toc_float:
collapsed: no
smooth_scroll: yes
---
```{r include=FALSE}
source("renv/activate.R")
```
## All phone sensors
```{r, echo=FALSE}
heatmaps <- snakemake@input[["heatmap_rows"]]
heatmaps.html <- vector(mode="list", length(heatmaps))
for(pid in 1:length(heatmaps)){
heatmaps.html[[pid]] <- htmltools::includeHTML(heatmaps[pid])
}
htmltools::tagList(heatmaps.html)
```

View File

@ -0,0 +1,59 @@
import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
def getCorrMatrixHeatmap(corr_matrix, output_path):
    """Render a feature correlation matrix as an interactive HTML heatmap."""
    labels = corr_matrix.columns
    heatmap = go.Heatmap(
        z=corr_matrix.values.tolist(),
        x=labels,
        y=labels,
        colorscale="Viridis")
    figure = go.Figure(data=heatmap)
    figure.update_layout(title="Correlation Matrix Heatmap")
    pio.write_html(figure, file=output_path, auto_open=False, include_plotlyjs="cdn")
# Snakemake-driven script: correlation heatmap across all participants' features.
min_rows_ratio = snakemake.params["min_rows_ratio"]
corr_threshold = snakemake.params["corr_threshold"]

# Merge per-sensor feature files into one frame indexed by (pid, local_date).
# Input paths are grouped by participant; each time a new pid appears, the
# accumulated per-sensor merge for the previous pid is flushed into `features`.
features = pd.DataFrame(columns=["local_date"])
features_all_sensors = pd.DataFrame(columns=["local_date"])
seen_pids = set()
last_pid = None
for path in snakemake.input["features"]:
    pid = path.split("/")[2]
    if pid not in seen_pids:
        seen_pids.add(pid)
        features_all_sensors["pid"] = last_pid
        features = pd.concat([features, features_all_sensors], axis=0, ignore_index=True, sort=False)
        features_all_sensors = pd.DataFrame(columns=["local_date"])
    sensor_features = pd.read_csv(path)
    features_all_sensors = features_all_sensors.merge(sensor_features, on="local_date", how="outer")
    last_pid = pid
# flush the final participant
features_all_sensors["pid"] = last_pid
features = pd.concat([features, features_all_sensors], axis=0, ignore_index=True, sort=False)
features.set_index(["pid", "local_date"], inplace=True)

# Keep only (pid, local_date) pairs that are valid sensed days.
selected_participants_and_days = pd.DataFrame()
for path in snakemake.input["phone_valid_sensed_days"]:
    pid = path.split("/")[2]
    valid_days = pd.read_csv(path)
    valid_days = valid_days[valid_days["is_valid_sensed_day"] == True]
    valid_days["pid"] = pid
    selected_participants_and_days = pd.concat([selected_participants_and_days, valid_days], axis=0)
selected_participants_and_days.set_index(["pid", "local_date"], inplace=True)
features = features.loc[features.index.intersection(selected_participants_and_days.index), :]

# Correlation matrix over all selected days.
# NOTE(review): min_periods gets a float (ratio * row count) — pandas expects
# an int; presumably works by comparison but confirm/int() it upstream.
features = features.astype(float)
corr_matrix = features.corr(min_periods=min_rows_ratio * features.shape[0])
# blank out weak correlations so only |r| >= corr_threshold is shown
corr_matrix[(corr_matrix > -corr_threshold) & (corr_matrix < corr_threshold)] = np.nan

getCorrMatrixHeatmap(corr_matrix, snakemake.output[0])

View File

@ -20,7 +20,7 @@ def getComplianceHeatmap(dates, compliance_matrix, pid, output_path, bin_size):
y=[datetime.datetime.strftime(date, '%Y/%m/%d') for date in dates], y=[datetime.datetime.strftime(date, '%Y/%m/%d') for date in dates],
colorscale='Viridis', colorscale='Viridis',
colorbar={'tick0': 0,'dtick': 1})) colorbar={'tick0': 0,'dtick': 1}))
plot.update_layout(title="Compliance heatmap.<br>Five-minute bins showing how many sensors logged at least one row of data in that period for " + pid + "<br>Label: " + label + ", device_id: " + device_id) plot.update_layout(title="Heatmap sensed bins.<br>Five-minute bins showing how many sensors logged at least one row of data in that period for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn") pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
# get current patient id # get current patient id

View File

@ -0,0 +1,34 @@
---
title: "Heatmap Sensed Bins Report"
author:
- "MoSHI Pipeline"
date: "`r format(Sys.time(), '%d %B, %Y')`"
params:
rmd: "heatmap_sensed_bins_all_participants.Rmd"
output:
html_document:
highlight: tango
number_sections: no
theme: default
toc: yes
toc_depth: 3
toc_float:
collapsed: no
smooth_scroll: yes
---
```{r include=FALSE}
source("renv/activate.R")
```
## All phone sensors
```{r, echo=FALSE}
heatmaps <- snakemake@input[["heatmap_sensed_bins"]]
heatmaps.html <- vector(mode="list", length(heatmaps))
for(pid in 1:length(heatmaps)){
heatmaps.html[[pid]] <- htmltools::includeHTML(heatmaps[pid])
}
htmltools::tagList(heatmaps.html)
```

View File

@ -0,0 +1,16 @@
import pandas as pd
import plotly.express as px
import plotly.io as pio
# Snakemake-driven script: histogram of valid sensed hours over all participants.

# merge "phone_valid_sensed_days" for all participants
valid_day_frames = [pd.DataFrame()]
for path in snakemake.input["phone_valid_sensed_days"]:
    days = pd.read_csv(path)
    valid_day_frames.append(days[days["is_valid_sensed_day"] == True])
selected_participants_and_days = pd.concat(valid_day_frames, axis=0)

# plot the distribution of per-day valid sensed hours
fig = px.histogram(selected_participants_and_days, x="valid_sensed_hours")
fig.update_layout(title="Phone Valid Hours Histogram")
pio.write_html(fig, file=snakemake.output[0], auto_open=False, include_plotlyjs="cdn")

View File

@ -16,7 +16,7 @@ def getOneRow(data_per_participant, last_seven_dates, col_name, row):
row.append(0) row.append(0)
return row return row
def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, output_path): def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, expected_num_of_days, output_path):
plot = ff.create_annotated_heatmap(z=sensors_with_data[last_seven_dates].values, plot = ff.create_annotated_heatmap(z=sensors_with_data[last_seven_dates].values,
x=[date.replace("-", "/") for date in last_seven_dates], x=[date.replace("-", "/") for date in last_seven_dates],
y=[pid + "." + label for pid, label in zip(sensors_with_data["pid"].to_list(), sensors_with_data["label"].to_list())], y=[pid + "." + label for pid, label in zip(sensors_with_data["pid"].to_list(), sensors_with_data["label"].to_list())],
@ -25,7 +25,7 @@ def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seve
colorscale="Viridis", colorscale="Viridis",
colorbar={"tick0": 0,"dtick": 1}, colorbar={"tick0": 0,"dtick": 1},
showscale=True) showscale=True)
plot.update_layout(title="Overall compliance heatmap for last seven days.<br>Bin's color shows how many sensors logged at least one row of data for that day.<br>Bin's text shows the valid hours of that day.(A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes)") plot.update_layout(title="Overall compliance heatmap for last " + str(expected_num_of_days) + " days.<br>Bin's color shows how many sensors logged at least one row of data for that day.<br>Bin's text shows the valid hours of that day.(A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes)")
plot["layout"]["xaxis"].update(side="bottom") plot["layout"]["xaxis"].update(side="bottom")
pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn") pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
@ -36,11 +36,12 @@ pid_files = snakemake.input["pid_files"]
local_timezone = snakemake.params["local_timezone"] local_timezone = snakemake.params["local_timezone"]
bin_size = snakemake.params["bin_size"] bin_size = snakemake.params["bin_size"]
min_bins_per_hour = snakemake.params["min_bins_per_hour"] min_bins_per_hour = snakemake.params["min_bins_per_hour"]
expected_num_of_days = int(snakemake.params["expected_num_of_days"])
cur_date = datetime.datetime.now().astimezone(tz.gettz(local_timezone)).date() cur_date = datetime.datetime.now().astimezone(tz.gettz(local_timezone)).date()
last_seven_dates = [] last_seven_dates = []
for date_offset in range(6,-1,-1): for date_offset in range(expected_num_of_days-1, -1, -1):
last_seven_dates.append((cur_date - datetime.timedelta(days=date_offset)).strftime("%Y-%m-%d")) last_seven_dates.append((cur_date - datetime.timedelta(days=date_offset)).strftime("%Y-%m-%d"))
@ -64,4 +65,4 @@ if sensors_with_data.empty:
empty_html.write("There is no sensor data for all participants") empty_html.write("There is no sensor data for all participants")
empty_html.close() empty_html.close()
else: else:
getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, snakemake.output[0]) getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, expected_num_of_days, snakemake.output[0])