Add visualization for data exploration
parent
124843f96e
commit
c135b6ef78
19
Snakefile
19
Snakefile
|
@ -127,6 +127,25 @@ if config["DORYAB_LOCATION"]["COMPUTE"]:
|
|||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["DORYAB_LOCATION"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/processed/{pid}/location_doryab_{segment}.csv", pid=config["PIDS"], segment = config["DORYAB_LOCATION"]["DAY_SEGMENTS"]))
|
||||
|
||||
# visualization for data exploration
|
||||
if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
|
||||
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}h/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"]))
|
||||
|
||||
if config["HISTOGRAM_VALID_SENSED_HOURS"]["PLOT"]:
|
||||
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}h/histogram_valid_sensed_hours.html", min_valid_hours_per_day=config["HISTOGRAM_VALID_SENSED_HOURS"]["MIN_VALID_HOURS_PER_DAY"]))
|
||||
|
||||
if config["HEATMAP_DAYS_BY_SENSORS"]["PLOT"]:
|
||||
files_to_compute.extend(expand("reports/interim/{min_valid_hours_per_day}h/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"], min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"]))
|
||||
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}h/heatmap_days_by_sensors_all_participants.html", min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"]))
|
||||
|
||||
if config["HEATMAP_SENSED_BINS"]["PLOT"]:
|
||||
files_to_compute.extend(expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"]))
|
||||
files_to_compute.extend(["reports/data_exploration/heatmap_sensed_bins_all_participants.html"])
|
||||
|
||||
if config["OVERALL_COMPLIANCE_HEATMAP"]["PLOT"]:
|
||||
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}h/overall_compliance_heatmap.html", min_valid_hours_per_day=config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_HOURS_PER_DAY"]))
|
||||
|
||||
# analysis example
|
||||
if config["PARAMS_FOR_ANALYSIS"]["COMPUTE"]:
|
||||
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"]
|
||||
cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"]
|
||||
|
|
39
config.yaml
39
config.yaml
|
@ -29,15 +29,15 @@ READABLE_DATETIME:
|
|||
|
||||
PHONE_VALID_SENSED_BINS:
|
||||
COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features
|
||||
BIN_SIZE: 5 # (in minutes)
|
||||
BIN_SIZE: &bin_size 5 # (in minutes)
|
||||
# Add as many sensor tables as you have, they all improve the computation of PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS.
|
||||
# If you are extracting screen or Barnett's location features, screen and locations tables are mandatory.
|
||||
TABLES: []
|
||||
|
||||
PHONE_VALID_SENSED_DAYS:
|
||||
COMPUTE: False
|
||||
MIN_VALID_HOURS_PER_DAY: [16, 18, 20] # (out of 24) MIN_HOURS_PER_DAY
|
||||
MIN_VALID_BINS_PER_HOUR: 6 # (out of 60min/BIN_SIZE bins)
|
||||
MIN_VALID_HOURS_PER_DAY: &min_valid_hours_per_day [16] # (out of 24) MIN_HOURS_PER_DAY
|
||||
MIN_VALID_BINS_PER_HOUR: &min_valid_bins_per_hour 6 # (out of 60min/BIN_SIZE bins)
|
||||
|
||||
# Communication SMS features config, TYPES and FEATURES keys need to match
|
||||
MESSAGES:
|
||||
|
@ -200,14 +200,43 @@ CONVERSATION:
|
|||
RECORDINGMINUTES: 1
|
||||
PAUSEDMINUTES : 3
|
||||
|
||||
### Visualizations ################################################################
|
||||
HEATMAP_FEATURES_CORRELATIONS:
|
||||
PLOT: False
|
||||
MIN_ROWS_RATIO: 0.5
|
||||
MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
|
||||
PHONE_FEATURES: [accelerometer, activity_recognition, applications_foreground, battery, calls_incoming, calls_missed, calls_outgoing, conversation, light, location_doryab, messages_received, messages_sent, screen]
|
||||
FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep]
|
||||
CORR_THRESHOLD: 0.1
|
||||
|
||||
### Analysis ################################################################
|
||||
HISTOGRAM_VALID_SENSED_HOURS:
|
||||
PLOT: False
|
||||
MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
|
||||
|
||||
HEATMAP_DAYS_BY_SENSORS:
|
||||
PLOT: False
|
||||
MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
|
||||
EXPECTED_NUM_OF_DAYS: -1
|
||||
PHONE_SENSORS_TABLES: ["accelerometer", "applications_foreground", "battery", "calls", "light", "locations", "messages", "screen", "plugin_google_activity_recognition", "plugin_studentlife_audio_android"]
|
||||
|
||||
HEATMAP_SENSED_BINS:
|
||||
PLOT: False
|
||||
BIN_SIZE: *bin_size
|
||||
|
||||
OVERALL_COMPLIANCE_HEATMAP:
|
||||
PLOT: False
|
||||
BIN_SIZE: *bin_size
|
||||
EXPECTED_NUM_OF_DAYS: 7
|
||||
MIN_VALID_BINS_PER_HOUR: *min_valid_bins_per_hour
|
||||
MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
|
||||
|
||||
### Example Analysis ################################################################
|
||||
PARAMS_FOR_ANALYSIS:
|
||||
COMPUTE: False
|
||||
GROUNDTRUTH_TABLE: participant_info
|
||||
SOURCES: &sources ["phone_features", "fitbit_features", "phone_fitbit_features"]
|
||||
DAY_SEGMENTS: *day_segments
|
||||
PHONE_FEATURES: [accelerometer, applications_foreground, battery, call_incoming, call_missed, call_outgoing, activity_recognition, light, location_barnett, screen, sms_received, sms_sent]
|
||||
PHONE_FEATURES: [accelerometer, activity_recognition, applications_foreground, battery, calls_incoming, calls_missed, calls_outgoing, conversation, light, location_doryab, messages_received, messages_sent, screen]
|
||||
FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep]
|
||||
PHONE_FITBIT_FEATURES: "" # This array is merged in the input_merge_features_of_single_participant function in models.snakefile
|
||||
DEMOGRAPHIC_FEATURES: [age, gender, inpatientdays]
|
||||
|
|
|
@ -1,61 +1,107 @@
|
|||
rule heatmap_rows:
|
||||
rule heatmap_features_correlations:
|
||||
input:
|
||||
sensor = "data/raw/{pid}/{sensor}_with_datetime.csv",
|
||||
pid_file = "data/external/{pid}"
|
||||
features = expand("data/processed/{pid}/{sensor}_{day_segment}.csv", pid=config["PIDS"], sensor=config["HEATMAP_FEATURES_CORRELATIONS"]["PHONE_FEATURES"]+config["HEATMAP_FEATURES_CORRELATIONS"]["FITBIT_FEATURES"], day_segment=config["DAY_SEGMENTS"]),
|
||||
phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}h.csv", pid=config["PIDS"])
|
||||
params:
|
||||
table = "{sensor}",
|
||||
pid = "{pid}",
|
||||
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
|
||||
min_rows_ratio = config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_ROWS_RATIO"],
|
||||
corr_threshold = config["HEATMAP_FEATURES_CORRELATIONS"]["CORR_THRESHOLD"] #0.75
|
||||
output:
|
||||
"reports/figures/{pid}/{sensor}_heatmap_rows.html"
|
||||
"reports/data_exploration/{min_valid_hours_per_day}h/heatmap_features_correlations.html"
|
||||
script:
|
||||
"../src/visualization/heatmap_rows.py"
|
||||
"../src/visualization/heatmap_features_correlations.py"
|
||||
|
||||
rule compliance_heatmap:
|
||||
rule histogram_valid_sensed_hours:
|
||||
input:
|
||||
phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}h.csv", pid=config["PIDS"])
|
||||
output:
|
||||
"reports/data_exploration/{min_valid_hours_per_day}h/histogram_valid_sensed_hours.html"
|
||||
script:
|
||||
"../src/visualization/histogram_valid_sensed_hours.py"
|
||||
|
||||
rule heatmap_days_by_sensors:
|
||||
input:
|
||||
sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["HEATMAP_DAYS_BY_SENSORS"]["PHONE_SENSORS_TABLES"]),
|
||||
phone_valid_sensed_days = "data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}h.csv"
|
||||
params:
|
||||
pid = "{pid}",
|
||||
expected_num_of_days = config["HEATMAP_DAYS_BY_SENSORS"]["EXPECTED_NUM_OF_DAYS"]
|
||||
output:
|
||||
"reports/interim/{min_valid_hours_per_day}h/{pid}/heatmap_days_by_sensors.html"
|
||||
script:
|
||||
"../src/visualization/heatmap_days_by_sensors.py"
|
||||
|
||||
rule heatmap_days_by_sensors_all_participants:
|
||||
input:
|
||||
heatmap_rows = expand("reports/interim/{{min_valid_hours_per_day}}h/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"])
|
||||
output:
|
||||
"reports/data_exploration/{min_valid_hours_per_day}h/heatmap_days_by_sensors_all_participants.html"
|
||||
script:
|
||||
"../src/visualization/heatmap_days_by_sensors_all_participants.Rmd"
|
||||
|
||||
rule heatmap_sensed_bins:
|
||||
input:
|
||||
sensor = "data/interim/{pid}/phone_sensed_bins.csv",
|
||||
pid_file = "data/external/{pid}"
|
||||
params:
|
||||
pid = "{pid}",
|
||||
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
|
||||
bin_size = config["HEATMAP_SENSED_BINS"]["BIN_SIZE"]
|
||||
output:
|
||||
"reports/figures/{pid}/compliance_heatmap.html"
|
||||
"reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html"
|
||||
script:
|
||||
"../src/visualization/compliance_heatmap.py"
|
||||
"../src/visualization/heatmap_sensed_bins.py"
|
||||
|
||||
rule heatmap_sensed_bins_all_participants:
|
||||
input:
|
||||
heatmap_sensed_bins = expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"])
|
||||
output:
|
||||
"reports/data_exploration/heatmap_sensed_bins_all_participants.html"
|
||||
script:
|
||||
"../src/visualization/heatmap_sensed_bins_all_participants.Rmd"
|
||||
|
||||
rule overall_compliance_heatmap:
|
||||
input:
|
||||
phone_sensed_bins = expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]),
|
||||
phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days.csv", pid=config["PIDS"]),
|
||||
phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}h.csv", pid=config["PIDS"]),
|
||||
pid_files = expand("data/external/{pid}", pid=config["PIDS"])
|
||||
params:
|
||||
local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
|
||||
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"],
|
||||
min_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]
|
||||
expected_num_of_days = config["OVERALL_COMPLIANCE_HEATMAP"]["EXPECTED_NUM_OF_DAYS"],
|
||||
bin_size = config["OVERALL_COMPLIANCE_HEATMAP"]["BIN_SIZE"],
|
||||
min_bins_per_hour = config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_BINS_PER_HOUR"]
|
||||
output:
|
||||
"reports/figures/overall_compliance_heatmap.html"
|
||||
"reports/data_exploration/{min_valid_hours_per_day}h/overall_compliance_heatmap.html"
|
||||
script:
|
||||
"../src/visualization/overall_compliance_heatmap.py"
|
||||
|
||||
rule battery_consumption_rates_barchart:
|
||||
input:
|
||||
sensor = "data/processed/{pid}/battery_daily.csv",
|
||||
pid_file = "data/external/{pid}"
|
||||
params:
|
||||
pid = "{pid}"
|
||||
output:
|
||||
"reports/figures/{pid}/battery_consumption_rates_barchart.html"
|
||||
script:
|
||||
"../src/visualization/battery_consumption_rates_barchart.py"
|
||||
# rule heatmap_rows:
|
||||
# input:
|
||||
# sensor = "data/raw/{pid}/{sensor}_with_datetime.csv",
|
||||
# pid_file = "data/external/{pid}"
|
||||
# params:
|
||||
# table = "{sensor}",
|
||||
# pid = "{pid}",
|
||||
# bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
|
||||
# output:
|
||||
# "reports/figures/{pid}/{sensor}_heatmap_rows.html"
|
||||
# script:
|
||||
# "../src/visualization/heatmap_rows.py"
|
||||
|
||||
PHONE_SENSORS = []
|
||||
PHONE_SENSORS.extend([config["MESSAGES"]["DB_TABLE"], config["CALLS"]["DB_TABLE"], config["BARNETT_LOCATION"]["DB_TABLE"], config["BLUETOOTH"]["DB_TABLE"], config["BATTERY"]["DB_TABLE"], config["SCREEN"]["DB_TABLE"], config["LIGHT"]["DB_TABLE"], config["ACCELEROMETER"]["DB_TABLE"], config["APPLICATIONS_FOREGROUND"]["DB_TABLE"],config["WIFI"]["DB_TABLE"]])
|
||||
# rule battery_consumption_rates_barchart:
|
||||
# input:
|
||||
# sensor = "data/processed/{pid}/battery_daily.csv",
|
||||
# pid_file = "data/external/{pid}"
|
||||
# params:
|
||||
# pid = "{pid}"
|
||||
# output:
|
||||
# "reports/figures/{pid}/battery_consumption_rates_barchart.html"
|
||||
# script:
|
||||
# "../src/visualization/battery_consumption_rates_barchart.py"
|
||||
|
||||
rule compliance_report:
|
||||
input:
|
||||
sensor_heatmaps = expand("reports/figures/{{pid}}/{sensor}_heatmap_rows.html", sensor=PHONE_SENSORS),
|
||||
compliance_heatmap = rules.compliance_heatmap.output
|
||||
output:
|
||||
"reports/compliance/{pid}/compliance_report.html",
|
||||
script:
|
||||
"../src/visualization/compliance_report.Rmd"
|
||||
# rule compliance_report:
|
||||
# input:
|
||||
# sensor_heatmaps = expand("reports/figures/{{pid}}/{sensor}_heatmap_rows.html", sensor=PHONE_SENSORS),
|
||||
# compliance_heatmap = rules.compliance_heatmap.output
|
||||
# output:
|
||||
# "reports/compliance/{pid}/compliance_report.html",
|
||||
# script:
|
||||
# "../src/visualization/compliance_report.Rmd"
|
|
@ -0,0 +1,56 @@
|
|||
import pandas as pd
|
||||
import plotly.io as pio
|
||||
import plotly.graph_objects as go
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
def getRowCountHeatmap(row_count_sensors_normalized, row_count_sensors, pid, output_path):
    """Write an HTML heatmap of per-day row counts, one row per sensor column.

    Cell colours come from ``row_count_sensors_normalized``; the hover text
    shows the values of ``row_count_sensors``. Both frames are transposed so
    that columns end up on the y axis and index entries on the x axis.
    ``row_count_sensors`` is indexed by ``(local_date, date_idx)`` pairs, which
    are rendered as ``YYYY/MM/DD(date_idx)`` labels. ``pid`` only appears in
    the plot title. The figure is written to ``output_path``.
    """
    day_labels = [
        f"{datetime.strftime(local_date, '%Y/%m/%d')}({date_idx!s})"
        for local_date, date_idx in row_count_sensors.index
    ]
    heatmap = go.Heatmap(
        z=row_count_sensors_normalized.T.values.tolist(),
        x=day_labels,
        y=row_count_sensors.columns.tolist(),
        hovertext=row_count_sensors.T.values.tolist(),
        hovertemplate="Date: %{x}<br>Sensor: %{y}<br>Row count: %{hovertext}<extra></extra>",
        colorscale="Viridis",
    )
    figure = go.Figure(data=heatmap)
    figure.update_layout(title="Row count heatmap for " + pid)
    pio.write_html(figure, file=output_path, auto_open=False, include_plotlyjs="cdn")
|
||||
|
||||
|
||||
|
||||
# Keep only the days flagged as valid; their "valid_sensed_hours" column is
# joined below and plotted as one more heatmap row.
phone_valid_sensed_days = pd.read_csv(snakemake.input["phone_valid_sensed_days"], parse_dates=["local_date"], index_col=["local_date"])
phone_valid_sensed_days = phone_valid_sensed_days[phone_valid_sensed_days["is_valid_sensed_day"] == True]

# Shortened display names, applied in this order to each sensor table name.
# NOTE(review): "conversion" is reproduced from the original label; it may
# have been meant as "conversation" -- confirm before changing it.
sensor_aliases = (
    ("plugin_studentlife_audio_android", "conversion"),
    ("plugin_google_activity_recognition", "AR"),
    ("applications_foreground", "apps"),
)

# One column per sensor holding the number of rows logged on each local date.
row_count_sensors = pd.DataFrame()
for sensor_path in snakemake.input["sensors"]:
    sensor_name = sensor_path.split("/")[-1].replace("_with_datetime.csv", "")
    for table_name, alias in sensor_aliases:
        sensor_name = sensor_name.replace(table_name, alias)
    sensor_data = pd.read_csv(sensor_path, encoding="ISO-8859-1", parse_dates=["local_date"], dtype={"label": str})
    if sensor_data.empty:
        # Keep a column for the sensor even when it logged nothing.
        row_count_sensor = pd.DataFrame(columns=[sensor_name])
    else:
        row_count_sensor = sensor_data[["timestamp", "local_date"]].groupby(["local_date"]).count().rename(columns={"timestamp": sensor_name})
    row_count_sensors = row_count_sensors.join(row_count_sensor, how="outer")

row_count_sensors.index = pd.to_datetime(row_count_sensors.index)
row_count_sensors = row_count_sensors.join(phone_valid_sensed_days[["valid_sensed_hours"]], how="outer")

# date_idx counts days since the first date that has any data.
reference_date = row_count_sensors.index.min()
row_count_sensors["date_idx"] = (row_count_sensors.index - reference_date).days
row_count_sensors["local_date"] = row_count_sensors.index
row_count_sensors.set_index(["local_date", "date_idx"], inplace=True)

# Pad (or trim) to a fixed window so different participants share the same
# date_idx range. A non-positive value such as -1 means "no fixed window":
# building the window from range(-1) would yield no dates at all and the
# right-merge would then discard every row, so in that case all observed
# days are kept as they are.
expected_num_of_days = int(snakemake.params["expected_num_of_days"])
if expected_num_of_days > 0:
    date_idx_range = list(range(expected_num_of_days))
    date_range = [reference_date + timedelta(days=idx) for idx in date_idx_range]
    all_dates = pd.DataFrame({"local_date": date_range, "date_idx": date_idx_range})
    all_dates.set_index(["local_date", "date_idx"], inplace=True)
    row_count_sensors = row_count_sensors.merge(all_dates, left_index=True, right_index=True, how="right")

# Min-max normalize each sensor (column); skipped when no column has more
# than one non-missing value.
if row_count_sensors.count().max() > 1:
    row_count_sensors_normalized = (row_count_sensors-row_count_sensors.min())/(row_count_sensors.max()-row_count_sensors.min())
else:
    row_count_sensors_normalized = row_count_sensors

# Take the participant id from the rule's params instead of the last loop
# variable, which is undefined when the sensor list is empty.
pid = snakemake.params["pid"]
getRowCountHeatmap(row_count_sensors_normalized, row_count_sensors, pid, snakemake.output[0])
|
|
@ -0,0 +1,34 @@
|
|||
---
|
||||
title: "Heatmap Rows Report"
|
||||
author:
|
||||
- "MoSHI Pipeline"
|
||||
date: "`r format(Sys.time(), '%d %B, %Y')`"
|
||||
params:
|
||||
rmd: "heatmap_days_by_sensors_all_participants.Rmd"
|
||||
output:
|
||||
html_document:
|
||||
highlight: tango
|
||||
number_sections: no
|
||||
theme: default
|
||||
toc: yes
|
||||
toc_depth: 3
|
||||
toc_float:
|
||||
collapsed: no
|
||||
smooth_scroll: yes
|
||||
---
|
||||
|
||||
```{r include=FALSE}
|
||||
source("renv/activate.R")
|
||||
```
|
||||
|
||||
## All phone sensors
|
||||
|
||||
```{r, echo=FALSE}
|
||||
# Paths of the per-participant heatmap HTML files listed in the Snakemake input.
heatmaps <- snakemake@input[["heatmap_rows"]]
heatmaps.html <- vector(mode="list", length(heatmaps))

# seq_along() gives an empty sequence when there are no inputs, whereas
# 1:length(heatmaps) would iterate over c(1, 0) and fail on a missing file.
for(pid in seq_along(heatmaps)){
  heatmaps.html[[pid]] <- htmltools::includeHTML(heatmaps[pid])
}
# Emit every embedded heatmap into the rendered report.
htmltools::tagList(heatmaps.html)
|
||||
```
|
|
@ -0,0 +1,59 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
import plotly.io as pio
|
||||
import plotly.graph_objects as go
|
||||
|
||||
|
||||
def getCorrMatrixHeatmap(corr_matrix, output_path):
    """Write ``corr_matrix`` as an HTML heatmap to ``output_path``.

    The column labels of ``corr_matrix`` are used for both axes and its
    values fill the cells.
    """
    feature_names = corr_matrix.columns
    heatmap = go.Heatmap(
        z=corr_matrix.values.tolist(),
        x=feature_names,
        y=feature_names,
        colorscale="Viridis",
    )
    figure = go.Figure(data=heatmap)
    figure.update_layout(title="Correlation Matrix Heatmap")
    pio.write_html(figure, file=output_path, auto_open=False, include_plotlyjs="cdn")
|
||||
|
||||
|
||||
min_rows_ratio = snakemake.params["min_rows_ratio"]
corr_threshold = snakemake.params["corr_threshold"]

# Group the feature files by participant. The participant id is the third
# path component of every input path. Grouping explicitly (instead of
# tracking the previously seen pid) also works when a participant's files
# are not listed contiguously.
feature_paths_by_pid = {}
for path in snakemake.input["features"]:
    feature_paths_by_pid.setdefault(path.split("/")[2], []).append(path)

# For each participant, outer-merge all sensor feature files on local_date,
# then stack the participants on top of each other.
features_per_participant = []
for pid, paths in feature_paths_by_pid.items():
    features_all_sensors = pd.DataFrame(columns=["local_date"])
    for path in paths:
        features_per_sensor = pd.read_csv(path)
        features_all_sensors = features_all_sensors.merge(features_per_sensor, on="local_date", how="outer")
    features_all_sensors["pid"] = pid
    features_per_participant.append(features_all_sensors)

if features_per_participant:
    features = pd.concat(features_per_participant, axis=0, ignore_index=True, sort=False)
else:
    features = pd.DataFrame(columns=["local_date", "pid"])
features.set_index(["pid", "local_date"], inplace=True)

# Select days based on the "phone_valid_sensed_days" inputs: only
# (pid, local_date) pairs flagged as valid are kept.
valid_days_per_participant = []
for path in snakemake.input["phone_valid_sensed_days"]:
    phone_valid_sensed_days = pd.read_csv(path)
    is_valid = phone_valid_sensed_days["is_valid_sensed_day"] == True
    # assign() returns a new frame, so the pid column is never written onto
    # a filtered view of the CSV data.
    valid_days_per_participant.append(phone_valid_sensed_days[is_valid].assign(pid=path.split("/")[2]))

if valid_days_per_participant:
    selected_participants_and_days = pd.concat(valid_days_per_participant, axis=0)
else:
    selected_participants_and_days = pd.DataFrame(columns=["pid", "local_date"])
selected_participants_and_days.set_index(["pid", "local_date"], inplace=True)
features = features.loc[features.index.intersection(selected_participants_and_days.index), :]

# Correlation matrix. min_periods must be an integer number of rows, so the
# ratio-based value is truncated explicitly instead of being passed as a float.
features = features.astype(float)
min_periods = int(min_rows_ratio * features.shape[0])
corr_matrix = features.corr(min_periods=min_periods)

# Blank out weak correlations: coefficients whose absolute value is below
# corr_threshold are replaced with NaN.
corr_matrix[(corr_matrix > -corr_threshold) & (corr_matrix < corr_threshold)] = np.nan

# plot heatmap
getCorrMatrixHeatmap(corr_matrix, snakemake.output[0])
|
|
@ -20,7 +20,7 @@ def getComplianceHeatmap(dates, compliance_matrix, pid, output_path, bin_size):
|
|||
y=[datetime.datetime.strftime(date, '%Y/%m/%d') for date in dates],
|
||||
colorscale='Viridis',
|
||||
colorbar={'tick0': 0,'dtick': 1}))
|
||||
plot.update_layout(title="Compliance heatmap.<br>Five-minute bins showing how many sensors logged at least one row of data in that period for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
|
||||
plot.update_layout(title="Heatmap sensed bins.<br>Five-minute bins showing how many sensors logged at least one row of data in that period for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
|
||||
pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
|
||||
|
||||
# get current patient id
|
|
@ -0,0 +1,34 @@
|
|||
---
|
||||
title: "Heatmap Sensed Bins Report"
|
||||
author:
|
||||
- "MoSHI Pipeline"
|
||||
date: "`r format(Sys.time(), '%d %B, %Y')`"
|
||||
params:
|
||||
rmd: "heatmap_sensed_bins_all_participants.Rmd"
|
||||
output:
|
||||
html_document:
|
||||
highlight: tango
|
||||
number_sections: no
|
||||
theme: default
|
||||
toc: yes
|
||||
toc_depth: 3
|
||||
toc_float:
|
||||
collapsed: no
|
||||
smooth_scroll: yes
|
||||
---
|
||||
|
||||
```{r include=FALSE}
|
||||
source("renv/activate.R")
|
||||
```
|
||||
|
||||
## All phone sensors
|
||||
|
||||
```{r, echo=FALSE}
|
||||
# Paths of the per-participant heatmap HTML files listed in the Snakemake input.
heatmaps <- snakemake@input[["heatmap_sensed_bins"]]
heatmaps.html <- vector(mode="list", length(heatmaps))

# seq_along() gives an empty sequence when there are no inputs, whereas
# 1:length(heatmaps) would iterate over c(1, 0) and fail on a missing file.
for(pid in seq_along(heatmaps)){
  heatmaps.html[[pid]] <- htmltools::includeHTML(heatmaps[pid])
}
# Emit every embedded heatmap into the rendered report.
htmltools::tagList(heatmaps.html)
|
||||
```
|
|
@ -0,0 +1,16 @@
|
|||
import pandas as pd
|
||||
import plotly.express as px
|
||||
import plotly.io as pio
|
||||
|
||||
|
||||
# merge "phone_valid_sensed_days" for all participants
|
||||
# Gather the valid sensed days of every participant. The leading empty frame
# keeps the result an (empty) DataFrame when there are no inputs.
valid_days_frames = [pd.DataFrame()]
for path in snakemake.input["phone_valid_sensed_days"]:
    days_of_participant = pd.read_csv(path)
    is_valid = days_of_participant["is_valid_sensed_day"] == True
    valid_days_frames.append(days_of_participant[is_valid])
selected_participants_and_days = pd.concat(valid_days_frames, axis=0)

# Histogram of the "valid_sensed_hours" column over all participants' valid days.
histogram = px.histogram(selected_participants_and_days, x="valid_sensed_hours")
histogram.update_layout(title="Phone Valid Hours Histogram")
pio.write_html(histogram, file=snakemake.output[0], auto_open=False, include_plotlyjs="cdn")
|
|
@ -16,7 +16,7 @@ def getOneRow(data_per_participant, last_seven_dates, col_name, row):
|
|||
row.append(0)
|
||||
return row
|
||||
|
||||
def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, output_path):
|
||||
def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, expected_num_of_days, output_path):
|
||||
plot = ff.create_annotated_heatmap(z=sensors_with_data[last_seven_dates].values,
|
||||
x=[date.replace("-", "/") for date in last_seven_dates],
|
||||
y=[pid + "." + label for pid, label in zip(sensors_with_data["pid"].to_list(), sensors_with_data["label"].to_list())],
|
||||
|
@ -25,7 +25,7 @@ def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seve
|
|||
colorscale="Viridis",
|
||||
colorbar={"tick0": 0,"dtick": 1},
|
||||
showscale=True)
|
||||
plot.update_layout(title="Overall compliance heatmap for last seven days.<br>Bin's color shows how many sensors logged at least one row of data for that day.<br>Bin's text shows the valid hours of that day.(A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes)")
|
||||
plot.update_layout(title="Overall compliance heatmap for last " + str(expected_num_of_days) + " days.<br>Bin's color shows how many sensors logged at least one row of data for that day.<br>Bin's text shows the valid hours of that day.(A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes)")
|
||||
plot["layout"]["xaxis"].update(side="bottom")
|
||||
pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
|
||||
|
||||
|
@ -36,11 +36,12 @@ pid_files = snakemake.input["pid_files"]
|
|||
local_timezone = snakemake.params["local_timezone"]
|
||||
bin_size = snakemake.params["bin_size"]
|
||||
min_bins_per_hour = snakemake.params["min_bins_per_hour"]
|
||||
expected_num_of_days = int(snakemake.params["expected_num_of_days"])
|
||||
|
||||
|
||||
cur_date = datetime.datetime.now().astimezone(tz.gettz(local_timezone)).date()
|
||||
last_seven_dates = []
|
||||
for date_offset in range(6,-1,-1):
|
||||
for date_offset in range(expected_num_of_days-1, -1, -1):
|
||||
last_seven_dates.append((cur_date - datetime.timedelta(days=date_offset)).strftime("%Y-%m-%d"))
|
||||
|
||||
|
||||
|
@ -64,4 +65,4 @@ if sensors_with_data.empty:
|
|||
empty_html.write("There is no sensor data for all participants")
|
||||
empty_html.close()
|
||||
else:
|
||||
getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, snakemake.output[0])
|
||||
getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, expected_num_of_days, snakemake.output[0])
|
Loading…
Reference in New Issue