Add visualization for data exploration
parent 124843f96e
commit c135b6ef78
Snakefile (19 lines changed)

@@ -127,6 +127,25 @@ if config["DORYAB_LOCATION"]["COMPUTE"]:
     files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["DORYAB_LOCATION"]["DB_TABLE"]))
     files_to_compute.extend(expand("data/processed/{pid}/location_doryab_{segment}.csv", pid=config["PIDS"], segment = config["DORYAB_LOCATION"]["DAY_SEGMENTS"]))
 
+# visualization for data exploration
+if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
+    files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}h/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"]))
+
+if config["HISTOGRAM_VALID_SENSED_HOURS"]["PLOT"]:
+    files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}h/histogram_valid_sensed_hours.html", min_valid_hours_per_day=config["HISTOGRAM_VALID_SENSED_HOURS"]["MIN_VALID_HOURS_PER_DAY"]))
+
+if config["HEATMAP_DAYS_BY_SENSORS"]["PLOT"]:
+    files_to_compute.extend(expand("reports/interim/{min_valid_hours_per_day}h/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"], min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"]))
+    files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}h/heatmap_days_by_sensors_all_participants.html", min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"]))
+
+if config["HEATMAP_SENSED_BINS"]["PLOT"]:
+    files_to_compute.extend(expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"]))
+    files_to_compute.extend(["reports/data_exploration/heatmap_sensed_bins_all_participants.html"])
+
+if config["OVERALL_COMPLIANCE_HEATMAP"]["PLOT"]:
+    files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}h/overall_compliance_heatmap.html", min_valid_hours_per_day=config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_HOURS_PER_DAY"]))
+
+# analysis example
 if config["PARAMS_FOR_ANALYSIS"]["COMPUTE"]:
     rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"]
     cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"]
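For readers less familiar with Snakemake, here is a minimal sketch of how `expand` turns one template plus a list of wildcard values into the concrete targets appended to `files_to_compute`. The threshold values are illustrative, not from this commit, and `expand` is imported from `snakemake.io` only so the snippet runs outside a Snakefile.

```python
# Minimal sketch; values below are illustrative only.
from snakemake.io import expand

targets = expand(
    "reports/data_exploration/{min_valid_hours_per_day}h/heatmap_features_correlations.html",
    min_valid_hours_per_day=[16, 18],
)
print(targets)
# ['reports/data_exploration/16h/heatmap_features_correlations.html',
#  'reports/data_exploration/18h/heatmap_features_correlations.html']
```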
config.yaml (39 lines changed)

@@ -29,15 +29,15 @@ READABLE_DATETIME:
 
 PHONE_VALID_SENSED_BINS:
   COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features
-  BIN_SIZE: 5 # (in minutes)
+  BIN_SIZE: &bin_size 5 # (in minutes)
   # Add as many sensor tables as you have, they all improve the computation of PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS.
   # If you are extracting screen or Barnett's location features, screen and locations tables are mandatory.
   TABLES: []
 
 PHONE_VALID_SENSED_DAYS:
   COMPUTE: False
-  MIN_VALID_HOURS_PER_DAY: [16, 18, 20] # (out of 24) MIN_HOURS_PER_DAY
-  MIN_VALID_BINS_PER_HOUR: 6 # (out of 60min/BIN_SIZE bins)
+  MIN_VALID_HOURS_PER_DAY: &min_valid_hours_per_day [16] # (out of 24) MIN_HOURS_PER_DAY
+  MIN_VALID_BINS_PER_HOUR: &min_valid_bins_per_hour 6 # (out of 60min/BIN_SIZE bins)
 
 # Communication SMS features config, TYPES and FEATURES keys need to match
 MESSAGES:

@@ -200,14 +200,43 @@ CONVERSATION:
   RECORDINGMINUTES: 1
   PAUSEDMINUTES : 3
 
-### Analysis ################################################################
+### Visualizations ################################################################
+HEATMAP_FEATURES_CORRELATIONS:
+  PLOT: False
+  MIN_ROWS_RATIO: 0.5
+  MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
+  PHONE_FEATURES: [accelerometer, activity_recognition, applications_foreground, battery, calls_incoming, calls_missed, calls_outgoing, conversation, light, location_doryab, messages_received, messages_sent, screen]
+  FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep]
+  CORR_THRESHOLD: 0.1
+
+HISTOGRAM_VALID_SENSED_HOURS:
+  PLOT: False
+  MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
+
+HEATMAP_DAYS_BY_SENSORS:
+  PLOT: False
+  MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
+  EXPECTED_NUM_OF_DAYS: -1
+  PHONE_SENSORS_TABLES: ["accelerometer", "applications_foreground", "battery", "calls", "light", "locations", "messages", "screen", "plugin_google_activity_recognition", "plugin_studentlife_audio_android"]
+
+HEATMAP_SENSED_BINS:
+  PLOT: False
+  BIN_SIZE: *bin_size
+
+OVERALL_COMPLIANCE_HEATMAP:
+  PLOT: False
+  BIN_SIZE: *bin_size
+  EXPECTED_NUM_OF_DAYS: 7
+  MIN_VALID_BINS_PER_HOUR: *min_valid_bins_per_hour
+  MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
+
+### Example Analysis ################################################################
 PARAMS_FOR_ANALYSIS:
   COMPUTE: False
   GROUNDTRUTH_TABLE: participant_info
   SOURCES: &sources ["phone_features", "fitbit_features", "phone_fitbit_features"]
   DAY_SEGMENTS: *day_segments
-  PHONE_FEATURES: [accelerometer, applications_foreground, battery, call_incoming, call_missed, call_outgoing, activity_recognition, light, location_barnett, screen, sms_received, sms_sent]
+  PHONE_FEATURES: [accelerometer, activity_recognition, applications_foreground, battery, calls_incoming, calls_missed, calls_outgoing, conversation, light, location_doryab, messages_received, messages_sent, screen]
   FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep]
   PHONE_FITBIT_FEATURES: "" # This array is merged in the input_merge_features_of_single_participant function in models.snakefile
   DEMOGRAPHIC_FEATURES: [age, gender, inpatientdays]
 
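The `&bin_size`, `&min_valid_hours_per_day`, and `&min_valid_bins_per_hour` anchors introduced above let the new visualization sections reuse values that are defined once under PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS. A minimal sketch of the mechanism (assumes PyYAML is available; the snippet is illustrative and not part of the commit):

```python
# YAML anchors (&name) define a value once; aliases (*name) reuse it.
import yaml  # PyYAML, assumed available

doc = """
PHONE_VALID_SENSED_BINS:
  BIN_SIZE: &bin_size 5
HEATMAP_SENSED_BINS:
  BIN_SIZE: *bin_size
"""
config = yaml.safe_load(doc)
assert config["HEATMAP_SENSED_BINS"]["BIN_SIZE"] == 5  # the alias resolves to the anchored value
```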
@@ -1,61 +1,107 @@
-rule heatmap_rows:
+rule heatmap_features_correlations:
     input:
-        sensor = "data/raw/{pid}/{sensor}_with_datetime.csv",
-        pid_file = "data/external/{pid}"
+        features = expand("data/processed/{pid}/{sensor}_{day_segment}.csv", pid=config["PIDS"], sensor=config["HEATMAP_FEATURES_CORRELATIONS"]["PHONE_FEATURES"]+config["HEATMAP_FEATURES_CORRELATIONS"]["FITBIT_FEATURES"], day_segment=config["DAY_SEGMENTS"]),
+        phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}h.csv", pid=config["PIDS"])
     params:
-        table = "{sensor}",
-        pid = "{pid}",
-        bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
+        min_rows_ratio = config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_ROWS_RATIO"],
+        corr_threshold = config["HEATMAP_FEATURES_CORRELATIONS"]["CORR_THRESHOLD"] #0.75
     output:
-        "reports/figures/{pid}/{sensor}_heatmap_rows.html"
+        "reports/data_exploration/{min_valid_hours_per_day}h/heatmap_features_correlations.html"
     script:
-        "../src/visualization/heatmap_rows.py"
+        "../src/visualization/heatmap_features_correlations.py"
 
-rule compliance_heatmap:
+rule histogram_valid_sensed_hours:
+    input:
+        phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}h.csv", pid=config["PIDS"])
+    output:
+        "reports/data_exploration/{min_valid_hours_per_day}h/histogram_valid_sensed_hours.html"
+    script:
+        "../src/visualization/histogram_valid_sensed_hours.py"
+
+rule heatmap_days_by_sensors:
+    input:
+        sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["HEATMAP_DAYS_BY_SENSORS"]["PHONE_SENSORS_TABLES"]),
+        phone_valid_sensed_days = "data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}h.csv"
+    params:
+        pid = "{pid}",
+        expected_num_of_days = config["HEATMAP_DAYS_BY_SENSORS"]["EXPECTED_NUM_OF_DAYS"]
+    output:
+        "reports/interim/{min_valid_hours_per_day}h/{pid}/heatmap_days_by_sensors.html"
+    script:
+        "../src/visualization/heatmap_days_by_sensors.py"
+
+rule heatmap_days_by_sensors_all_participants:
+    input:
+        heatmap_rows = expand("reports/interim/{{min_valid_hours_per_day}}h/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"])
+    output:
+        "reports/data_exploration/{min_valid_hours_per_day}h/heatmap_days_by_sensors_all_participants.html"
+    script:
+        "../src/visualization/heatmap_days_by_sensors_all_participants.Rmd"
+
+rule heatmap_sensed_bins:
     input:
         sensor = "data/interim/{pid}/phone_sensed_bins.csv",
         pid_file = "data/external/{pid}"
     params:
         pid = "{pid}",
-        bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
+        bin_size = config["HEATMAP_SENSED_BINS"]["BIN_SIZE"]
     output:
-        "reports/figures/{pid}/compliance_heatmap.html"
+        "reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html"
     script:
-        "../src/visualization/compliance_heatmap.py"
+        "../src/visualization/heatmap_sensed_bins.py"
+
+rule heatmap_sensed_bins_all_participants:
+    input:
+        heatmap_sensed_bins = expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"])
+    output:
+        "reports/data_exploration/heatmap_sensed_bins_all_participants.html"
+    script:
+        "../src/visualization/heatmap_sensed_bins_all_participants.Rmd"
 
 rule overall_compliance_heatmap:
     input:
         phone_sensed_bins = expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]),
-        phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days.csv", pid=config["PIDS"]),
+        phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}h.csv", pid=config["PIDS"]),
         pid_files = expand("data/external/{pid}", pid=config["PIDS"])
     params:
         local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
-        bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"],
-        min_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]
+        expected_num_of_days = config["OVERALL_COMPLIANCE_HEATMAP"]["EXPECTED_NUM_OF_DAYS"],
+        bin_size = config["OVERALL_COMPLIANCE_HEATMAP"]["BIN_SIZE"],
+        min_bins_per_hour = config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_BINS_PER_HOUR"]
     output:
-        "reports/figures/overall_compliance_heatmap.html"
+        "reports/data_exploration/{min_valid_hours_per_day}h/overall_compliance_heatmap.html"
     script:
         "../src/visualization/overall_compliance_heatmap.py"
 
-rule battery_consumption_rates_barchart:
-    input:
-        sensor = "data/processed/{pid}/battery_daily.csv",
-        pid_file = "data/external/{pid}"
-    params:
-        pid = "{pid}"
-    output:
-        "reports/figures/{pid}/battery_consumption_rates_barchart.html"
-    script:
-        "../src/visualization/battery_consumption_rates_barchart.py"
-
-PHONE_SENSORS = []
-PHONE_SENSORS.extend([config["MESSAGES"]["DB_TABLE"], config["CALLS"]["DB_TABLE"], config["BARNETT_LOCATION"]["DB_TABLE"], config["BLUETOOTH"]["DB_TABLE"], config["BATTERY"]["DB_TABLE"], config["SCREEN"]["DB_TABLE"], config["LIGHT"]["DB_TABLE"], config["ACCELEROMETER"]["DB_TABLE"], config["APPLICATIONS_FOREGROUND"]["DB_TABLE"],config["WIFI"]["DB_TABLE"]])
-
-rule compliance_report:
-    input:
-        sensor_heatmaps = expand("reports/figures/{{pid}}/{sensor}_heatmap_rows.html", sensor=PHONE_SENSORS),
-        compliance_heatmap = rules.compliance_heatmap.output
-    output:
-        "reports/compliance/{pid}/compliance_report.html",
-    script:
-        "../src/visualization/compliance_report.Rmd"
+# rule heatmap_rows:
+#     input:
+#         sensor = "data/raw/{pid}/{sensor}_with_datetime.csv",
+#         pid_file = "data/external/{pid}"
+#     params:
+#         table = "{sensor}",
+#         pid = "{pid}",
+#         bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
+#     output:
+#         "reports/figures/{pid}/{sensor}_heatmap_rows.html"
+#     script:
+#         "../src/visualization/heatmap_rows.py"
+
+# rule battery_consumption_rates_barchart:
+#     input:
+#         sensor = "data/processed/{pid}/battery_daily.csv",
+#         pid_file = "data/external/{pid}"
+#     params:
+#         pid = "{pid}"
+#     output:
+#         "reports/figures/{pid}/battery_consumption_rates_barchart.html"
+#     script:
+#         "../src/visualization/battery_consumption_rates_barchart.py"
+
+# rule compliance_report:
+#     input:
+#         sensor_heatmaps = expand("reports/figures/{{pid}}/{sensor}_heatmap_rows.html", sensor=PHONE_SENSORS),
+#         compliance_heatmap = rules.compliance_heatmap.output
+#     output:
+#         "reports/compliance/{pid}/compliance_report.html",
+#     script:
+#         "../src/visualization/compliance_report.Rmd"
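One Snakemake detail worth noting in the rules above is the double-brace escaping inside `expand`. A minimal sketch with illustrative sensor names (not from this commit; `expand` is imported from `snakemake.io` only so the snippet runs standalone):

```python
# Inside expand(), {{pid}} escapes to a literal {pid} wildcard, while {sensor}
# is substituted immediately for every value in the list.
from snakemake.io import expand

paths = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=["screen", "light"])
print(paths)
# ['data/raw/{pid}/screen_with_datetime.csv', 'data/raw/{pid}/light_with_datetime.csv']
# {pid} is left for Snakemake to resolve per participant when the rule runs.
```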
src/visualization/heatmap_days_by_sensors.py (new file)

@@ -0,0 +1,56 @@
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
from datetime import datetime, timedelta

def getRowCountHeatmap(row_count_sensors_normalized, row_count_sensors, pid, output_path):
    plot = go.Figure(data=go.Heatmap(z=row_count_sensors_normalized.T.values.tolist(),
                                     x=[datetime.strftime(idx[0], "%Y/%m/%d")+"("+str(idx[1])+")" for idx in row_count_sensors.index],
                                     y=row_count_sensors.columns.tolist(),
                                     hovertext=row_count_sensors.T.values.tolist(),
                                     hovertemplate="Date: %{x}<br>Sensor: %{y}<br>Row count: %{hovertext}<extra></extra>",
                                     colorscale="Viridis"))
    plot.update_layout(title="Row count heatmap for " + pid)
    pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")



phone_valid_sensed_days = pd.read_csv(snakemake.input["phone_valid_sensed_days"], parse_dates=["local_date"], index_col=["local_date"])
phone_valid_sensed_days = phone_valid_sensed_days[phone_valid_sensed_days["is_valid_sensed_day"] == True]

row_count_sensors = pd.DataFrame()
for sensor_path in snakemake.input["sensors"]:
    # plugin_studentlife_audio_android => conversion; plugin_google_activity_recognition => AR; applications_foreground => apps
    sensor_name = sensor_path.split("/")[-1].replace("_with_datetime.csv", "").replace("plugin_studentlife_audio_android", "conversion").replace("plugin_google_activity_recognition", "AR").replace("applications_foreground", "apps")
    sensor_data = pd.read_csv(sensor_path, encoding="ISO-8859-1", parse_dates=["local_date"], dtype={"label": str})
    if sensor_data.empty:
        row_count_sensor = pd.DataFrame(columns=[sensor_name])
    else:
        row_count_sensor = sensor_data[["timestamp", "local_date"]].groupby(["local_date"]).count().rename(columns={"timestamp": sensor_name})
    row_count_sensors = row_count_sensors.join(row_count_sensor, how="outer")

row_count_sensors.index = pd.to_datetime(row_count_sensors.index)
row_count_sensors = row_count_sensors.join(phone_valid_sensed_days[["valid_sensed_hours"]], how="outer")

# set date_idx based on the first date
reference_date = row_count_sensors.index.min()
row_count_sensors["date_idx"] = (row_count_sensors.index - reference_date).days
row_count_sensors["local_date"] = row_count_sensors.index
row_count_sensors.set_index(["local_date", "date_idx"], inplace=True)

# add empty rows to make sure different participants have the same date_idx range
date_idx_range = [idx for idx in range(int(snakemake.params["expected_num_of_days"]))]
date_range = [reference_date + timedelta(days=idx) for idx in date_idx_range]
all_dates = pd.DataFrame({"local_date": date_range, "date_idx": date_idx_range})
all_dates.set_index(["local_date", "date_idx"], inplace=True)

row_count_sensors = row_count_sensors.merge(all_dates, left_index=True, right_index=True, how="right")

# normalize each sensor (column)
if row_count_sensors.count().max() > 1:
    row_count_sensors_normalized = (row_count_sensors-row_count_sensors.min())/(row_count_sensors.max()-row_count_sensors.min())
else:
    row_count_sensors_normalized = row_count_sensors

pid = sensor_path.split("/")[2]
getRowCountHeatmap(row_count_sensors_normalized, row_count_sensors, pid, snakemake.output[0])
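The min-max scaling near the end of this script normalizes each sensor column independently, so sensors with very different row counts can share one color scale. A small sketch with made-up data:

```python
# Per-column min-max scaling, as used for the row-count heatmap above.
import pandas as pd

df = pd.DataFrame({"screen": [10, 55, 100], "light": [0, 2, 8]})
normalized = (df - df.min()) / (df.max() - df.min())
print(normalized["screen"].tolist())  # [0.0, 0.5, 1.0]
print(normalized["light"].tolist())   # [0.0, 0.25, 1.0]
```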
src/visualization/heatmap_days_by_sensors_all_participants.Rmd (new file)

@@ -0,0 +1,34 @@
---
title: "Heatmap Rows Report"
author:
    - "MoSHI Pipeline"
date: "`r format(Sys.time(), '%d %B, %Y')`"
params:
  rmd: "heatmap_days_by_sensors_all_participants.Rmd"
output:
  html_document:
    highlight: tango
    number_sections: no
    theme: default
    toc: yes
    toc_depth: 3
    toc_float:
      collapsed: no
      smooth_scroll: yes
---

```{r include=FALSE}
source("renv/activate.R")
```

## All phone sensors

```{r, echo=FALSE}
heatmaps <- snakemake@input[["heatmap_rows"]]
heatmaps.html <- vector(mode="list", length(heatmaps))

for(pid in 1:length(heatmaps)){
  heatmaps.html[[pid]] <- htmltools::includeHTML(heatmaps[pid])
}
htmltools::tagList(heatmaps.html)
```
src/visualization/heatmap_features_correlations.py (new file)

@@ -0,0 +1,59 @@
import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go


def getCorrMatrixHeatmap(corr_matrix, output_path):
    colnames = corr_matrix.columns
    plot = go.Figure(data=go.Heatmap(z=corr_matrix.values.tolist(),
                                     x=colnames,
                                     y=colnames,
                                     colorscale="Viridis"))
    plot.update_layout(title="Correlation Matrix Heatmap")
    pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")


min_rows_ratio = snakemake.params["min_rows_ratio"]
corr_threshold = snakemake.params["corr_threshold"]

# merge features
features, features_all_sensors = pd.DataFrame(columns=["local_date"]), pd.DataFrame(columns=["local_date"])
pids = set()
last_pid = None
for path in snakemake.input["features"]:
    pid = path.split("/")[2]
    if pid not in pids:
        pids.add(pid)
        features_all_sensors["pid"] = last_pid
        features = pd.concat([features, features_all_sensors], axis=0, ignore_index=True, sort=False)
        features_all_sensors = pd.DataFrame(columns=["local_date"])
    features_per_sensor = pd.read_csv(path)
    features_all_sensors = features_all_sensors.merge(features_per_sensor, on="local_date", how="outer")
    last_pid = pid

features_all_sensors["pid"] = last_pid
features = pd.concat([features, features_all_sensors], axis=0, ignore_index=True, sort=False)
features.set_index(["pid", "local_date"], inplace=True)

# select days based on the input of "phone_valid_sensed_days"
selected_participants_and_days = pd.DataFrame()
for path in snakemake.input["phone_valid_sensed_days"]:
    pid = path.split("/")[2]
    phone_valid_sensed_days = pd.read_csv(path)
    phone_valid_sensed_days = phone_valid_sensed_days[phone_valid_sensed_days["is_valid_sensed_day"] == True]
    phone_valid_sensed_days["pid"] = pid
    selected_participants_and_days = pd.concat([selected_participants_and_days, phone_valid_sensed_days], axis=0)

selected_participants_and_days.set_index(["pid", "local_date"], inplace=True)
features = features.loc[features.index.intersection(selected_participants_and_days.index), :]

# get correlation matrix
features = features.astype(float)
corr_matrix = features.corr(min_periods=min_rows_ratio * features.shape[0])

# replace correlation coefficients less than corr_threshold with NA
corr_matrix[(corr_matrix > -corr_threshold) & (corr_matrix < corr_threshold)] = np.nan

# plot heatmap
getCorrMatrixHeatmap(corr_matrix, snakemake.output[0])
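Two knobs drive this script: `min_rows_ratio` becomes pandas' `min_periods` (a pair of features needs at least that many shared non-NaN days to get a coefficient at all), and `corr_threshold` blanks out weak coefficients. A small sketch with made-up values:

```python
# How min_periods and a threshold shape the correlation matrix.
import numpy as np
import pandas as pd

df = pd.DataFrame({"a": [1.0, 2.0, np.nan, 4.0],
                   "b": [2.0, np.nan, np.nan, 8.0]})
corr = df.corr(min_periods=3)
print(corr.loc["a", "b"])  # nan: columns a and b share only 2 non-NaN rows, below min_periods

corr[(corr > -0.1) & (corr < 0.1)] = np.nan  # drop weak correlations, as the script does
```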
src/visualization/heatmap_sensed_bins.py

@@ -20,7 +20,7 @@ def getComplianceHeatmap(dates, compliance_matrix, pid, output_path, bin_size):
                                      y=[datetime.datetime.strftime(date, '%Y/%m/%d') for date in dates],
                                      colorscale='Viridis',
                                      colorbar={'tick0': 0,'dtick': 1}))
-    plot.update_layout(title="Compliance heatmap.<br>Five-minute bins showing how many sensors logged at least one row of data in that period for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
+    plot.update_layout(title="Heatmap sensed bins.<br>Five-minute bins showing how many sensors logged at least one row of data in that period for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
     pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
 
 # get current patient id
src/visualization/heatmap_sensed_bins_all_participants.Rmd (new file)

@@ -0,0 +1,34 @@
---
title: "Heatmap Sensed Bins Report"
author:
    - "MoSHI Pipeline"
date: "`r format(Sys.time(), '%d %B, %Y')`"
params:
  rmd: "heatmap_sensed_bins_all_participants.Rmd"
output:
  html_document:
    highlight: tango
    number_sections: no
    theme: default
    toc: yes
    toc_depth: 3
    toc_float:
      collapsed: no
      smooth_scroll: yes
---

```{r include=FALSE}
source("renv/activate.R")
```

## All phone sensors

```{r, echo=FALSE}
heatmaps <- snakemake@input[["heatmap_sensed_bins"]]
heatmaps.html <- vector(mode="list", length(heatmaps))

for(pid in 1:length(heatmaps)){
  heatmaps.html[[pid]] <- htmltools::includeHTML(heatmaps[pid])
}
htmltools::tagList(heatmaps.html)
```
src/visualization/histogram_valid_sensed_hours.py (new file)

@@ -0,0 +1,16 @@
import pandas as pd
import plotly.express as px
import plotly.io as pio


# merge "phone_valid_sensed_days" for all participants
selected_participants_and_days = pd.DataFrame()
for path in snakemake.input["phone_valid_sensed_days"]:
    phone_valid_sensed_days = pd.read_csv(path)
    phone_valid_sensed_days = phone_valid_sensed_days[phone_valid_sensed_days["is_valid_sensed_day"] == True]
    selected_participants_and_days = pd.concat([selected_participants_and_days, phone_valid_sensed_days], axis=0)

# plot histogram
fig = px.histogram(selected_participants_and_days, x="valid_sensed_hours")
fig.update_layout(title="Phone Valid Hours Histogram")
pio.write_html(fig, file=snakemake.output[0], auto_open=False, include_plotlyjs="cdn")
src/visualization/overall_compliance_heatmap.py

@@ -16,7 +16,7 @@ def getOneRow(data_per_participant, last_seven_dates, col_name, row):
        row.append(0)
    return row
 
-def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, output_path):
+def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, expected_num_of_days, output_path):
     plot = ff.create_annotated_heatmap(z=sensors_with_data[last_seven_dates].values,
                                        x=[date.replace("-", "/") for date in last_seven_dates],
                                        y=[pid + "." + label for pid, label in zip(sensors_with_data["pid"].to_list(), sensors_with_data["label"].to_list())],
@@ -25,7 +25,7 @@ def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seve
                                        colorscale="Viridis",
                                        colorbar={"tick0": 0,"dtick": 1},
                                        showscale=True)
-    plot.update_layout(title="Overall compliance heatmap for last seven days.<br>Bin's color shows how many sensors logged at least one row of data for that day.<br>Bin's text shows the valid hours of that day.(A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes)")
+    plot.update_layout(title="Overall compliance heatmap for last " + str(expected_num_of_days) + " days.<br>Bin's color shows how many sensors logged at least one row of data for that day.<br>Bin's text shows the valid hours of that day.(A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes)")
     plot["layout"]["xaxis"].update(side="bottom")
     pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
 
@@ -36,11 +36,12 @@ pid_files = snakemake.input["pid_files"]
 local_timezone = snakemake.params["local_timezone"]
 bin_size = snakemake.params["bin_size"]
 min_bins_per_hour = snakemake.params["min_bins_per_hour"]
+expected_num_of_days = int(snakemake.params["expected_num_of_days"])
 
 
 cur_date = datetime.datetime.now().astimezone(tz.gettz(local_timezone)).date()
 last_seven_dates = []
-for date_offset in range(6,-1,-1):
+for date_offset in range(expected_num_of_days-1, -1, -1):
     last_seven_dates.append((cur_date - datetime.timedelta(days=date_offset)).strftime("%Y-%m-%d"))
 
 
@@ -64,4 +65,4 @@ if sensors_with_data.empty:
     empty_html.write("There is no sensor data for all participants")
     empty_html.close()
 else:
-    getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, snakemake.output[0])
+    getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, expected_num_of_days, snakemake.output[0])
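The change above generalizes the hard-coded seven-day window: the date list now spans the configurable EXPECTED_NUM_OF_DAYS. A small sketch of the date-range construction (fixed date, no timezone handling; values are illustrative):

```python
# Building the last N calendar dates, oldest first, as the modified loop does.
import datetime

expected_num_of_days = 7
cur_date = datetime.date(2020, 7, 15)  # illustrative; the script uses "now" in the configured timezone
last_dates = [(cur_date - datetime.timedelta(days=offset)).strftime("%Y-%m-%d")
              for offset in range(expected_num_of_days - 1, -1, -1)]
print(last_dates[0], last_dates[-1])  # 2020-07-09 2020-07-15
```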