From 9a0e57301b8e7e5e00cc1118740dab01981b1867 Mon Sep 17 00:00:00 2001
From: Meng Li <34143965+Meng6@users.noreply.github.com>
Date: Thu, 3 Dec 2020 21:00:32 -0500
Subject: [PATCH] Update 4 plots for time_segments
---
Snakefile | 23 ++--
config.yaml | 39 ++-----
example_profile/Snakefile | 14 +++
example_profile/example_config.yaml | 16 +++
rules/common.smk | 27 -----
rules/reports.smk | 96 ++++++++---------
.../battery_consumption_rates_barchart.py | 34 ------
src/visualization/compliance_report.Rmd | 39 -------
src/visualization/heatmap_days_by_sensors.py | 74 -------------
.../heatmap_feature_correlation_matrix.py | 48 +++++++++
.../heatmap_features_correlations.py | 59 ----------
..._yield_per_participant_per_time_segment.py | 85 +++++++++++++++
src/visualization/heatmap_rows.py | 68 ------------
src/visualization/heatmap_sensed_bins.py | 50 ---------
...atmap_sensor_row_count_per_time_segment.py | 89 +++++++++++++++
...map_sensors_per_minute_per_time_segment.py | 100 +++++++++++++++++
.../histogram_phone_data_yield.py | 23 ++--
...map_sensor_row_count_per_time_segment.Rmd} | 13 ++-
...p_sensors_per_minute_per_time_segment.Rmd} | 13 ++-
.../overall_compliance_heatmap.py | 102 ------------------
src/visualization/visualize.py | 0
21 files changed, 447 insertions(+), 565 deletions(-)
delete mode 100644 src/visualization/battery_consumption_rates_barchart.py
delete mode 100644 src/visualization/compliance_report.Rmd
delete mode 100644 src/visualization/heatmap_days_by_sensors.py
create mode 100644 src/visualization/heatmap_feature_correlation_matrix.py
delete mode 100644 src/visualization/heatmap_features_correlations.py
create mode 100644 src/visualization/heatmap_phone_data_yield_per_participant_per_time_segment.py
delete mode 100644 src/visualization/heatmap_rows.py
delete mode 100644 src/visualization/heatmap_sensed_bins.py
create mode 100644 src/visualization/heatmap_sensor_row_count_per_time_segment.py
create mode 100644 src/visualization/heatmap_sensors_per_minute_per_time_segment.py
rename src/visualization/{heatmap_sensed_bins_all_participants.Rmd => merge_heatmap_sensor_row_count_per_time_segment.Rmd} (63%)
rename src/visualization/{heatmap_days_by_sensors_all_participants.Rmd => merge_heatmap_sensors_per_minute_per_time_segment.Rmd} (63%)
delete mode 100644 src/visualization/overall_compliance_heatmap.py
delete mode 100644 src/visualization/visualize.py
diff --git a/Snakefile b/Snakefile
index 688e7675..04882a79 100644
--- a/Snakefile
+++ b/Snakefile
@@ -231,20 +231,19 @@ for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys():
if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]:
files_to_compute.append("reports/data_exploration/histogram_phone_data_yield.html")
-# visualization for data exploration
-# if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
-# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
-
-# if config["HEATMAP_DAYS_BY_SENSORS"]["PLOT"]:
-# files_to_compute.extend(expand("reports/interim/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"], min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
-# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_days_by_sensors_all_participants.html", min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
+if config["HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT"]["PLOT"]:
+ files_to_compute.extend(expand("reports/interim/{pid}/heatmap_sensors_per_minute_per_time_segment.html", pid=config["PIDS"]))
+ files_to_compute.append("reports/data_exploration/heatmap_sensors_per_minute_per_time_segment.html")
-# if config["HEATMAP_SENSED_BINS"]["PLOT"]:
-# files_to_compute.extend(expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"]))
-# files_to_compute.extend(["reports/data_exploration/heatmap_sensed_bins_all_participants.html"])
+if config["HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT"]["PLOT"]:
+ files_to_compute.extend(expand("reports/interim/{pid}/heatmap_sensor_row_count_per_time_segment.html", pid=config["PIDS"]))
+ files_to_compute.append("reports/data_exploration/heatmap_sensor_row_count_per_time_segment.html")
-# if config["OVERALL_COMPLIANCE_HEATMAP"]["PLOT"]:
-# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/overall_compliance_heatmap.html", min_valid_hours_per_day=config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
+if config["HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT"]["PLOT"]:
+ files_to_compute.append("reports/data_exploration/heatmap_phone_data_yield_per_participant_per_time_segment.html")
+
+if config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["PLOT"]:
+ files_to_compute.append("reports/data_exploration/heatmap_feature_correlation_matrix.html")
rule all:
diff --git a/config.yaml b/config.yaml
index 1aef391f..49cfed3e 100644
--- a/config.yaml
+++ b/config.yaml
@@ -259,9 +259,6 @@ PHONE_WIFI_VISIBLE:
-
-
-
########################################################################################################################
# FITBIT #
########################################################################################################################
@@ -350,7 +347,6 @@ FITBIT_STEPS_INTRADAY:
-
########################################################################################################################
# PLOTS #
########################################################################################################################
@@ -358,32 +354,19 @@ FITBIT_STEPS_INTRADAY:
HISTOGRAM_PHONE_DATA_YIELD:
PLOT: False
-HEATMAP_FEATURES_CORRELATIONS:
+HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT:
+ PLOT: False
+
+HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT:
+ PLOT: False
+ SENSORS: [PHONE_ACCELEROMETER, PHONE_ACTIVITY_RECOGNITION, PHONE_APPLICATIONS_FOREGROUND, PHONE_BATTERY, PHONE_BLUETOOTH, PHONE_CALLS, PHONE_CONVERSATION, PHONE_LIGHT, PHONE_LOCATIONS, PHONE_MESSAGES, PHONE_SCREEN, PHONE_WIFI_CONNECTED, PHONE_WIFI_VISIBLE]
+
+HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT:
+ PLOT: False
+
+HEATMAP_FEATURE_CORRELATION_MATRIX:
PLOT: False
MIN_ROWS_RATIO: 0.5
- MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
- MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
- PHONE_FEATURES: [accelerometer, activity_recognition, applications_foreground, battery, calls_incoming, calls_missed, calls_outgoing, conversation, light, location_doryab, messages_received, messages_sent, screen]
- FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep]
CORR_THRESHOLD: 0.1
CORR_METHOD: "pearson" # choose from {"pearson", "kendall", "spearman"}
-HEATMAP_DAYS_BY_SENSORS:
- PLOT: False
- MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
- MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
- EXPECTED_NUM_OF_DAYS: -1
- DB_TABLES: [accelerometer, applications_foreground, battery, bluetooth, calls, light, locations, messages, screen, wifi, sensor_wifi, plugin_google_activity_recognition, plugin_ios_activity_recognition, plugin_studentlife_audio_android, plugin_studentlife_audio]
-
-HEATMAP_SENSED_BINS:
- PLOT: False
- BIN_SIZE: #*bin_size
-
-OVERALL_COMPLIANCE_HEATMAP:
- PLOT: False
- ONLY_SHOW_VALID_DAYS: False
- EXPECTED_NUM_OF_DAYS: -1
- BIN_SIZE: #*bin_size
- MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
- MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
-
diff --git a/example_profile/Snakefile b/example_profile/Snakefile
index 49530a30..acb3b1e1 100644
--- a/example_profile/Snakefile
+++ b/example_profile/Snakefile
@@ -214,6 +214,20 @@ for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys():
if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]:
files_to_compute.append("reports/data_exploration/histogram_phone_data_yield.html")
+if config["HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT"]["PLOT"]:
+ files_to_compute.extend(expand("reports/interim/{pid}/heatmap_sensors_per_minute_per_time_segment.html", pid=config["PIDS"]))
+ files_to_compute.append("reports/data_exploration/heatmap_sensors_per_minute_per_time_segment.html")
+
+if config["HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT"]["PLOT"]:
+ files_to_compute.extend(expand("reports/interim/{pid}/heatmap_sensor_row_count_per_time_segment.html", pid=config["PIDS"]))
+ files_to_compute.append("reports/data_exploration/heatmap_sensor_row_count_per_time_segment.html")
+
+if config["HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT"]["PLOT"]:
+ files_to_compute.append("reports/data_exploration/heatmap_phone_data_yield_per_participant_per_time_segment.html")
+
+if config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["PLOT"]:
+ files_to_compute.append("reports/data_exploration/heatmap_feature_correlation_matrix.html")
+
# Analysis Workflow Example
models, scalers = [], []
for model_name in config["PARAMS_FOR_ANALYSIS"]["MODEL_NAMES"]:
diff --git a/example_profile/example_config.yaml b/example_profile/example_config.yaml
index 8285c716..b78a731d 100644
--- a/example_profile/example_config.yaml
+++ b/example_profile/example_config.yaml
@@ -323,6 +323,22 @@ FITBIT_STEPS_INTRADAY:
HISTOGRAM_PHONE_DATA_YIELD:
PLOT: True
+HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT:
+ PLOT: True
+
+HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT:
+ PLOT: True
+ SENSORS: [PHONE_ACCELEROMETER, PHONE_ACTIVITY_RECOGNITION, PHONE_APPLICATIONS_FOREGROUND, PHONE_BATTERY, PHONE_BLUETOOTH, PHONE_CALLS, PHONE_CONVERSATION, PHONE_LIGHT, PHONE_LOCATIONS, PHONE_MESSAGES, PHONE_SCREEN, PHONE_WIFI_CONNECTED, PHONE_WIFI_VISIBLE]
+
+HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT:
+ PLOT: True
+
+HEATMAP_FEATURE_CORRELATION_MATRIX:
+ PLOT: True
+ MIN_ROWS_RATIO: 0.5
+ CORR_THRESHOLD: 0.1
+ CORR_METHOD: "pearson" # choose from {"pearson", "kendall", "spearman"}
+
########################################################################################################################
diff --git a/rules/common.smk b/rules/common.smk
index b3b0c815..ef9af1ca 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -1,19 +1,3 @@
-# Common.smk ##########################################################################################################
-
-def infer_participant_platform(participant_file):
- with open(participant_file, encoding="ISO-8859-1") as external_file:
- external_file_content = external_file.readlines()
- platforms = external_file_content[1].strip().split(",")
- if platforms[0] == "multiple" or (len(platforms) > 1 and "android" in platforms and "ios" in platforms):
- platform = "android"
- else:
- platform = platforms[0]
-
- if platform not in ["android", "ios"]:
- raise ValueError("Platform (line 2) in a participant file should be 'android', 'ios', or 'multiple'. You typed '" + platforms + "'")
-
- return platform
-
# Features.smk #########################################################################################################
def find_features_files(wildcards):
feature_files = []
@@ -38,14 +22,3 @@ def input_merge_sensor_features_for_individual_participants(wildcards):
break
return feature_files
-# Reports.smk ###########################################################################################################
-
-def optional_heatmap_days_by_sensors_input(wildcards):
- platform = infer_participant_platform("data/external/"+wildcards.pid)
-
- if platform == "android":
- tables_platform = [table for table in config["HEATMAP_DAYS_BY_SENSORS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["IOS"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]] # for android, discard any ios tables that may exist
- elif platform == "ios":
- tables_platform = [table for table in config["HEATMAP_DAYS_BY_SENSORS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"]]] # for ios, discard any android tables that may exist
-
- return expand("data/raw/{{pid}}/{table}_with_datetime.csv", table = tables_platform)
diff --git a/rules/reports.smk b/rules/reports.smk
index 6071db09..9e30ed35 100644
--- a/rules/reports.smk
+++ b/rules/reports.smk
@@ -6,74 +6,66 @@ rule histogram_phone_data_yield:
script:
"../src/visualization/histogram_phone_data_yield.py"
-
-
-
-rule heatmap_features_correlations:
+rule heatmap_sensors_per_minute_per_time_segment:
input:
- features = expand("data/processed/{pid}/{sensor}_{time_segment}.csv", pid=config["PIDS"], sensor=config["HEATMAP_FEATURES_CORRELATIONS"]["PHONE_FEATURES"]+config["HEATMAP_FEATURES_CORRELATIONS"]["FITBIT_FEATURES"], time_segment=config["TIME_SEGMENTS"]),
- phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}hours_{{min_valid_bins_per_hour}}bins.csv", pid=config["PIDS"])
+ phone_data_yield = "data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv",
+ participant_file = "data/external/participant_files/{pid}.yaml",
+ time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
- min_rows_ratio = config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_ROWS_RATIO"],
- corr_threshold = config["HEATMAP_FEATURES_CORRELATIONS"]["CORR_THRESHOLD"],
- corr_method = config["HEATMAP_FEATURES_CORRELATIONS"]["CORR_METHOD"]
+ pid = "{pid}"
output:
- "reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html"
+ "reports/interim/{pid}/heatmap_sensors_per_minute_per_time_segment.html"
script:
- "../src/visualization/heatmap_features_correlations.py"
+ "../src/visualization/heatmap_sensors_per_minute_per_time_segment.py"
-rule heatmap_days_by_sensors:
+rule merge_heatmap_sensors_per_minute_per_time_segment:
input:
- sensors = optional_heatmap_days_by_sensors_input,
- phone_valid_sensed_days = "data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins.csv"
+ heatmap_sensors_per_minute_per_time_segment = expand("reports/interim/{pid}/heatmap_sensors_per_minute_per_time_segment.html", pid=config["PIDS"])
+ output:
+ "reports/data_exploration/heatmap_sensors_per_minute_per_time_segment.html"
+ script:
+ "../src/visualization/merge_heatmap_sensors_per_minute_per_time_segment.Rmd"
+
+rule heatmap_sensor_row_count_per_time_segment:
+ input:
+ all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor = map(str.lower, config["HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT"]["SENSORS"])),
+ phone_data_yield = "data/processed/features/{pid}/phone_data_yield.csv",
+ participant_file = "data/external/participant_files/{pid}.yaml",
+ time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
- pid = "{pid}",
- expected_num_of_days = config["HEATMAP_DAYS_BY_SENSORS"]["EXPECTED_NUM_OF_DAYS"]
+ pid = "{pid}"
output:
- "reports/interim/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/{pid}/heatmap_days_by_sensors.html"
+ "reports/interim/{pid}/heatmap_sensor_row_count_per_time_segment.html"
script:
- "../src/visualization/heatmap_days_by_sensors.py"
+ "../src/visualization/heatmap_sensor_row_count_per_time_segment.py"
-rule heatmap_days_by_sensors_all_participants:
+rule merge_heatmap_sensor_row_count_per_time_segment:
input:
- heatmap_rows = expand("reports/interim/{{min_valid_hours_per_day}}hours_{{min_valid_bins_per_hour}}bins/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"])
+ heatmap_sensor_row_count_per_time_segment = expand("reports/interim/{pid}/heatmap_sensor_row_count_per_time_segment.html", pid=config["PIDS"])
output:
- "reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_days_by_sensors_all_participants.html"
+ "reports/data_exploration/heatmap_sensor_row_count_per_time_segment.html"
script:
- "../src/visualization/heatmap_days_by_sensors_all_participants.Rmd"
+ "../src/visualization/merge_heatmap_sensor_row_count_per_time_segment.Rmd"
-rule heatmap_sensed_bins:
+rule heatmap_phone_data_yield_per_participant_per_time_segment:
input:
- sensor = "data/interim/{pid}/phone_sensed_bins.csv",
- pid_file = "data/external/{pid}"
+ phone_data_yield = expand("data/processed/features/{pid}/phone_data_yield.csv", pid=config["PIDS"]),
+ participant_file = expand("data/external/participant_files/{pid}.yaml", pid=config["PIDS"]),
+ time_segments_labels = expand("data/interim/time_segments/{pid}_time_segments_labels.csv", pid=config["PIDS"])
+ output:
+ "reports/data_exploration/heatmap_phone_data_yield_per_participant_per_time_segment.html"
+ script:
+ "../src/visualization/heatmap_phone_data_yield_per_participant_per_time_segment.py"
+
+rule heatmap_feature_correlation_matrix:
+ input:
+ all_sensor_features = "data/processed/features/all_participants/all_sensor_features.csv" # before data cleaning
params:
- pid = "{pid}",
- bin_size = config["HEATMAP_SENSED_BINS"]["BIN_SIZE"]
+ min_rows_ratio = config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["MIN_ROWS_RATIO"],
+ corr_threshold = config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["CORR_THRESHOLD"],
+ corr_method = config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["CORR_METHOD"]
output:
- "reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html"
+ "reports/data_exploration/heatmap_feature_correlation_matrix.html"
script:
- "../src/visualization/heatmap_sensed_bins.py"
+ "../src/visualization/heatmap_feature_correlation_matrix.py"
-rule heatmap_sensed_bins_all_participants:
- input:
- heatmap_sensed_bins = expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"])
- output:
- "reports/data_exploration/heatmap_sensed_bins_all_participants.html"
- script:
- "../src/visualization/heatmap_sensed_bins_all_participants.Rmd"
-
-rule overall_compliance_heatmap:
- input:
- phone_sensed_bins = expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]),
- phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}hours_{{min_valid_bins_per_hour}}bins.csv", pid=config["PIDS"]),
- pid_files = expand("data/external/{pid}", pid=config["PIDS"])
- params:
- only_show_valid_days = config["OVERALL_COMPLIANCE_HEATMAP"]["ONLY_SHOW_VALID_DAYS"],
- local_timezone = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"],
- expected_num_of_days = config["OVERALL_COMPLIANCE_HEATMAP"]["EXPECTED_NUM_OF_DAYS"],
- bin_size = config["OVERALL_COMPLIANCE_HEATMAP"]["BIN_SIZE"],
- min_bins_per_hour = "{min_valid_bins_per_hour}"
- output:
- "reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/overall_compliance_heatmap.html"
- script:
- "../src/visualization/overall_compliance_heatmap.py"
diff --git a/src/visualization/battery_consumption_rates_barchart.py b/src/visualization/battery_consumption_rates_barchart.py
deleted file mode 100644
index 148b76ef..00000000
--- a/src/visualization/battery_consumption_rates_barchart.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import pandas as pd
-import datetime
-import plotly.io as pio
-import plotly.graph_objects as go
-
-def getBatteryConsumptionRatesBarChart(battery_data, pid):
- plot = go.Figure(go.Bar(
- x=battery_data["battery_daily_avgconsumptionrate"],
- y=battery_data["local_date"].apply(lambda x: x.strftime("%Y/%m/%d")).tolist(),
- orientation='h'))
- plot.update_layout(title="Daily battery consumption rates bar chart for " + pid + "<br>Label: " + label + ", device_id: " + device_id,
- xaxis_title="battery drains % per hour",
- )
- return plot
-
-
-
-battery_data = pd.read_csv(snakemake.input["sensor"], parse_dates=["local_date"])
-pid = snakemake.params["pid"]
-
-with open(snakemake.input["pid_file"], encoding="ISO-8859-1") as external_file:
- external_file_content = external_file.readlines()
-device_id = external_file_content[0].split(",")[-1]
-label = external_file_content[2]
-
-if battery_data.empty:
- empty_html = open(snakemake.output[0], "w")
- empty_html.write("There is no battery data for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
- empty_html.close()
-else:
- battery_data.set_index(["local_date"], inplace=True)
- battery_data = battery_data.resample("1D").asfreq().fillna(0).reset_index()
- plot = getBatteryConsumptionRatesBarChart(battery_data, pid)
- pio.write_html(plot, file=snakemake.output[0], auto_open=False, include_plotlyjs="cdn")
\ No newline at end of file
diff --git a/src/visualization/compliance_report.Rmd b/src/visualization/compliance_report.Rmd
deleted file mode 100644
index 2717875d..00000000
--- a/src/visualization/compliance_report.Rmd
+++ /dev/null
@@ -1,39 +0,0 @@
----
-title: "Compliance Report"
-author:
- - "MoSHI Pipeline"
-date: "`r format(Sys.time(), '%d %B, %Y')`"
-params:
- rmd: "compliance_report.Rmd"
-output:
- html_document:
- highlight: tango
- number_sections: no
- theme: default
- toc: yes
- toc_depth: 3
- toc_float:
- collapsed: no
- smooth_scroll: yes
----
-
-```{r include=FALSE}
-source("renv/activate.R")
-```
-
-## Overall phone compliance
-
-```{r, echo=FALSE}
-htmltools::includeHTML(snakemake@input[["compliance_heatmap"]])
-```
-
-## Per sensor compliance
-```{r, echo=FALSE}
-heatmaps <- snakemake@input[["sensor_heatmaps"]]
-heatmaps.html <- vector(mode="list", length(heatmaps))
-
-for(sensor_id in 1:length(heatmaps)){
- heatmaps.html[[sensor_id]] <- htmltools::includeHTML(heatmaps[sensor_id])
-}
-htmltools::tagList(heatmaps.html)
-```
diff --git a/src/visualization/heatmap_days_by_sensors.py b/src/visualization/heatmap_days_by_sensors.py
deleted file mode 100644
index f1ab53c0..00000000
--- a/src/visualization/heatmap_days_by_sensors.py
+++ /dev/null
@@ -1,74 +0,0 @@
-import numpy as np
-import pandas as pd
-import plotly.io as pio
-import plotly.graph_objects as go
-from datetime import datetime, timedelta
-
-def getRowCountHeatmap(row_count_sensors_normalized, row_count_sensors, pid, output_path):
- plot = go.Figure(data=go.Heatmap(z=row_count_sensors_normalized.T.values.tolist(),
- x=[datetime.strftime(idx[0], "%Y/%m/%d")+"("+str(idx[1])+")" for idx in row_count_sensors.index],
- y=row_count_sensors.columns.tolist(),
- hovertext=row_count_sensors.T.values.tolist(),
- hovertemplate="Date: %{x}<br>Sensor: %{y}<br>Row count: %{hovertext}",
- colorscale="Viridis"))
- plot.update_layout(title="Row count heatmap for " + pid)
- pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
-
-
-
-phone_valid_sensed_days = pd.read_csv(snakemake.input["phone_valid_sensed_days"], parse_dates=["local_date"], index_col=["local_date"])
-phone_valid_sensed_days = phone_valid_sensed_days[phone_valid_sensed_days["is_valid_sensed_day"] == True]
-
-row_count_sensors = pd.DataFrame()
-for sensor_path in snakemake.input["sensors"]:
- sensor_name = sensor_path.split("/")[-1].replace("_with_datetime.csv", "")
- # plugin_studentlife_audio_android or plugin_studentlife_audio => conversion; plugin_google_activity_recognition or plugin_ios_activity_recognition => AR; applications_foreground => apps
- sensor_name = sensor_name.replace("plugin_studentlife_audio_android", "conversion").replace("plugin_studentlife_audio", "conversion") \
- .replace("plugin_google_activity_recognition", "AR").replace("plugin_ios_activity_recognition", "AR") \
- .replace("applications_foreground", "apps")
-
- sensor_data = pd.read_csv(sensor_path, encoding="ISO-8859-1", parse_dates=["local_date"], dtype={"label": str})
- if sensor_data.empty:
- row_count_sensor = pd.DataFrame(columns=[sensor_name])
- else:
- row_count_sensor = sensor_data[["timestamp", "local_date"]].groupby(["local_date"]).count().rename(columns={"timestamp": sensor_name})
- row_count_sensors = row_count_sensors.join(row_count_sensor, how="outer")
-
-row_count_sensors.index = pd.to_datetime(row_count_sensors.index)
-row_count_sensors = row_count_sensors.join(phone_valid_sensed_days[["valid_sensed_hours"]], how="outer")
-
-if row_count_sensors.empty:
- empty_html = open(snakemake.output[0], "w")
- empty_html.write("There are no records of sensors in database.")
- empty_html.close()
-else:
- # set date_idx based on the first date
- reference_date = row_count_sensors.index.min()
- last_date = row_count_sensors.index.max()
- row_count_sensors["date_idx"] = (row_count_sensors.index - reference_date).days
- row_count_sensors["local_date"] = row_count_sensors.index
- row_count_sensors.set_index(["local_date", "date_idx"], inplace=True)
-
-
- expected_num_of_days = int(snakemake.params["expected_num_of_days"])
- if expected_num_of_days < -1:
- raise ValueError("EXPECTED_NUM_OF_DAYS of HEATMAP_DAYS_BY_SENSORS section in config.yaml must be larger or equal to -1.")
- # if expected_num_of_days = -1, return all dates
- expected_num_of_days = (last_date - reference_date).days if expected_num_of_days == -1 else expected_num_of_days
-
- # add empty rows to make sure different participants have the same date_idx range
- date_idx_range = [idx for idx in range(expected_num_of_days)]
- date_range = [reference_date + timedelta(days=idx) for idx in date_idx_range]
- all_dates = pd.DataFrame({"local_date": date_range, "date_idx": date_idx_range})
- all_dates.set_index(["local_date", "date_idx"], inplace=True)
-
- row_count_sensors = row_count_sensors.merge(all_dates, left_index=True, right_index=True, how="right")
-
- # normalize each sensor (column)
- if row_count_sensors.count().max() > 1:
- row_count_sensors_normalized = row_count_sensors.fillna(np.nan).apply(lambda x: (x - np.nanmin(x)) / (np.nanmax(x) - np.nanmin(x)) if np.nanmax(x) != np.nanmin(x) else (x / np.nanmin(x)), axis=0)
- else:
- row_count_sensors_normalized = row_count_sensors
-
- pid = sensor_path.split("/")[2]
- getRowCountHeatmap(row_count_sensors_normalized, row_count_sensors, pid, snakemake.output[0])
diff --git a/src/visualization/heatmap_feature_correlation_matrix.py b/src/visualization/heatmap_feature_correlation_matrix.py
new file mode 100644
index 00000000..8fb01934
--- /dev/null
+++ b/src/visualization/heatmap_feature_correlation_matrix.py
@@ -0,0 +1,48 @@
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+
+
+def getCorrMatrixHeatmap(corr_matrix, time_segment, html_file):
+
+ feature_names = corr_matrix.columns
+
+ fig = go.Figure(data=go.Heatmap(z=corr_matrix.values.tolist(),
+ x=feature_names,
+ y=feature_names,
+ colorscale="Viridis"))
+
+ fig.update_layout(title="Correlation matrix of features of " + time_segment + " segments.")
+
+ html_file.write(fig.to_html(full_html=False, include_plotlyjs="cdn"))
+
+
+
+min_rows_ratio = snakemake.params["min_rows_ratio"]
+corr_threshold = snakemake.params["corr_threshold"]
+corr_method = snakemake.params["corr_method"]
+
+features = pd.read_csv(snakemake.input["all_sensor_features"])
+time_segments = set(features["local_segment_label"])
+
+html_file = open(snakemake.output[0], "a", encoding="utf-8")
+if features.empty:
+ html_file.write("There are no features for any participant.")
+else:
+
+ for time_segment in time_segments:
+ features_per_segment = features[features["local_segment_label"] == time_segment]
+ if features_per_segment.empty:
+ html_file.write("There are no features for " + time_segment + " segments.<br>")
+ else:
+ # drop useless columns
+ features_per_segment = features_per_segment.drop(["pid", "local_segment", "local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"], axis=1).astype(float)
+ # get correlation matrix
+ corr_matrix = features_per_segment.corr(method=corr_method, min_periods=min_rows_ratio * features_per_segment.shape[0])
+ # replace correlation coefficients whose absolute value is below corr_threshold with NA
+ corr_matrix[(corr_matrix > -corr_threshold) & (corr_matrix < corr_threshold)] = np.nan
+
+ # plot heatmap
+ getCorrMatrixHeatmap(corr_matrix, time_segment, html_file)
+
+html_file.close()
diff --git a/src/visualization/heatmap_features_correlations.py b/src/visualization/heatmap_features_correlations.py
deleted file mode 100644
index 8093a9db..00000000
--- a/src/visualization/heatmap_features_correlations.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import numpy as np
-import pandas as pd
-import plotly.io as pio
-import plotly.graph_objects as go
-
-
-def getCorrMatrixHeatmap(corr_matrix, output_path):
- colnames = corr_matrix.columns
- plot = go.Figure(data=go.Heatmap(z=corr_matrix.values.tolist(),
- x=colnames,
- y=colnames,
- colorscale="Viridis"))
- plot.update_layout(title="Correlation Matrix Heatmap")
- pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
-
-
-min_rows_ratio = snakemake.params["min_rows_ratio"]
-corr_threshold = snakemake.params["corr_threshold"]
-
-# merge features
-features, features_all_sensors = pd.DataFrame(columns=["local_date"]), pd.DataFrame(columns=["local_date"])
-pids = set()
-last_pid = None
-for path in snakemake.input["features"]:
- pid = path.split("/")[2]
- if pid not in pids:
- pids.add(pid)
- features_all_sensors["pid"] = last_pid
- features = pd.concat([features, features_all_sensors], axis=0, ignore_index=True, sort=False)
- features_all_sensors = pd.DataFrame(columns=["local_date"])
- features_per_sensor = pd.read_csv(path)
- features_all_sensors = features_all_sensors.merge(features_per_sensor, on="local_date", how="outer")
- last_pid = pid
-
-features_all_sensors["pid"] = last_pid
-features = pd.concat([features, features_all_sensors], axis=0, ignore_index=True, sort=False)
-features.set_index(["pid", "local_date"], inplace=True)
-
-# select days based on the input of "phone_valid_sensed_days"
-selected_participants_and_days = pd.DataFrame()
-for path in snakemake.input["phone_valid_sensed_days"]:
- pid = path.split("/")[2]
- phone_valid_sensed_days = pd.read_csv(path)
- phone_valid_sensed_days = phone_valid_sensed_days[phone_valid_sensed_days["is_valid_sensed_day"] == True]
- phone_valid_sensed_days["pid"] = pid
- selected_participants_and_days = pd.concat([selected_participants_and_days, phone_valid_sensed_days], axis=0)
-
-selected_participants_and_days.set_index(["pid", "local_date"], inplace=True)
-features = features.loc[features.index.intersection(selected_participants_and_days.index), :]
-
-# get correlation matrix
-features = features.astype(float)
-corr_matrix = features.corr(method=snakemake.params["corr_method"], min_periods=min_rows_ratio * features.shape[0])
-
-# replace correlation coefficients less than corr_threshold with NA
-corr_matrix[(corr_matrix > -corr_threshold) & (corr_matrix < corr_threshold)] = np.nan
-
-# plot heatmap
-getCorrMatrixHeatmap(corr_matrix, snakemake.output[0])
diff --git a/src/visualization/heatmap_phone_data_yield_per_participant_per_time_segment.py b/src/visualization/heatmap_phone_data_yield_per_participant_per_time_segment.py
new file mode 100644
index 00000000..fd9595c5
--- /dev/null
+++ b/src/visualization/heatmap_phone_data_yield_per_participant_per_time_segment.py
@@ -0,0 +1,85 @@
+import pandas as pd
+import numpy as np
+import plotly.graph_objects as go
+import yaml
+
+
+
+
+
+def getPhoneDataYieldHeatmap(data_for_plot, y_axis_labels, time_segment, type, html_file):
+    """Append one heatmap of valid yielded `type` ratios to `html_file`.
+
+    data_for_plot: DataFrame whose column labels are the x values and whose cells are the ratios (color).
+    y_axis_labels: labels used for the y axis.
+    time_segment: time segment label shown in the title.
+    type: unit name inserted in the hover text and the title (this script passes "minutes" and "hours").
+    html_file: writable text file object that receives the figure as an HTML fragment.
+    """
+    # "<br>" is the line break plotly understands inside hover templates and titles
+    fig = go.Figure(data=go.Heatmap(z=data_for_plot.values.tolist(),
+                                    x=data_for_plot.columns.tolist(),
+                                    y=y_axis_labels,
+                                    hovertext=data_for_plot.values.tolist(),
+                                    hovertemplate="Time since first segment: %{x}<br>Participant: %{y}<br>Ratiovalidyielded" + type + ": %{z}",
+                                    zmin=0, zmax=1,
+                                    colorscale="Viridis"))
+    fig.update_layout(title="Heatmap of valid yielded " + type + " ratio for " + time_segment + " segments.<br>y-axis shows participant information (format: pid.label).<br>x-axis shows the time since their first segment.<br>z-axis (color) shows valid yielded " + type + " ratio during a segment instance.")
+    fig["layout"]["xaxis"].update(side="bottom")
+    fig["layout"].update(xaxis_title="Time Since First Segment")
+    fig["layout"].update(margin=dict(t=160))
+    html_file.write(fig.to_html(full_html=False, include_plotlyjs="cdn"))
+
+
+# gather, per time segment label, one column of yield ratios per participant (column name: pid.label)
+y_axis_labels, phone_data_yield_minutes, phone_data_yield_hours = [], {}, {}
+for phone_data_yield_path, participant_file_path, time_segments_path in zip(snakemake.input["phone_data_yield"], snakemake.input["participant_file"], snakemake.input["time_segments_labels"]):
+    # set pid.label as y_axis_label
+    pid = phone_data_yield_path.split("/")[3]
+    time_segments = pd.read_csv(time_segments_path, header=0)["label"]
+
+    with open(participant_file_path, "r", encoding="utf-8") as f:
+        participant_file = yaml.safe_load(f)
+    label = participant_file["PHONE"]["LABEL"]
+
+    y_axis_label = pid + "." + label
+    y_axis_labels.append(y_axis_label)
+
+    phone_data_yield = pd.read_csv(phone_data_yield_path, index_col=["local_segment_start_datetime"], parse_dates=["local_segment_start_datetime"])
+    # make sure the phone_data_yield file contains "phone_data_yield_rapids_ratiovalidyieldedminutes" and "phone_data_yield_rapids_ratiovalidyieldedhours" columns
+    if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
+        raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")
+
+    if phone_data_yield.empty:
+        # no rows for this participant; the later reindex on y_axis_labels still gives it a heatmap row
+        continue
+
+    for time_segment in time_segments:
+        phone_data_yield_per_segment = phone_data_yield[phone_data_yield["local_segment_label"] == time_segment]
+        if phone_data_yield_per_segment.empty:
+            continue
+
+        # use the time elapsed since the first start date time of local segments as x value
+        phone_data_yield_per_segment.index = phone_data_yield_per_segment.index - phone_data_yield_per_segment.index.min()
+
+        phone_data_yield_minutes_per_segment = phone_data_yield_per_segment[["phone_data_yield_rapids_ratiovalidyieldedminutes"]].rename(columns={"phone_data_yield_rapids_ratiovalidyieldedminutes": y_axis_label})
+        phone_data_yield_hours_per_segment = phone_data_yield_per_segment[["phone_data_yield_rapids_ratiovalidyieldedhours"]].rename(columns={"phone_data_yield_rapids_ratiovalidyieldedhours": y_axis_label})
+
+        if time_segment not in phone_data_yield_minutes:
+            phone_data_yield_minutes[time_segment] = phone_data_yield_minutes_per_segment
+            phone_data_yield_hours[time_segment] = phone_data_yield_hours_per_segment
+        else:
+            phone_data_yield_minutes[time_segment] = pd.concat([phone_data_yield_minutes[time_segment], phone_data_yield_minutes_per_segment], axis=1, sort=True)
+            phone_data_yield_hours[time_segment] = pd.concat([phone_data_yield_hours[time_segment], phone_data_yield_hours_per_segment], axis=1, sort=True)
+
+
+# the context manager closes the report even when plotting raises
+with open(snakemake.output[0], "a", encoding="utf-8") as html_file:
+    if not phone_data_yield_minutes:
+        html_file.write("There is no sensor data for the sensors in [PHONE_DATA_YIELD][SENSORS].")
+    for time_segment in phone_data_yield_minutes:
+        minutes_data_for_plot = phone_data_yield_minutes[time_segment].transpose().reindex(pd.Index(y_axis_labels)).round(3)
+        hours_data_for_plot = phone_data_yield_hours[time_segment].transpose().reindex(pd.Index(y_axis_labels)).round(3)
+
+        getPhoneDataYieldHeatmap(minutes_data_for_plot, y_axis_labels, time_segment, "minutes", html_file)
+        getPhoneDataYieldHeatmap(hours_data_for_plot, y_axis_labels, time_segment, "hours", html_file)
diff --git a/src/visualization/heatmap_rows.py b/src/visualization/heatmap_rows.py
deleted file mode 100644
index 764478ec..00000000
--- a/src/visualization/heatmap_rows.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import pandas as pd
-import numpy as np
-import plotly.io as pio
-import plotly.graph_objects as go
-import datetime
-
-def getComplianceMatrix(dates, compliance_bins):
- compliance_matrix = []
- for date in dates:
- date_bins = compliance_bins[compliance_bins["local_date"] == date]["count"].tolist()
- compliance_matrix.append(date_bins)
- return compliance_matrix
-
-
-def getRowCountHeatmap(dates, row_count_per_bin, sensor_name, pid, output_path, bin_size):
- bins_per_hour = int(60 / bin_size)
- x_axis_labels = ["{0:0=2d}".format(x // bins_per_hour) + ":" + \
- "{0:0=2d}".format(x % bins_per_hour * bin_size) for x in range(24 * bins_per_hour)]
- plot = go.Figure(data=go.Heatmap(z=row_count_per_bin,
- x=x_axis_labels,
- y=[datetime.datetime.strftime(date, '%Y/%m/%d') for date in dates],
- colorscale="Viridis"))
- plot.update_layout(title="Row count heatmap for " + sensor_name + " of " + pid + "<br>Label: " + label + ", device_id: " + device_id)
- pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
-
-
-
-sensor_data = pd.read_csv(snakemake.input["sensor"], encoding="ISO-8859-1")
-sensor_name = snakemake.params["table"]
-pid = snakemake.params["pid"]
-bin_size = snakemake.params["bin_size"]
-
-with open(snakemake.input["pid_file"], encoding="ISO-8859-1") as external_file:
- external_file_content = external_file.readlines()
-device_id = external_file_content[0].split(",")[-1]
-label = external_file_content[2]
-
-
-# check if we have sensor data
-if sensor_data.empty:
- empty_html = open(snakemake.output[0], "w")
- empty_html.write("There is no " + sensor_name + " data for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
- empty_html.close()
-else:
- start_date = sensor_data["local_date"][0]
- end_date = sensor_data.at[sensor_data.index[-1],"local_date"]
-
- sensor_data["local_date_time"] = pd.to_datetime(sensor_data["local_date_time"])
- sensor_data = sensor_data[["local_date_time"]]
- sensor_data["count"] = 1
-
- # Add first and last day boundaries for resampling
- sensor_data = sensor_data.append([pd.Series([datetime.datetime.strptime(start_date + " 00:00:00", "%Y-%m-%d %H:%M:%S"), 0], sensor_data.columns),
- pd.Series([datetime.datetime.strptime(end_date + " 23:59:59", "%Y-%m-%d %H:%M:%S"), 0], sensor_data.columns)])
-
- # Resample into bins with the size of bin_size
- resampled_bins = pd.DataFrame(sensor_data.resample(str(bin_size) + "T", on="local_date_time")["count"].sum())
-
- # Extract list of dates for creating the heatmap
- resampled_bins.reset_index(inplace=True)
- resampled_bins["local_date"] = resampled_bins["local_date_time"].dt.date
- dates = resampled_bins["local_date"].drop_duplicates().tolist()
-
- # Create heatmap
- row_count_per_bin = getComplianceMatrix(dates, resampled_bins)
- row_count_per_bin = np.asarray(row_count_per_bin)
- row_count_per_bin = np.where(row_count_per_bin == 0, np.nan, row_count_per_bin)
- getRowCountHeatmap(dates, row_count_per_bin, sensor_name, pid, snakemake.output[0], bin_size)
diff --git a/src/visualization/heatmap_sensed_bins.py b/src/visualization/heatmap_sensed_bins.py
deleted file mode 100644
index 26639400..00000000
--- a/src/visualization/heatmap_sensed_bins.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import pandas as pd
-import numpy as np
-import plotly.io as pio
-import plotly.graph_objects as go
-import datetime
-
-def getDatesComplianceMatrix(phone_sensed_bins):
- dates = phone_sensed_bins.index
- compliance_matrix = []
- for date in dates:
- compliance_matrix.append(phone_sensed_bins.loc[date, :].tolist())
- return dates, compliance_matrix
-
-def getComplianceHeatmap(dates, compliance_matrix, pid, output_path, bin_size):
- bins_per_hour = int(60 / bin_size)
- x_axis_labels = ["{0:0=2d}".format(x // bins_per_hour) + ":" + \
- "{0:0=2d}".format(x % bins_per_hour * bin_size) for x in range(24 * bins_per_hour)]
- plot = go.Figure(data=go.Heatmap(z=compliance_matrix,
- x=x_axis_labels,
- y=[datetime.datetime.strftime(date, '%Y/%m/%d') for date in dates],
- colorscale='Viridis',
- colorbar={'tick0': 0,'dtick': 1}))
- plot.update_layout(title="Heatmap sensed bins.<br>Five-minute bins showing how many sensors logged at least one row of data in that period for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
- pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
-
-# get current patient id
-pid = snakemake.params["pid"]
-bin_size = snakemake.params["bin_size"]
-
-with open(snakemake.input["pid_file"], encoding="ISO-8859-1") as external_file:
- external_file_content = external_file.readlines()
-device_id = external_file_content[0].split(",")[-1]
-label = external_file_content[2]
-
-phone_sensed_bins = pd.read_csv(snakemake.input["sensor"], parse_dates=["local_date"], index_col="local_date")
-
-if phone_sensed_bins.empty:
- empty_html = open(snakemake.output[0], "w", encoding="ISO-8859-1")
- empty_html.write("There is no sensor data for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
- empty_html.close()
-else:
- # resample to impute missing dates
- phone_sensed_bins = phone_sensed_bins.resample("1D").asfreq().fillna(0)
- # get dates and compliance_matrix
- dates, compliance_matrix = getDatesComplianceMatrix(phone_sensed_bins)
- # convert compliance_matrix from list to np.array and replace 0 with np.nan
- compliance_matrix = np.asarray(compliance_matrix)
- compliance_matrix = np.where(compliance_matrix == 0, np.nan, compliance_matrix)
- # get heatmap
- getComplianceHeatmap(dates, compliance_matrix, pid, snakemake.output[0], bin_size)
\ No newline at end of file
diff --git a/src/visualization/heatmap_sensor_row_count_per_time_segment.py b/src/visualization/heatmap_sensor_row_count_per_time_segment.py
new file mode 100644
index 00000000..6b62e6e1
--- /dev/null
+++ b/src/visualization/heatmap_sensor_row_count_per_time_segment.py
@@ -0,0 +1,89 @@
+import pandas as pd
+import numpy as np
+import plotly.graph_objects as go
+from importlib import util
+from pathlib import Path
+import yaml
+
+
+def getRowCountHeatmap(data_for_plot, scaled_data_for_plot, pid, time_segment, html_file):
+    """Append a heatmap (x: columns of `data_for_plot`, y: its index) to `html_file` as an HTML fragment.
+
+    Colors come from `scaled_data_for_plot`; the hover text shows the matching values of `data_for_plot`.
+    The title is built from `time_segment`, `pid` and the module-level `label`; "<br>" is plotly's line break.
+    """
+    fig = go.Figure(data=go.Heatmap(z=scaled_data_for_plot.values.tolist(),
+                                    x=data_for_plot.columns,
+                                    y=data_for_plot.index,
+                                    hovertext=data_for_plot.values.tolist(),
+                                    hovertemplate="Segment start: %{x}<br>Sensor: %{y}<br>Row count: %{hovertext}",
+                                    zmin=0, zmax=1,
+                                    colorscale='Viridis'))
+    fig.update_layout(title="Heatmap of sensor row count for " + time_segment + " segments. Pid: " + pid +". Label: " + label + "<br>y-axis shows the included sensors.<br>x-axis shows the start (date and time) of a time segment.<br>z-axis (color) shows row count per sensor per segment instance.")
+    fig["layout"].update(margin=dict(t=160))
+    html_file.write(fig.to_html(full_html=False, include_plotlyjs="cdn"))
+
+
+# load src/features/utils/utils.py by file path so that its filter_data_by_segment helper can be reused here
+spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent.joinpath("features", "utils", "utils.py")))
+mod = util.module_from_spec(spec)
+spec.loader.exec_module(mod)
+filter_data_by_segment = mod.filter_data_by_segment
+
+
+
+
+
+phone_data_yield = pd.read_csv(snakemake.input["phone_data_yield"], index_col=["local_segment_start_datetime"], parse_dates=["local_segment_start_datetime"])
+# make sure the phone_data_yield file contains "phone_data_yield_rapids_ratiovalidyieldedminutes" and "phone_data_yield_rapids_ratiovalidyieldedhours" columns
+if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
+    raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")
+phone_data_yield = phone_data_yield[["local_segment_label", "phone_data_yield_rapids_ratiovalidyieldedminutes", "phone_data_yield_rapids_ratiovalidyieldedhours"]]
+
+time_segments = pd.read_csv(snakemake.input["time_segments_labels"], header=0)["label"]
+pid = snakemake.params["pid"]
+
+with open(snakemake.input["participant_file"], "r", encoding="utf-8") as f:
+    participant_file = yaml.safe_load(f)
+label = participant_file["PHONE"]["LABEL"]
+
+sensor_names = []
+# one (initially empty) table per time segment label; a column is appended for every sensor with data
+sensors_row_count = {time_segment: pd.DataFrame() for time_segment in time_segments}
+
+for sensor_path in snakemake.input["all_sensors"]:
+    sensor_data = pd.read_csv(sensor_path, usecols=["assigned_segments"])
+    sensor_name = sensor_path.split("/")[-1].replace("_with_datetime.csv", "")
+    sensor_names.append(sensor_name)
+
+    if not sensor_data.empty:
+        for time_segment in time_segments:
+            sensor_data_per_segment = filter_data_by_segment(sensor_data, time_segment)
+
+            if not sensor_data_per_segment.empty:
+                # extract local start datetime of the segment from "local_segment" column
+                sensor_data_per_segment["local_segment_start_datetime"] = pd.to_datetime(sensor_data_per_segment["local_segment"].apply(lambda x: x.split("#")[1].split(",")[0]))
+                sensor_row_count = sensor_data_per_segment.groupby("local_segment_start_datetime")[["local_segment"]].count().rename(columns={"local_segment": sensor_name})
+                sensors_row_count[time_segment] = pd.concat([sensors_row_count[time_segment], sensor_row_count], axis=1, sort=False)
+
+# add phone data yield features and plot heatmap
+sensor_names.extend(["ratiovalidyieldedminutes", "ratiovalidyieldedhours"])
+# the context manager closes the report even when plotting raises
+with open(snakemake.output[0], "a", encoding="utf-8") as html_file:
+    for time_segment in time_segments:
+        if not phone_data_yield.empty:
+            phone_data_yield_per_segment = phone_data_yield[phone_data_yield["local_segment_label"] == time_segment].rename(columns={"phone_data_yield_rapids_ratiovalidyieldedminutes": "ratiovalidyieldedminutes","phone_data_yield_rapids_ratiovalidyieldedhours": "ratiovalidyieldedhours"}).round(3)
+            if not phone_data_yield_per_segment.empty:
+                sensors_row_count[time_segment] = pd.concat([sensors_row_count[time_segment], phone_data_yield_per_segment], axis=1, sort=True)
+
+        # consider all the sensors
+        data_for_plot = sensors_row_count[time_segment].transpose().reindex(pd.Index(sensor_names))
+
+        if data_for_plot.empty:
+            html_file.write("There are no records of selected sensors in database for " + time_segment + " segments. Pid: " + pid + ". Label: " + label + ".<br>")
+        else:
+            # except for phone data yield sensor, scale each sensor (row) to the range of [0, 1]
+            scaled_data_for_plot = data_for_plot.copy()
+            scaled_data_for_plot.loc[sensor_names[:-2]] = scaled_data_for_plot.fillna(np.nan).loc[sensor_names[:-2]].apply(lambda x: (x - np.nanmin(x)) / (np.nanmax(x) - np.nanmin(x)) if np.nanmax(x) != np.nanmin(x) else (x / np.nanmin(x)), axis=1)
+
+            getRowCountHeatmap(data_for_plot, scaled_data_for_plot, pid, time_segment, html_file)
diff --git a/src/visualization/heatmap_sensors_per_minute_per_time_segment.py b/src/visualization/heatmap_sensors_per_minute_per_time_segment.py
new file mode 100644
index 00000000..dd524322
--- /dev/null
+++ b/src/visualization/heatmap_sensors_per_minute_per_time_segment.py
@@ -0,0 +1,100 @@
+import pandas as pd
+import numpy as np
+import plotly.graph_objects as go
+from importlib import util
+from pathlib import Path
+import yaml
+
+
+def colors2colorscale(colors):
+    """Return a stepped plotly colorscale: each color but the last fills one equal band of [0, 1]; the last is the stop at 1."""
+    last = len(colors) - 1
+    stops = []
+    for position, color in enumerate(colors[:-1]):
+        # the same color at both edges of a band keeps it flat instead of blending into the next one
+        stops += [[position / last, color], [(position + 1) / last, color]]
+    stops.extend([1, color] for color in colors[-1:])
+    return stops
+
+def getSensorsPerMinPerSegmentHeatmap(phone_data_yield, pid, time_segment, html_file):
+    """Append a heatmap of `phone_data_yield` to `html_file` as an HTML fragment.
+
+    phone_data_yield: DataFrame whose index is used for the y axis and whose column labels
+    (numbers of minutes) are converted to Timedeltas for the x axis; the cells are the colors.
+    The color scale and the title read the module-level `colors` and `label`; "<br>" is plotly's line break.
+    """
+    x_axis_labels = [pd.Timedelta(minutes=x) for x in phone_data_yield.columns]
+
+    fig = go.Figure(data=go.Heatmap(z=phone_data_yield.values.tolist(),
+                                    x=x_axis_labels,
+                                    y=phone_data_yield.index,
+                                    zmin=0, zmax=16,
+                                    colorscale=colors2colorscale(colors),
+                                    colorbar=dict(thickness=25, tickvals=[1/2 + x for x in range(16)],ticktext=[x for x in range(16)])))
+    fig.update_layout(title="Number of sensors with any data per minute for " + time_segment + " segments. Pid: "+pid+". Label: " + label + "<br>y-axis shows the start (date and time) of a time segment.<br>x-axis shows the time since the start of the time segment.<br>z-axis (color) shows how many sensors logged at least one row of data per minute.")
+    fig["layout"].update(margin=dict(t=160))
+    html_file.write(fig.to_html(full_html=False, include_plotlyjs="cdn"))
+
+
+# load src/features/utils/utils.py by file path so that its filter_data_by_segment helper can be reused here
+spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent.joinpath("features", "utils", "utils.py")))
+mod = util.module_from_spec(spec)
+spec.loader.exec_module(mod)
+filter_data_by_segment = mod.filter_data_by_segment
+
+
+
+
+
+
+
+
+
+
+# discrete palette handed to colors2colorscale; with zmin=0 the first color (red) is the band that contains z = 0
+colors = ["red", "#3D0751", "#423176", "#414381", "#3F5688", "#42678B", "#42768C", "#45868B", "#4A968A", "#53A485", "#5FB57E", "#76C170", "#91CF63", "#B4DA55", "#D9E152", "#F8E755", "#DEE00F"]
+pid = snakemake.params["pid"]
+time_segments_labels = pd.read_csv(snakemake.input["time_segments_labels"], header=0)
+
+with open(snakemake.input["participant_file"], "r", encoding="utf-8") as f:
+    participant_file = yaml.safe_load(f)
+label = participant_file["PHONE"]["LABEL"]
+
+phone_data_yield = pd.read_csv(snakemake.input["phone_data_yield"], parse_dates=["local_date_time"])
+
+# the context manager closes the report even when a later step raises
+with open(snakemake.output[0], "a", encoding="utf-8") as html_file:
+    if phone_data_yield.empty:
+        html_file.write("There is no sensor data for " + pid + " (pid) and " + label + " (label).")
+    else:
+        for time_segment in time_segments_labels["label"]:
+            phone_data_yield_per_segment = filter_data_by_segment(phone_data_yield, time_segment)
+
+            if phone_data_yield_per_segment.empty:
+                html_file.write("There is no sensor data of " + time_segment + " segments for " + pid + " (pid) and " + label + " (label).<br>")
+            else:
+                # calculate the length (in minute) of per segment instance
+                phone_data_yield_per_segment["length"] = phone_data_yield_per_segment["timestamps_segment"].str.split(",").apply(lambda x: int((int(x[1])-int(x[0])) / (1000 * 60)))
+                # calculate the number of sensors logged at least one row of data per minute.
+                phone_data_yield_per_segment = phone_data_yield_per_segment.groupby(["local_segment", "length", "local_date", "local_hour", "local_minute"])[["sensor", "local_date_time"]].max().reset_index()
+                # extract local start datetime of the segment from "local_segment" column
+                phone_data_yield_per_segment["local_segment_start_datetimes"] = pd.to_datetime(phone_data_yield_per_segment["local_segment"].apply(lambda x: x.split("#")[1].split(",")[0]))
+                # calculate the number of minutes after local start datetime of the segment
+                phone_data_yield_per_segment["minutes_after_segment_start"] = ((phone_data_yield_per_segment["local_date_time"] - phone_data_yield_per_segment["local_segment_start_datetimes"]) / pd.Timedelta(minutes=1)).astype("int")
+
+                # impute missing rows with 0
+                columns_for_full_index = phone_data_yield_per_segment[["local_segment_start_datetimes", "length"]].drop_duplicates(keep="first")
+                columns_for_full_index = columns_for_full_index.apply(lambda row: [[row["local_segment_start_datetimes"], x] for x in range(row["length"] + 1)], axis=1)
+                # flatten the per-segment lists into one list of [segment start, minute offset] pairs
+                full_index = []
+                for columns in columns_for_full_index:
+                    full_index = full_index + columns
+                full_index = pd.MultiIndex.from_tuples(full_index, names=("local_segment_start_datetimes", "minutes_after_segment_start"))
+                phone_data_yield_per_segment = phone_data_yield_per_segment.set_index(["local_segment_start_datetimes", "minutes_after_segment_start"]).reindex(full_index).reset_index().fillna(0)
+
+                # transpose the dataframe per local start datetime of the segment and discard the useless index layer
+                phone_data_yield_per_segment = phone_data_yield_per_segment.groupby("local_segment_start_datetimes")[["minutes_after_segment_start", "sensor"]].apply(lambda x: x.set_index("minutes_after_segment_start").transpose())
+                phone_data_yield_per_segment.index = phone_data_yield_per_segment.index.get_level_values("local_segment_start_datetimes")
+
+                # get heatmap
+                getSensorsPerMinPerSegmentHeatmap(phone_data_yield_per_segment, pid, time_segment, html_file)
diff --git a/src/visualization/histogram_phone_data_yield.py b/src/visualization/histogram_phone_data_yield.py
index e4a55aaf..cd15ec8d 100644
--- a/src/visualization/histogram_phone_data_yield.py
+++ b/src/visualization/histogram_phone_data_yield.py
@@ -8,15 +8,18 @@ phone_data_yield = pd.read_csv(snakemake.input[0])
if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")
-# plot ratio valid yielded minutes histogram
-fig_ratiovalidyieldedminutes = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedminutes", color="local_segment_label")
-fig_ratiovalidyieldedminutes.update_layout(title="Ratio Valid Yielded Minutes Histogram")
-
-# plot ratio valid yielded hours histogram
-fig_ratiovalidyieldedhours = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedhours", color="local_segment_label")
-fig_ratiovalidyieldedhours.update_layout(title="Ratio Valid Yielded Hours Histogram")
-
-
-with open(snakemake.output[0], "a") as html_file:
+html_file = open(snakemake.output[0], "a", encoding="utf-8")
+if phone_data_yield.empty:
+ html_file.write("There is no sensor data for the sensors in [PHONE_DATA_YIELD][SENSORS].")
+else:
+ # plot ratio valid yielded minutes histogram
+ fig_ratiovalidyieldedminutes = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedminutes", color="local_segment_label")
+ fig_ratiovalidyieldedminutes.update_layout(title="Histogram of valid yielded minutes ratio per time segment.")
html_file.write(fig_ratiovalidyieldedminutes.to_html(full_html=False, include_plotlyjs="cdn"))
+
+ # plot ratio valid yielded hours histogram
+ fig_ratiovalidyieldedhours = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedhours", color="local_segment_label")
+ fig_ratiovalidyieldedhours.update_layout(title="Histogram of valid yielded hours ratio per time segment.")
html_file.write(fig_ratiovalidyieldedhours.to_html(full_html=False, include_plotlyjs="cdn"))
+
+html_file.close()
diff --git a/src/visualization/heatmap_sensed_bins_all_participants.Rmd b/src/visualization/merge_heatmap_sensor_row_count_per_time_segment.Rmd
similarity index 63%
rename from src/visualization/heatmap_sensed_bins_all_participants.Rmd
rename to src/visualization/merge_heatmap_sensor_row_count_per_time_segment.Rmd
index e6dbdbbf..b6c8463c 100644
--- a/src/visualization/heatmap_sensed_bins_all_participants.Rmd
+++ b/src/visualization/merge_heatmap_sensor_row_count_per_time_segment.Rmd
@@ -1,10 +1,10 @@
---
-title: "Heatmap Sensed Bins Report"
+title: "Sensor Row Count per Time Segment For All Participants"
author:
- - "MoSHI Pipeline"
+ - "RAPIDS"
date: "`r format(Sys.time(), '%d %B, %Y')`"
params:
- rmd: "heatmap_sensed_bins_all_participants.Rmd"
+ rmd: "merge_heatmap_sensor_row_count_per_time_segment.Rmd"
output:
html_document:
highlight: tango
@@ -17,14 +17,17 @@ output:
smooth_scroll: yes
---
+
+
```{r include=FALSE}
source("renv/activate.R")
```
-## All phone sensors
```{r, echo=FALSE}
-heatmaps <- snakemake@input[["heatmap_sensed_bins"]]
+heatmaps <- snakemake@input[["heatmap_sensor_row_count_per_time_segment"]]
heatmaps.html <- vector(mode="list", length(heatmaps))
for(pid in 1:length(heatmaps)){
diff --git a/src/visualization/heatmap_days_by_sensors_all_participants.Rmd b/src/visualization/merge_heatmap_sensors_per_minute_per_time_segment.Rmd
similarity index 63%
rename from src/visualization/heatmap_days_by_sensors_all_participants.Rmd
rename to src/visualization/merge_heatmap_sensors_per_minute_per_time_segment.Rmd
index cb4303c2..2e1143e0 100644
--- a/src/visualization/heatmap_days_by_sensors_all_participants.Rmd
+++ b/src/visualization/merge_heatmap_sensors_per_minute_per_time_segment.Rmd
@@ -1,10 +1,10 @@
---
-title: "Heatmap Rows Report"
+title: "Sensors per Minute per Time Segment for All Participants"
author:
- - "MoSHI Pipeline"
+ - "RAPIDS"
date: "`r format(Sys.time(), '%d %B, %Y')`"
params:
- rmd: "heatmap_days_by_sensors_all_participants.Rmd"
+ rmd: "merge_heatmap_sensors_per_minute_per_time_segment.Rmd"
output:
html_document:
highlight: tango
@@ -17,14 +17,17 @@ output:
smooth_scroll: yes
---
+
+
```{r include=FALSE}
source("renv/activate.R")
```
-## All phone sensors
```{r, echo=FALSE}
-heatmaps <- snakemake@input[["heatmap_rows"]]
+heatmaps <- snakemake@input[["heatmap_sensors_per_minute_per_time_segment"]]
heatmaps.html <- vector(mode="list", length(heatmaps))
for(pid in 1:length(heatmaps)){
diff --git a/src/visualization/overall_compliance_heatmap.py b/src/visualization/overall_compliance_heatmap.py
deleted file mode 100644
index 877ab0d2..00000000
--- a/src/visualization/overall_compliance_heatmap.py
+++ /dev/null
@@ -1,102 +0,0 @@
-import pandas as pd
-import numpy as np
-import plotly.io as pio
-import plotly.graph_objects as go
-from dateutil import tz
-import datetime
-
-def getOneRow(data_per_participant, last_certain_dates, col_name, row, expected_num_of_days, only_show_valid_days):
-
- data = pd.read_csv(data_per_participant, index_col=["local_date"])
-
- if col_name == "num_sensors":
- data["num_sensors"] = data.max(axis=1)
-
- if only_show_valid_days and col_name == "valid_sensed_hours":
- # replace invalid days' valid sensed hours with np.nan to let our heatmap only shows valid days
- data.loc[data[data["is_valid_sensed_day"] == False].index, "valid_sensed_hours"] = np.nan
-
- if expected_num_of_days == -1:
- # show all days
- data.index = pd.to_datetime(data.index)
- start_date = data.index.min()
- # upsample data into one day bins
- data = data.resample("1D").sum()
- data["date_idx"] = (data.index - start_date).days
- data.set_index("date_idx", inplace=True, drop=True)
- row = row + data[col_name].tolist()
- else:
- # only show last certain days
- for date in last_certain_dates:
- if date in data.index:
- row.append(data.loc[date][col_name])
- else:
- row.append(0)
-
- return row
-
-def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_certain_dates, bin_size, min_bins_per_hour, expected_num_of_days, output_path):
- plot = go.Figure(data=go.Heatmap(z=valid_sensed_hours[last_certain_dates].values,
- x=[date.replace("-", "/") for date in last_certain_dates] if expected_num_of_days != -1 else last_certain_dates,
- y=[pid + "." + label for pid, label in zip(sensors_with_data["pid"].to_list(), sensors_with_data["label"].to_list())],
- text=sensors_with_data[last_certain_dates].values,
- hovertemplate="Date: %{x}<br>Participant: %{y}<br>Valid sensed hours: %{z}<br>Number of sensors with data: %{text}" if expected_num_of_days != -1 else "Day index: %{x}<br>Participant: %{y}<br>Valid sensed hours: %{z}<br>Number of sensors with data: %{text}",
- colorscale="Viridis",
- colorbar={"tick0": 0,"dtick": 1},
- showscale=True))
- if expected_num_of_days != -1:
- plot.update_layout(title="Overall compliance heatmap for last " + str(expected_num_of_days) + " days.<br>Bin's color shows valid sensed hours for that day.<br>A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes.<br>You can hover over every day to see the number of sensors with data in that day.")
- else:
- plot.update_layout(title="Overall compliance heatmap for all days.<br>Bin's color shows valid sensed hours for that day.<br>A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes.<br>You can hover over every day to see the number of sensors with data in that day.")
-
- plot["layout"]["xaxis"].update(side="bottom")
- plot["layout"].update(xaxis_title="Day indexes")
- plot["layout"].update(margin=dict(t=160))
- pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
-
-
-phone_sensed_bins = snakemake.input["phone_sensed_bins"]
-phone_valid_sensed_days = snakemake.input["phone_valid_sensed_days"]
-pid_files = snakemake.input["pid_files"]
-only_show_valid_days = snakemake.params["only_show_valid_days"]
-local_timezone = snakemake.params["local_timezone"]
-bin_size = snakemake.params["bin_size"]
-min_bins_per_hour = snakemake.params["min_bins_per_hour"]
-expected_num_of_days = int(snakemake.params["expected_num_of_days"])
-
-if expected_num_of_days < -1:
- raise ValueError("EXPECTED_NUM_OF_DAYS of OVERALL_COMPLIANCE_HEATMAP section in config.yaml must be larger or equal to -1.")
-
-last_certain_dates = []
-if expected_num_of_days != -1:
- # get the list of dates to show
- cur_date = datetime.datetime.now().astimezone(tz.gettz(local_timezone)).date()
- for date_offset in range(expected_num_of_days-1, -1, -1):
- last_certain_dates.append((cur_date - datetime.timedelta(days=date_offset)).strftime("%Y-%m-%d"))
-
-sensors_with_data_records, valid_sensed_hours_records = [], []
-for sensors_with_data_individual, valid_sensed_hours_individual, pid_file in zip(phone_sensed_bins, phone_valid_sensed_days, pid_files):
-
- with open(pid_file, encoding="ISO-8859-1") as external_file:
- external_file_content = external_file.readlines()
- device_id = external_file_content[0].split(",")[-1].strip()
- label = external_file_content[2].strip()
- pid = pid_file.split("/")[-1]
-
- sensors_with_data_records.append(getOneRow(sensors_with_data_individual, last_certain_dates, "num_sensors", [pid, label, device_id], expected_num_of_days, only_show_valid_days))
- valid_sensed_hours_records.append(getOneRow(valid_sensed_hours_individual, last_certain_dates, "valid_sensed_hours", [pid, label, device_id], expected_num_of_days, only_show_valid_days))
-
-if expected_num_of_days == -1:
- # get the date_idx of all days
- total_num_of_days = max([len(x) for x in sensors_with_data_records]) - 3
- last_certain_dates = [date_idx for date_idx in range(total_num_of_days)]
-
-sensors_with_data = pd.DataFrame(data=sensors_with_data_records, columns=["pid", "label", "device_id"] + last_certain_dates).replace(0, np.nan)
-valid_sensed_hours = pd.DataFrame(data=valid_sensed_hours_records, columns=["pid", "label", "device_id"] + last_certain_dates).replace(0, np.nan)
-
-if sensors_with_data.empty:
- empty_html = open(snakemake.output[0], "w")
- empty_html.write("There is no sensor data for all participants")
- empty_html.close()
-else:
- getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_certain_dates, bin_size, min_bins_per_hour, expected_num_of_days, snakemake.output[0])
diff --git a/src/visualization/visualize.py b/src/visualization/visualize.py
deleted file mode 100644
index e69de29b..00000000