Update 4 plots for time_segments

pull/103/head
Meng Li 2020-12-03 21:00:32 -05:00
parent 3560217e3b
commit 9a0e57301b
21 changed files with 447 additions and 565 deletions

View File

@ -231,20 +231,19 @@ for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys():
if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]:
files_to_compute.append("reports/data_exploration/histogram_phone_data_yield.html")
# visualization for data exploration
# if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
if config["HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT"]["PLOT"]:
files_to_compute.extend(expand("reports/interim/{pid}/heatmap_sensors_per_minute_per_time_segment.html", pid=config["PIDS"]))
files_to_compute.append("reports/data_exploration/heatmap_sensors_per_minute_per_time_segment.html")
# if config["HEATMAP_DAYS_BY_SENSORS"]["PLOT"]:
# files_to_compute.extend(expand("reports/interim/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"], min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_days_by_sensors_all_participants.html", min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
if config["HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT"]["PLOT"]:
files_to_compute.extend(expand("reports/interim/{pid}/heatmap_sensor_row_count_per_time_segment.html", pid=config["PIDS"]))
files_to_compute.append("reports/data_exploration/heatmap_sensor_row_count_per_time_segment.html")
# if config["HEATMAP_SENSED_BINS"]["PLOT"]:
# files_to_compute.extend(expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"]))
# files_to_compute.extend(["reports/data_exploration/heatmap_sensed_bins_all_participants.html"])
if config["HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT"]["PLOT"]:
files_to_compute.append("reports/data_exploration/heatmap_phone_data_yield_per_participant_per_time_segment.html")
# if config["OVERALL_COMPLIANCE_HEATMAP"]["PLOT"]:
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/overall_compliance_heatmap.html", min_valid_hours_per_day=config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
if config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["PLOT"]:
files_to_compute.append("reports/data_exploration/heatmap_feature_correlation_matrix.html")
rule all:

View File

@ -259,9 +259,6 @@ PHONE_WIFI_VISIBLE:
########################################################################################################################
# FITBIT #
########################################################################################################################
@ -350,7 +347,6 @@ FITBIT_STEPS_INTRADAY:
########################################################################################################################
# PLOTS #
########################################################################################################################
@ -358,32 +354,19 @@ FITBIT_STEPS_INTRADAY:
HISTOGRAM_PHONE_DATA_YIELD:
PLOT: False
HEATMAP_FEATURES_CORRELATIONS:
HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT:
PLOT: False
HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT:
PLOT: False
SENSORS: [PHONE_ACCELEROMETER, PHONE_ACTIVITY_RECOGNITION, PHONE_APPLICATIONS_FOREGROUND, PHONE_BATTERY, PHONE_BLUETOOTH, PHONE_CALLS, PHONE_CONVERSATION, PHONE_LIGHT, PHONE_LOCATIONS, PHONE_MESSAGES, PHONE_SCREEN, PHONE_WIFI_CONNECTED, PHONE_WIFI_VISIBLE]
HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT:
PLOT: False
HEATMAP_FEATURE_CORRELATION_MATRIX:
PLOT: False
MIN_ROWS_RATIO: 0.5
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
PHONE_FEATURES: [accelerometer, activity_recognition, applications_foreground, battery, calls_incoming, calls_missed, calls_outgoing, conversation, light, location_doryab, messages_received, messages_sent, screen]
FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep]
CORR_THRESHOLD: 0.1
CORR_METHOD: "pearson" # choose from {"pearson", "kendall", "spearman"}
HEATMAP_DAYS_BY_SENSORS:
PLOT: False
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
EXPECTED_NUM_OF_DAYS: -1
DB_TABLES: [accelerometer, applications_foreground, battery, bluetooth, calls, light, locations, messages, screen, wifi, sensor_wifi, plugin_google_activity_recognition, plugin_ios_activity_recognition, plugin_studentlife_audio_android, plugin_studentlife_audio]
HEATMAP_SENSED_BINS:
PLOT: False
BIN_SIZE: #*bin_size
OVERALL_COMPLIANCE_HEATMAP:
PLOT: False
ONLY_SHOW_VALID_DAYS: False
EXPECTED_NUM_OF_DAYS: -1
BIN_SIZE: #*bin_size
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour

View File

@ -214,6 +214,20 @@ for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys():
if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]:
files_to_compute.append("reports/data_exploration/histogram_phone_data_yield.html")
if config["HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT"]["PLOT"]:
files_to_compute.extend(expand("reports/interim/{pid}/heatmap_sensors_per_minute_per_time_segment.html", pid=config["PIDS"]))
files_to_compute.append("reports/data_exploration/heatmap_sensors_per_minute_per_time_segment.html")
if config["HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT"]["PLOT"]:
files_to_compute.extend(expand("reports/interim/{pid}/heatmap_sensor_row_count_per_time_segment.html", pid=config["PIDS"]))
files_to_compute.append("reports/data_exploration/heatmap_sensor_row_count_per_time_segment.html")
if config["HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT"]["PLOT"]:
files_to_compute.append("reports/data_exploration/heatmap_phone_data_yield_per_participant_per_time_segment.html")
if config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["PLOT"]:
files_to_compute.append("reports/data_exploration/heatmap_feature_correlation_matrix.html")
# Analysis Workflow Example
models, scalers = [], []
for model_name in config["PARAMS_FOR_ANALYSIS"]["MODEL_NAMES"]:

View File

@ -323,6 +323,22 @@ FITBIT_STEPS_INTRADAY:
HISTOGRAM_PHONE_DATA_YIELD:
PLOT: True
HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT:
PLOT: True
HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT:
PLOT: True
SENSORS: [PHONE_ACCELEROMETER, PHONE_ACTIVITY_RECOGNITION, PHONE_APPLICATIONS_FOREGROUND, PHONE_BATTERY, PHONE_BLUETOOTH, PHONE_CALLS, PHONE_CONVERSATION, PHONE_LIGHT, PHONE_LOCATIONS, PHONE_MESSAGES, PHONE_SCREEN, PHONE_WIFI_CONNECTED, PHONE_WIFI_VISIBLE]
HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT:
PLOT: True
HEATMAP_FEATURE_CORRELATION_MATRIX:
PLOT: True
MIN_ROWS_RATIO: 0.5
CORR_THRESHOLD: 0.1
CORR_METHOD: "pearson" # choose from {"pearson", "kendall", "spearman"}
########################################################################################################################

View File

@ -1,19 +1,3 @@
# Common.smk ##########################################################################################################
def infer_participant_platform(participant_file):
    """Return the platform declared on line 2 of a participant file.

    Line 2 is read as a comma-separated list of platforms. "multiple", or a
    list containing both "android" and "ios", resolves to "android";
    otherwise the first entry is used.

    Args:
        participant_file: Path to the participant file (read as ISO-8859-1).

    Returns:
        Either "android" or "ios".

    Raises:
        ValueError: If the resolved platform is neither "android" nor "ios".
    """
    with open(participant_file, encoding="ISO-8859-1") as external_file:
        external_file_content = external_file.readlines()
    platforms = external_file_content[1].strip().split(",")

    if platforms[0] == "multiple" or (len(platforms) > 1 and "android" in platforms and "ios" in platforms):
        platform = "android"
    else:
        platform = platforms[0]

    if platform not in ["android", "ios"]:
        # `platforms` is a list: join it back into the text the user typed,
        # otherwise the concatenation itself fails with a TypeError.
        raise ValueError("Platform (line 2) in a participant file should be 'android', 'ios', or 'multiple'. You typed '" + ",".join(platforms) + "'")
    return platform
# Features.smk #########################################################################################################
def find_features_files(wildcards):
feature_files = []
@ -38,14 +22,3 @@ def input_merge_sensor_features_for_individual_participants(wildcards):
break
return feature_files
# Reports.smk ###########################################################################################################
def optional_heatmap_days_by_sensors_input(wildcards):
    """List the raw sensor CSVs to plot for the participant in `wildcards.pid`.

    The tables come from config["HEATMAP_DAYS_BY_SENSORS"]["DB_TABLES"], minus
    the conversation and activity recognition tables of the other platform.
    """
    platform = infer_participant_platform("data/external/"+wildcards.pid)

    # Name of the platform whose tables must be discarded.
    if platform == "android":
        other_platform = "IOS"
    elif platform == "ios":
        other_platform = "ANDROID"

    tables_platform = [
        table
        for table in config["HEATMAP_DAYS_BY_SENSORS"]["DB_TABLES"]
        if table not in [config["CONVERSATION"]["DB_TABLE"][other_platform], config["ACTIVITY_RECOGNITION"]["DB_TABLE"][other_platform]]
    ]
    return expand("data/raw/{{pid}}/{table}_with_datetime.csv", table = tables_platform)

View File

@ -6,74 +6,66 @@ rule histogram_phone_data_yield:
script:
"../src/visualization/histogram_phone_data_yield.py"
rule heatmap_features_correlations:
rule heatmap_sensors_per_minute_per_time_segment:
input:
features = expand("data/processed/{pid}/{sensor}_{time_segment}.csv", pid=config["PIDS"], sensor=config["HEATMAP_FEATURES_CORRELATIONS"]["PHONE_FEATURES"]+config["HEATMAP_FEATURES_CORRELATIONS"]["FITBIT_FEATURES"], time_segment=config["TIME_SEGMENTS"]),
phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}hours_{{min_valid_bins_per_hour}}bins.csv", pid=config["PIDS"])
phone_data_yield = "data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv",
participant_file = "data/external/participant_files/{pid}.yaml",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
min_rows_ratio = config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_ROWS_RATIO"],
corr_threshold = config["HEATMAP_FEATURES_CORRELATIONS"]["CORR_THRESHOLD"],
corr_method = config["HEATMAP_FEATURES_CORRELATIONS"]["CORR_METHOD"]
pid = "{pid}"
output:
"reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html"
"reports/interim/{pid}/heatmap_sensors_per_minute_per_time_segment.html"
script:
"../src/visualization/heatmap_features_correlations.py"
"../src/visualization/heatmap_sensors_per_minute_per_time_segment.py"
rule heatmap_days_by_sensors:
rule merge_heatmap_sensors_per_minute_per_time_segment:
input:
sensors = optional_heatmap_days_by_sensors_input,
phone_valid_sensed_days = "data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins.csv"
heatmap_sensors_per_minute_per_time_segment = expand("reports/interim/{pid}/heatmap_sensors_per_minute_per_time_segment.html", pid=config["PIDS"])
output:
"reports/data_exploration/heatmap_sensors_per_minute_per_time_segment.html"
script:
"../src/visualization/merge_heatmap_sensors_per_minute_per_time_segment.Rmd"
rule heatmap_sensor_row_count_per_time_segment:
input:
all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor = map(str.lower, config["HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT"]["SENSORS"])),
phone_data_yield = "data/processed/features/{pid}/phone_data_yield.csv",
participant_file = "data/external/participant_files/{pid}.yaml",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
pid = "{pid}",
expected_num_of_days = config["HEATMAP_DAYS_BY_SENSORS"]["EXPECTED_NUM_OF_DAYS"]
pid = "{pid}"
output:
"reports/interim/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/{pid}/heatmap_days_by_sensors.html"
"reports/interim/{pid}/heatmap_sensor_row_count_per_time_segment.html"
script:
"../src/visualization/heatmap_days_by_sensors.py"
"../src/visualization/heatmap_sensor_row_count_per_time_segment.py"
rule heatmap_days_by_sensors_all_participants:
rule merge_heatmap_sensor_row_count_per_time_segment:
input:
heatmap_rows = expand("reports/interim/{{min_valid_hours_per_day}}hours_{{min_valid_bins_per_hour}}bins/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"])
heatmap_sensor_row_count_per_time_segment = expand("reports/interim/{pid}/heatmap_sensor_row_count_per_time_segment.html", pid=config["PIDS"])
output:
"reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_days_by_sensors_all_participants.html"
"reports/data_exploration/heatmap_sensor_row_count_per_time_segment.html"
script:
"../src/visualization/heatmap_days_by_sensors_all_participants.Rmd"
"../src/visualization/merge_heatmap_sensor_row_count_per_time_segment.Rmd"
rule heatmap_sensed_bins:
rule heatmap_phone_data_yield_per_participant_per_time_segment:
input:
sensor = "data/interim/{pid}/phone_sensed_bins.csv",
pid_file = "data/external/{pid}"
phone_data_yield = expand("data/processed/features/{pid}/phone_data_yield.csv", pid=config["PIDS"]),
participant_file = expand("data/external/participant_files/{pid}.yaml", pid=config["PIDS"]),
time_segments_labels = expand("data/interim/time_segments/{pid}_time_segments_labels.csv", pid=config["PIDS"])
output:
"reports/data_exploration/heatmap_phone_data_yield_per_participant_per_time_segment.html"
script:
"../src/visualization/heatmap_phone_data_yield_per_participant_per_time_segment.py"
rule heatmap_feature_correlation_matrix:
input:
all_sensor_features = "data/processed/features/all_participants/all_sensor_features.csv" # before data cleaning
params:
pid = "{pid}",
bin_size = config["HEATMAP_SENSED_BINS"]["BIN_SIZE"]
min_rows_ratio = config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["MIN_ROWS_RATIO"],
corr_threshold = config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["CORR_THRESHOLD"],
corr_method = config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["CORR_METHOD"]
output:
"reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html"
"reports/data_exploration/heatmap_feature_correlation_matrix.html"
script:
"../src/visualization/heatmap_sensed_bins.py"
"../src/visualization/heatmap_feature_correlation_matrix.py"
rule heatmap_sensed_bins_all_participants:
input:
heatmap_sensed_bins = expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"])
output:
"reports/data_exploration/heatmap_sensed_bins_all_participants.html"
script:
"../src/visualization/heatmap_sensed_bins_all_participants.Rmd"
rule overall_compliance_heatmap:
input:
phone_sensed_bins = expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]),
phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}hours_{{min_valid_bins_per_hour}}bins.csv", pid=config["PIDS"]),
pid_files = expand("data/external/{pid}", pid=config["PIDS"])
params:
only_show_valid_days = config["OVERALL_COMPLIANCE_HEATMAP"]["ONLY_SHOW_VALID_DAYS"],
local_timezone = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"],
expected_num_of_days = config["OVERALL_COMPLIANCE_HEATMAP"]["EXPECTED_NUM_OF_DAYS"],
bin_size = config["OVERALL_COMPLIANCE_HEATMAP"]["BIN_SIZE"],
min_bins_per_hour = "{min_valid_bins_per_hour}"
output:
"reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/overall_compliance_heatmap.html"
script:
"../src/visualization/overall_compliance_heatmap.py"

View File

@ -1,34 +0,0 @@
import pandas as pd
import datetime
import plotly.io as pio
import plotly.graph_objects as go
def getBatteryConsumptionRatesBarChart(battery_data, pid):
    """Build a horizontal bar chart of daily battery consumption rates.

    Reads the "battery_daily_avgconsumptionrate" and "local_date" columns of
    `battery_data`. The title also uses the module-level `label` and
    `device_id` names.
    """
    day_labels = battery_data["local_date"].apply(lambda x: x.strftime("%Y/%m/%d")).tolist()
    bars = go.Bar(
        x=battery_data["battery_daily_avgconsumptionrate"],
        y=day_labels,
        orientation='h',
    )
    plot = go.Figure(bars)

    chart_title = "Daily battery consumption rates bar chart for " + pid + "<br>Label: " + label + ", device_id: " + device_id
    plot.update_layout(title=chart_title, xaxis_title="battery drains % per hour")
    return plot
# Script body: plot the daily battery consumption rates of one participant,
# or write a short placeholder page when there is no battery data.
battery_data = pd.read_csv(snakemake.input["sensor"], parse_dates=["local_date"])
pid = snakemake.params["pid"]

with open(snakemake.input["pid_file"], encoding="ISO-8859-1") as external_file:
    external_file_content = external_file.readlines()
# readlines() keeps the line terminator; strip it so it does not leak into
# the chart title or the placeholder text.
device_id = external_file_content[0].split(",")[-1].strip()
label = external_file_content[2].strip()

if battery_data.empty:
    # Context manager guarantees the handle is closed even if write() fails.
    with open(snakemake.output[0], "w") as empty_html:
        empty_html.write("There is no battery data for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
else:
    # Resample to one row per day; days without data become 0.
    battery_data.set_index(["local_date"], inplace=True)
    battery_data = battery_data.resample("1D").asfreq().fillna(0).reset_index()
    plot = getBatteryConsumptionRatesBarChart(battery_data, pid)
    pio.write_html(plot, file=snakemake.output[0], auto_open=False, include_plotlyjs="cdn")

View File

@ -1,39 +0,0 @@
---
title: "Compliance Report"
author:
- "MoSHI Pipeline"
date: "`r format(Sys.time(), '%d %B, %Y')`"
params:
rmd: "compliance_report.Rmd"
output:
html_document:
highlight: tango
number_sections: no
theme: default
toc: yes
toc_depth: 3
toc_float:
collapsed: no
smooth_scroll: yes
---
```{r include=FALSE}
source("renv/activate.R")
```
## Overall phone compliance
```{r, echo=FALSE}
htmltools::includeHTML(snakemake@input[["compliance_heatmap"]])
```
## Per sensor compliance
```{r, echo=FALSE}
# Embed every per-sensor heatmap HTML file listed in the Snakemake input.
heatmaps <- snakemake@input[["sensor_heatmaps"]]
heatmaps.html <- vector(mode="list", length(heatmaps))
# seq_along() yields an empty sequence for an empty input, whereas
# 1:length(heatmaps) would iterate over 1 and 0 and fail on the first index.
for(sensor_id in seq_along(heatmaps)){
  heatmaps.html[[sensor_id]] <- htmltools::includeHTML(heatmaps[sensor_id])
}
htmltools::tagList(heatmaps.html)
```

View File

@ -1,74 +0,0 @@
import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
from datetime import datetime, timedelta
def getRowCountHeatmap(row_count_sensors_normalized, row_count_sensors, pid, output_path):
    """Write a sensors-by-days row count heatmap for `pid` to `output_path`.

    Colors come from `row_count_sensors_normalized`; the hover text shows the
    values of `row_count_sensors`. Each index entry of `row_count_sensors` is
    rendered as "<date>(<second index element>)" on the x axis.
    """
    day_labels = [
        datetime.strftime(idx[0], "%Y/%m/%d")+"("+str(idx[1])+")"
        for idx in row_count_sensors.index
    ]
    heatmap = go.Heatmap(
        z=row_count_sensors_normalized.T.values.tolist(),
        x=day_labels,
        y=row_count_sensors.columns.tolist(),
        hovertext=row_count_sensors.T.values.tolist(),
        hovertemplate="Date: %{x}<br>Sensor: %{y}<br>Row count: %{hovertext}<extra></extra>",
        colorscale="Viridis",
    )
    plot = go.Figure(data=heatmap)
    plot.update_layout(title="Row count heatmap for " + pid)
    pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
# Script body: count rows per sensor per day for one participant and plot
# them as a heatmap (or write a placeholder page when there is nothing to plot).

# Keep only the days flagged as valid sensed days.
phone_valid_sensed_days = pd.read_csv(snakemake.input["phone_valid_sensed_days"], parse_dates=["local_date"], index_col=["local_date"])
phone_valid_sensed_days = phone_valid_sensed_days[phone_valid_sensed_days["is_valid_sensed_day"] == True]

# One column of daily row counts per sensor, outer-joined on the date index.
row_count_sensors = pd.DataFrame()
for sensor_path in snakemake.input["sensors"]:
    sensor_name = sensor_path.split("/")[-1].replace("_with_datetime.csv", "")
    # plugin_studentlife_audio_android or plugin_studentlife_audio => conversion; plugin_google_activity_recognition or plugin_ios_activity_recognition => AR; applications_foreground => apps
    sensor_name = sensor_name.replace("plugin_studentlife_audio_android", "conversion").replace("plugin_studentlife_audio", "conversion") \
                             .replace("plugin_google_activity_recognition", "AR").replace("plugin_ios_activity_recognition", "AR") \
                             .replace("applications_foreground", "apps")

    sensor_data = pd.read_csv(sensor_path, encoding="ISO-8859-1", parse_dates=["local_date"], dtype={"label": str})
    if sensor_data.empty:
        row_count_sensor = pd.DataFrame(columns=[sensor_name])
    else:
        row_count_sensor = sensor_data[["timestamp", "local_date"]].groupby(["local_date"]).count().rename(columns={"timestamp": sensor_name})
    row_count_sensors = row_count_sensors.join(row_count_sensor, how="outer")

row_count_sensors.index = pd.to_datetime(row_count_sensors.index)
row_count_sensors = row_count_sensors.join(phone_valid_sensed_days[["valid_sensed_hours"]], how="outer")

if row_count_sensors.empty:
    # Context manager guarantees the handle is closed even if write() fails.
    with open(snakemake.output[0], "w") as empty_html:
        empty_html.write("There are no records of sensors in database.")
else:
    # set date_idx based on the first date
    reference_date = row_count_sensors.index.min()
    last_date = row_count_sensors.index.max()
    row_count_sensors["date_idx"] = (row_count_sensors.index - reference_date).days
    row_count_sensors["local_date"] = row_count_sensors.index
    row_count_sensors.set_index(["local_date", "date_idx"], inplace=True)

    expected_num_of_days = int(snakemake.params["expected_num_of_days"])
    if expected_num_of_days < -1:
        raise ValueError("EXPECTED_NUM_OF_DAYS of HEATMAP_DAYS_BY_SENSORS section in config.yaml must be larger or equal to -1.")
    # if expected_num_of_days = -1, return all dates.
    # The span is inclusive of both ends, hence the "+ 1": without it the last
    # date (date_idx == span) is dropped, and a single day yields no rows.
    if expected_num_of_days == -1:
        expected_num_of_days = (last_date - reference_date).days + 1

    # add empty rows to make sure different participants have the same date_idx range
    date_idx_range = list(range(expected_num_of_days))
    date_range = [reference_date + timedelta(days=idx) for idx in date_idx_range]
    all_dates = pd.DataFrame({"local_date": date_range, "date_idx": date_idx_range})
    all_dates.set_index(["local_date", "date_idx"], inplace=True)

    row_count_sensors = row_count_sensors.merge(all_dates, left_index=True, right_index=True, how="right")

    # normalize each sensor (column)
    if row_count_sensors.count().max() > 1:
        row_count_sensors_normalized = row_count_sensors.fillna(np.nan).apply(lambda x: (x - np.nanmin(x)) / (np.nanmax(x) - np.nanmin(x)) if np.nanmax(x) != np.nanmin(x) else (x / np.nanmin(x)), axis=0)
    else:
        row_count_sensors_normalized = row_count_sensors

    # The participant id is the third path component of the last sensor file.
    pid = sensor_path.split("/")[2]
    getRowCountHeatmap(row_count_sensors_normalized, row_count_sensors, pid, snakemake.output[0])

View File

@ -0,0 +1,48 @@
import numpy as np
import pandas as pd
import plotly.graph_objects as go
def getCorrMatrixHeatmap(corr_matrix, time_segment, html_file):
    """Append a correlation heatmap for `time_segment` to the open `html_file`.

    The columns of `corr_matrix` label both axes. The figure is written as an
    HTML fragment (not a full page) that loads plotly.js from the CDN.
    """
    feature_names = corr_matrix.columns
    heatmap = go.Heatmap(
        z=corr_matrix.values.tolist(),
        x=feature_names,
        y=feature_names,
        colorscale="Viridis",
    )
    fig = go.Figure(data=heatmap)
    fig.update_layout(title="Correlation matrix of features of " + time_segment + " segments.")

    fragment = fig.to_html(full_html=False, include_plotlyjs="cdn")
    html_file.write(fragment)
# Script body: write one feature correlation heatmap per time segment label
# into a single HTML report.
min_rows_ratio = snakemake.params["min_rows_ratio"]
corr_threshold = snakemake.params["corr_threshold"]
corr_method = snakemake.params["corr_method"]

features = pd.read_csv(snakemake.input["all_sensor_features"])

# Sorted unique labels: iterating a bare set would order the plots differently
# from run to run, and a missing (NaN) label cannot be concatenated into the
# title strings below.
time_segments = sorted(features["local_segment_label"].dropna().unique())

# "w" + context manager: never append to a stale report, and always close the
# handle even when plotting fails.
with open(snakemake.output[0], "w", encoding="utf-8") as html_file:
    if features.empty:
        html_file.write("There are no features for any participant.")
    else:
        for time_segment in time_segments:
            features_per_segment = features[features["local_segment_label"] == time_segment]
            if features_per_segment.empty:
                html_file.write("There are no features for " + time_segment + " segments.<br>")
            else:
                # drop useless columns
                features_per_segment = features_per_segment.drop(["pid", "local_segment", "local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"], axis=1).astype(float)
                # get correlation matrix
                # NOTE(review): min_periods is a float here; confirm pandas accepts
                # a non-integer value for every supported corr_method.
                corr_matrix = features_per_segment.corr(method=corr_method, min_periods=min_rows_ratio * features_per_segment.shape[0])
                # replace correlation coefficients less than corr_threshold with NA
                corr_matrix[(corr_matrix > -corr_threshold) & (corr_matrix < corr_threshold)] = np.nan
                # plot heatmap
                getCorrMatrixHeatmap(corr_matrix, time_segment, html_file)

View File

@ -1,59 +0,0 @@
import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
def getCorrMatrixHeatmap(corr_matrix, output_path):
    """Write `corr_matrix` as a standalone heatmap HTML page at `output_path`.

    The columns of `corr_matrix` label both axes.
    """
    colnames = corr_matrix.columns
    heatmap = go.Heatmap(
        z=corr_matrix.values.tolist(),
        x=colnames,
        y=colnames,
        colorscale="Viridis",
    )
    plot = go.Figure(data=heatmap)
    plot.update_layout(title="Correlation Matrix Heatmap")
    pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
# Script body: merge the feature files of all participants, keep only valid
# sensed days, and plot the feature correlation matrix.
# Thresholds supplied by the Snakemake rule.
min_rows_ratio = snakemake.params["min_rows_ratio"]
corr_threshold = snakemake.params["corr_threshold"]
# merge features
# `features_all_sensors` accumulates the sensor files of the current participant
# (outer-merged on "local_date"); `features` stacks the finished participants.
# NOTE(review): indentation is not visible in this view; presumably the flush
# into `features` happens only when a new pid is first seen — confirm.
features, features_all_sensors = pd.DataFrame(columns=["local_date"]), pd.DataFrame(columns=["local_date"])
pids = set()
last_pid = None
for path in snakemake.input["features"]:
# the participant id is the third component of the feature file path
pid = path.split("/")[2]
if pid not in pids:
pids.add(pid)
features_all_sensors["pid"] = last_pid
features = pd.concat([features, features_all_sensors], axis=0, ignore_index=True, sort=False)
features_all_sensors = pd.DataFrame(columns=["local_date"])
features_per_sensor = pd.read_csv(path)
features_all_sensors = features_all_sensors.merge(features_per_sensor, on="local_date", how="outer")
last_pid = pid
# flush the participant that was still being accumulated when the loop ended
features_all_sensors["pid"] = last_pid
features = pd.concat([features, features_all_sensors], axis=0, ignore_index=True, sort=False)
features.set_index(["pid", "local_date"], inplace=True)
# select days based on the input of "phone_valid_sensed_days"
selected_participants_and_days = pd.DataFrame()
for path in snakemake.input["phone_valid_sensed_days"]:
pid = path.split("/")[2]
phone_valid_sensed_days = pd.read_csv(path)
phone_valid_sensed_days = phone_valid_sensed_days[phone_valid_sensed_days["is_valid_sensed_day"] == True]
phone_valid_sensed_days["pid"] = pid
selected_participants_and_days = pd.concat([selected_participants_and_days, phone_valid_sensed_days], axis=0)
selected_participants_and_days.set_index(["pid", "local_date"], inplace=True)
# keep only the (pid, local_date) rows present in both frames
features = features.loc[features.index.intersection(selected_participants_and_days.index), :]
# get correlation matrix
features = features.astype(float)
# min_periods is min_rows_ratio times the number of remaining rows
corr_matrix = features.corr(method=snakemake.params["corr_method"], min_periods=min_rows_ratio * features.shape[0])
# replace correlation coefficients less than corr_threshold with NA
corr_matrix[(corr_matrix > -corr_threshold) & (corr_matrix < corr_threshold)] = np.nan
# plot heatmap
getCorrMatrixHeatmap(corr_matrix, snakemake.output[0])

View File

@ -0,0 +1,85 @@
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import yaml
def getPhoneDataYieldHeatmap(data_for_plot, y_axis_labels, time_segment, type, html_file):
    """Append a data yield heatmap for `time_segment` to the open `html_file`.

    `data_for_plot` supplies the cell values and its columns label the x axis;
    `y_axis_labels` label the rows. `type` is the unit name inserted into the
    title and hover text (the callers in this script pass "minutes" or
    "hours"). The color scale is fixed to the range 0..1.
    """
    hover_template = "Time since first segment: %{x}<br>Participant: %{y}<br>Ratiovalidyielded" + type + ": %{z}<extra></extra>"
    heatmap = go.Heatmap(
        z=data_for_plot.values.tolist(),
        x=data_for_plot.columns.tolist(),
        y=y_axis_labels,
        hovertext=data_for_plot.values.tolist(),
        hovertemplate=hover_template,
        zmin=0, zmax=1,
        colorscale="Viridis",
    )
    fig = go.Figure(data=heatmap)

    title_text = "Heatmap of valid yielded " + type + " ratio for " + time_segment + " segments.<br>y-axis shows participant information (format: pid.label).<br>x-axis shows the time since their first segment.<br>z-axis (color) shows valid yielded " + type + " ratio during a segment instance."
    fig.update_layout(title=title_text)

    # Axis placement, axis title, and extra top margin for the multi-line title.
    layout = fig["layout"]
    layout["xaxis"].update(side="bottom")
    layout.update(xaxis_title="Time Since First Segment")
    layout.update(margin=dict(t=160))

    html_file.write(fig.to_html(full_html=False, include_plotlyjs="cdn"))
# Script body: collect the valid yielded minutes/hours ratios of every
# participant, grouped by time segment label, and write two heatmaps (minutes
# and hours) per label into a single HTML report.
y_axis_labels, phone_data_yield_minutes, phone_data_yield_hours = [], {}, {}
for phone_data_yield_path, participant_file_path, time_segments_path in zip(snakemake.input["phone_data_yield"], snakemake.input["participant_file"], snakemake.input["time_segments_labels"]):

    # set pid.label as y_axis_label
    pid = phone_data_yield_path.split("/")[3]
    time_segments = pd.read_csv(time_segments_path, header=0)["label"]

    with open(participant_file_path, "r", encoding="utf-8") as f:
        participant_file = yaml.safe_load(f)
    label = participant_file["PHONE"]["LABEL"]
    # str(): YAML parses an unquoted numeric label as a number, which cannot
    # be concatenated to the pid directly.
    y_axis_label = pid + "." + str(label)
    y_axis_labels.append(y_axis_label)

    phone_data_yield = pd.read_csv(phone_data_yield_path, index_col=["local_segment_start_datetime"], parse_dates=["local_segment_start_datetime"])
    # make sure the phone_data_yield file contains "phone_data_yield_rapids_ratiovalidyieldedminutes" and "phone_data_yield_rapids_ratiovalidyieldedhours" columns
    if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
        raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")

    if not phone_data_yield.empty:

        for time_segment in time_segments:
            phone_data_yield_per_segment = phone_data_yield[phone_data_yield["local_segment_label"] == time_segment]

            if not phone_data_yield_per_segment.empty:

                # set number of minutes after the first start date time of local segments as x_axis_label
                phone_data_yield_per_segment.index = phone_data_yield_per_segment.index - phone_data_yield_per_segment.index.min()

                phone_data_yield_minutes_per_segment = phone_data_yield_per_segment[["phone_data_yield_rapids_ratiovalidyieldedminutes"]].rename(columns={"phone_data_yield_rapids_ratiovalidyieldedminutes": y_axis_label})
                phone_data_yield_hours_per_segment = phone_data_yield_per_segment[["phone_data_yield_rapids_ratiovalidyieldedhours"]].rename(columns={"phone_data_yield_rapids_ratiovalidyieldedhours": y_axis_label})

                if time_segment not in phone_data_yield_minutes:
                    phone_data_yield_minutes[time_segment] = phone_data_yield_minutes_per_segment
                    phone_data_yield_hours[time_segment] = phone_data_yield_hours_per_segment
                else:
                    # one column per participant, aligned on time since first segment
                    phone_data_yield_minutes[time_segment] = pd.concat([phone_data_yield_minutes[time_segment], phone_data_yield_minutes_per_segment], axis=1, sort=True)
                    phone_data_yield_hours[time_segment] = pd.concat([phone_data_yield_hours[time_segment], phone_data_yield_hours_per_segment], axis=1, sort=True)

# "w" + context manager: never append to a stale report, and always close the
# handle even when plotting fails.
with open(snakemake.output[0], "w", encoding="utf-8") as html_file:
    if not phone_data_yield_minutes:
        html_file.write("There is no sensor data for the sensors in [PHONE_DATA_YIELD][SENSORS].")
    for time_segment in phone_data_yield_minutes:
        # rows follow y_axis_labels so every participant appears, even without data
        minutes_data_for_plot = phone_data_yield_minutes[time_segment].transpose().reindex(pd.Index(y_axis_labels)).round(3)
        hours_data_for_plot = phone_data_yield_hours[time_segment].transpose().reindex(pd.Index(y_axis_labels)).round(3)

        getPhoneDataYieldHeatmap(minutes_data_for_plot, y_axis_labels, time_segment, "minutes", html_file)
        getPhoneDataYieldHeatmap(hours_data_for_plot, y_axis_labels, time_segment, "hours", html_file)

View File

@ -1,68 +0,0 @@
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.graph_objects as go
import datetime
def getComplianceMatrix(dates, compliance_bins):
    """Return one list of "count" values per entry of `dates`.

    For each date, the rows of `compliance_bins` whose "local_date" equals it
    are selected, in their original order. A date with no matching rows
    yields an empty list.
    """
    return [
        compliance_bins.loc[compliance_bins["local_date"] == day, "count"].tolist()
        for day in dates
    ]
def getRowCountHeatmap(dates, row_count_per_bin, sensor_name, pid, output_path, bin_size):
    """Write a heatmap of row counts (dates by time-of-day bins) as HTML.

    ``row_count_per_bin`` is passed to plotly as the z matrix, ``dates``
    become the y-axis labels, and the x axis gets one "HH:MM" label for
    each of the ``24 * int(60 / bin_size)`` bins. The title also uses the
    module-level ``label`` and ``device_id``.
    """
    bins_per_hour = int(60 / bin_size)
    # One "HH:MM" label per bin of the day.
    x_axis_labels = []
    for bin_idx in range(24 * bins_per_hour):
        hour_part = "{0:0=2d}".format(bin_idx // bins_per_hour)
        minute_part = "{0:0=2d}".format(bin_idx % bins_per_hour * bin_size)
        x_axis_labels.append(hour_part + ":" + minute_part)
    y_axis_labels = [datetime.datetime.strftime(date, '%Y/%m/%d') for date in dates]

    heatmap = go.Heatmap(z=row_count_per_bin, x=x_axis_labels, y=y_axis_labels, colorscale="Viridis")
    plot = go.Figure(data=heatmap)
    title = "Row count heatmap for " + sensor_name + " of " + pid + "<br>Label: " + label + ", device_id: " + device_id
    plot.update_layout(title=title)
    pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
# Inputs and parameters provided by Snakemake.
sensor_data = pd.read_csv(snakemake.input["sensor"], encoding="ISO-8859-1")
sensor_name = snakemake.params["table"]
pid = snakemake.params["pid"]
bin_size = snakemake.params["bin_size"]

with open(snakemake.input["pid_file"], encoding="ISO-8859-1") as external_file:
    external_file_content = external_file.readlines()
# readlines() keeps the line terminators; strip them so they do not end up
# inside the plot title or the "no data" message.
device_id = external_file_content[0].split(",")[-1].strip()
label = external_file_content[2].strip()

# check if we have sensor data
if sensor_data.empty:
    with open(snakemake.output[0], "w") as empty_html:
        empty_html.write("There is no " + sensor_name + " data for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
else:
    start_date = sensor_data["local_date"][0]
    end_date = sensor_data.at[sensor_data.index[-1], "local_date"]

    sensor_data["local_date_time"] = pd.to_datetime(sensor_data["local_date_time"])
    # Copy so that adding the "count" column does not write into a slice.
    sensor_data = sensor_data[["local_date_time"]].copy()
    sensor_data["count"] = 1

    # Add first and last day boundaries for resampling. The two boundary rows
    # carry a count of 0, so they only extend the resampled range.
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    day_boundaries = pd.DataFrame(
        [
            [datetime.datetime.strptime(start_date + " 00:00:00", "%Y-%m-%d %H:%M:%S"), 0],
            [datetime.datetime.strptime(end_date + " 23:59:59", "%Y-%m-%d %H:%M:%S"), 0],
        ],
        columns=sensor_data.columns,
    )
    sensor_data = pd.concat([sensor_data, day_boundaries])

    # Resample into bins with the size of bin_size ("min" is the minute alias;
    # the older "T" spelling is deprecated in recent pandas).
    resampled_bins = pd.DataFrame(sensor_data.resample(str(bin_size) + "min", on="local_date_time")["count"].sum())

    # Extract list of dates for creating the heatmap
    resampled_bins.reset_index(inplace=True)
    resampled_bins["local_date"] = resampled_bins["local_date_time"].dt.date
    dates = resampled_bins["local_date"].drop_duplicates().tolist()

    # Create heatmap; bins without rows become NaN so they are not coloured.
    row_count_per_bin = getComplianceMatrix(dates, resampled_bins)
    row_count_per_bin = np.asarray(row_count_per_bin)
    row_count_per_bin = np.where(row_count_per_bin == 0, np.nan, row_count_per_bin)
    getRowCountHeatmap(dates, row_count_per_bin, sensor_name, pid, snakemake.output[0], bin_size)

View File

@ -1,50 +0,0 @@
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.graph_objects as go
import datetime
def getDatesComplianceMatrix(phone_sensed_bins):
    """Return the frame's index together with its rows as plain lists.

    The second element holds one inner list per index entry, built from the
    row selected with ``.loc`` for that entry.
    """
    dates = phone_sensed_bins.index
    compliance_matrix = [phone_sensed_bins.loc[day, :].tolist() for day in dates]
    return dates, compliance_matrix
def getComplianceHeatmap(dates, compliance_matrix, pid, output_path, bin_size):
    """Write the sensed-bins heatmap (dates by time-of-day bins) as HTML.

    ``compliance_matrix`` is the z matrix, ``dates`` supply the y-axis
    labels, and the x axis gets one "HH:MM" label for each of the
    ``24 * int(60 / bin_size)`` bins. The title also uses the module-level
    ``label`` and ``device_id``.
    """
    bins_per_hour = int(60 / bin_size)
    # One "HH:MM" label for each bin of a 24-hour day.
    x_axis_labels = []
    for bin_idx in range(24 * bins_per_hour):
        hours = "{0:0=2d}".format(bin_idx // bins_per_hour)
        minutes = "{0:0=2d}".format(bin_idx % bins_per_hour * bin_size)
        x_axis_labels.append(hours + ":" + minutes)
    y_axis_labels = [datetime.datetime.strftime(date, '%Y/%m/%d') for date in dates]

    heatmap = go.Heatmap(z=compliance_matrix,
                         x=x_axis_labels,
                         y=y_axis_labels,
                         colorscale='Viridis',
                         colorbar={'tick0': 0,'dtick': 1})
    plot = go.Figure(data=heatmap)
    # NOTE(review): the title says "Five-minute bins" whatever bin_size is.
    title = "Heatmap sensed bins.<br>Five-minute bins showing how many sensors logged at least one row of data in that period for " + pid + "<br>Label: " + label + ", device_id: " + device_id
    plot.update_layout(title=title)
    pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
# get current patient id
pid = snakemake.params["pid"]
bin_size = snakemake.params["bin_size"]

with open(snakemake.input["pid_file"], encoding="ISO-8859-1") as external_file:
    external_file_content = external_file.readlines()
# readlines() keeps the line terminators; strip them so they do not end up
# inside the plot title or the "no data" message.
device_id = external_file_content[0].split(",")[-1].strip()
label = external_file_content[2].strip()

phone_sensed_bins = pd.read_csv(snakemake.input["sensor"], parse_dates=["local_date"], index_col="local_date")

if phone_sensed_bins.empty:
    # The context manager closes the file even if the write fails.
    with open(snakemake.output[0], "w", encoding="ISO-8859-1") as empty_html:
        empty_html.write("There is no sensor data for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
else:
    # resample to impute missing dates
    phone_sensed_bins = phone_sensed_bins.resample("1D").asfreq().fillna(0)
    # get dates and compliance_matrix
    dates, compliance_matrix = getDatesComplianceMatrix(phone_sensed_bins)
    # convert compliance_matrix from list to np.array and replace 0 with np.nan
    compliance_matrix = np.asarray(compliance_matrix)
    compliance_matrix = np.where(compliance_matrix == 0, np.nan, compliance_matrix)
    # get heatmap
    getComplianceHeatmap(dates, compliance_matrix, pid, snakemake.output[0], bin_size)

View File

@ -0,0 +1,89 @@
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from importlib import util
from pathlib import Path
import yaml
def getRowCountHeatmap(data_for_plot, scaled_data_for_plot, pid, time_segment, html_file):
    """Append a sensor-by-segment row count heatmap to ``html_file``.

    Colours come from ``scaled_data_for_plot`` (with the colour range fixed
    to [0, 1]), while the hover text shows the values of ``data_for_plot``.
    Axis labels are taken from ``data_for_plot``'s columns (x) and index (y).
    The title also uses the module-level ``label``.
    """
    heatmap = go.Heatmap(
        z=scaled_data_for_plot.values.tolist(),
        x=data_for_plot.columns,
        y=data_for_plot.index,
        hovertext=data_for_plot.values.tolist(),
        hovertemplate="Segment start: %{x}<br>Sensor: %{y}<br>Row count: %{hovertext}<extra></extra>",
        zmin=0,
        zmax=1,
        colorscale='Viridis',
    )
    fig = go.Figure(data=heatmap)
    title = "Heatmap of sensor row count for " + time_segment + " segments. Pid: " + pid +". Label: " + label + "<br>y-axis shows the included sensors.<br>x-axis shows the start (date and time) of a time segment.<br>z-axis (color) shows row count per sensor per segment instance."
    fig.update_layout(title=title)
    # Extra top margin leaves room for the multi-line title.
    fig["layout"].update(margin=dict(t=160))
    html_file.write(fig.to_html(full_html=False, include_plotlyjs="cdn"))
# import filter_data_by_segment from src/features/utils/utils.py
spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "features" / "utils" / "utils.py"))
mod = util.module_from_spec(spec)
spec.loader.exec_module(mod)
filter_data_by_segment = getattr(mod, "filter_data_by_segment")

phone_data_yield = pd.read_csv(snakemake.input["phone_data_yield"], index_col=["local_segment_start_datetime"], parse_dates=["local_segment_start_datetime"])
# make sure the phone_data_yield file contains "phone_data_yield_rapids_ratiovalidyieldedminutes" and "phone_data_yield_rapids_ratiovalidyieldedhours" columns
if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
    raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")
phone_data_yield = phone_data_yield[["local_segment_label", "phone_data_yield_rapids_ratiovalidyieldedminutes", "phone_data_yield_rapids_ratiovalidyieldedhours"]]

time_segments = pd.read_csv(snakemake.input["time_segments_labels"], header=0)["label"]
pid = snakemake.params["pid"]

with open(snakemake.input["participant_file"], "r", encoding="utf-8") as f:
    participant_file = yaml.safe_load(f)
label = participant_file["PHONE"]["LABEL"]

sensor_names = []
# One independent, initially empty frame per time segment label.
sensors_row_count = {time_segment: pd.DataFrame() for time_segment in time_segments}

# Count, for each sensor file, the rows falling into each segment instance.
for sensor_path in snakemake.input["all_sensors"]:
    sensor_data = pd.read_csv(sensor_path, usecols=["assigned_segments"])
    # The sensor name is the file name without its "_with_datetime.csv" suffix.
    sensor_name = Path(sensor_path).name.replace("_with_datetime.csv", "")
    sensor_names.append(sensor_name)

    if sensor_data.empty:
        continue

    for time_segment in time_segments:
        sensor_data_per_segment = filter_data_by_segment(sensor_data, time_segment)
        if sensor_data_per_segment.empty:
            continue
        # extract local start datetime of the segment from "local_segment" column
        sensor_data_per_segment["local_segment_start_datetime"] = pd.to_datetime(sensor_data_per_segment["local_segment"].apply(lambda x: x.split("#")[1].split(",")[0]))
        sensor_row_count = sensor_data_per_segment.groupby("local_segment_start_datetime")[["local_segment"]].count().rename(columns={"local_segment": sensor_name})
        sensors_row_count[time_segment] = pd.concat([sensors_row_count[time_segment], sensor_row_count], axis=1, sort=False)

# add phone data yield features and plot heatmap
sensor_names.extend(["ratiovalidyieldedminutes", "ratiovalidyieldedhours"])
# The context manager closes the report even if plotting raises.
with open(snakemake.output[0], "a", encoding="utf-8") as html_file:
    for time_segment in time_segments:
        if not phone_data_yield.empty:
            phone_data_yield_per_segment = phone_data_yield[phone_data_yield["local_segment_label"] == time_segment].rename(columns={"phone_data_yield_rapids_ratiovalidyieldedminutes": "ratiovalidyieldedminutes","phone_data_yield_rapids_ratiovalidyieldedhours": "ratiovalidyieldedhours"}).round(3)
            if not phone_data_yield_per_segment.empty:
                sensors_row_count[time_segment] = pd.concat([sensors_row_count[time_segment], phone_data_yield_per_segment], axis=1, sort=True)

        # consider all the sensors: one row per name in sensor_names, in that order
        data_for_plot = sensors_row_count[time_segment].transpose().reindex(pd.Index(sensor_names))

        if data_for_plot.empty:
            html_file.write("There are no records of selected sensors in database for " + time_segment + " segments. Pid: " + pid + ". Label: " + label + ".<br>")
        else:
            # except for phone data yield sensor, scale each sensor (row) to the range of [0, 1]
            # (the two yield rows are the last two entries of sensor_names)
            sensor_rows = sensor_names[:-2]
            scaled_data_for_plot = data_for_plot.copy()
            scaled_data_for_plot.loc[sensor_rows] = scaled_data_for_plot.fillna(np.nan).loc[sensor_rows].apply(lambda x: (x - np.nanmin(x)) / (np.nanmax(x) - np.nanmin(x)) if np.nanmax(x) != np.nanmin(x) else (x / np.nanmin(x)), axis=1)

            getRowCountHeatmap(data_for_plot, scaled_data_for_plot, pid, time_segment, html_file)

View File

@ -0,0 +1,100 @@
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from importlib import util
from pathlib import Path
import yaml
def colors2colorscale(colors):
    """Turn a list of colors into a stepped (discrete) colorscale.

    Every color except the last contributes two stops, at ``i/(n-1)`` and
    ``(i+1)/(n-1)``, so it fills that whole interval; the last color
    contributes a single stop at 1. An empty input yields an empty list.
    """
    last = len(colors) - 1
    colorscale = []
    for position, color in enumerate(colors):
        if position == last:
            colorscale.append([1, color])
            continue
        colorscale.extend([[position / last, color], [(position + 1) / last, color]])
    return colorscale
def getSensorsPerMinPerSegmentHeatmap(phone_data_yield, pid, time_segment, html_file):
    """Append a sensors-per-minute heatmap for one time segment to ``html_file``.

    The frame's values are the z matrix, its index the y axis, and its
    columns (minute offsets) are converted to ``pd.Timedelta`` for the x axis.
    The colour range is fixed to 0..16 and the colorscale is built from the
    module-level ``colors``; the title also uses the module-level ``label``.
    """
    x_axis_labels = [pd.Timedelta(minutes=offset) for offset in phone_data_yield.columns]
    # Ticks sit in the middle of each unit-wide colour band.
    colorbar = dict(thickness=25, tickvals=[1/2 + tick for tick in range(16)], ticktext=list(range(16)))
    heatmap = go.Heatmap(
        z=phone_data_yield.values.tolist(),
        x=x_axis_labels,
        y=phone_data_yield.index,
        zmin=0,
        zmax=16,
        colorscale=colors2colorscale(colors),
        colorbar=colorbar,
    )
    fig = go.Figure(data=heatmap)
    title = "Number of sensors with any data per minute for " + time_segment + " segments. Pid: "+pid+". Label: " + label + "<br>y-axis shows the start (date and time) of a time segment.<br>x-axis shows the time since the start of the time segment.<br>z-axis (color) shows how many sensors logged at least one row of data per minute."
    fig.update_layout(title=title)
    # Extra top margin leaves room for the multi-line title.
    fig["layout"].update(margin=dict(t=160))
    html_file.write(fig.to_html(full_html=False, include_plotlyjs="cdn"))
# import filter_data_by_segment from src/features/utils/utils.py
spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "features" / "utils" / "utils.py"))
mod = util.module_from_spec(spec)
spec.loader.exec_module(mod)
filter_data_by_segment = getattr(mod, "filter_data_by_segment")

# 17 colors: colors2colorscale turns them into 16 bands plus a final stop at 1.
colors = ["red", "#3D0751", "#423176", "#414381", "#3F5688", "#42678B", "#42768C", "#45868B", "#4A968A", "#53A485", "#5FB57E", "#76C170", "#91CF63", "#B4DA55", "#D9E152", "#F8E755", "#DEE00F"]
pid = snakemake.params["pid"]
time_segments_labels = pd.read_csv(snakemake.input["time_segments_labels"], header=0)

with open(snakemake.input["participant_file"], "r", encoding="utf-8") as f:
    participant_file = yaml.safe_load(f)
label = participant_file["PHONE"]["LABEL"]

phone_data_yield = pd.read_csv(snakemake.input["phone_data_yield"], parse_dates=["local_date_time"])

# The context manager closes the report even if plotting raises.
with open(snakemake.output[0], "a", encoding="utf-8") as html_file:
    if phone_data_yield.empty:
        html_file.write("There is no sensor data for " + pid + " (pid) and " + label + " (label).")
    else:
        for time_segment in time_segments_labels["label"]:
            phone_data_yield_per_segment = filter_data_by_segment(phone_data_yield, time_segment)

            if phone_data_yield_per_segment.empty:
                html_file.write("There is no sensor data of " + time_segment + " segments for " + pid + " (pid) and " + label + " (label).<br>")
            else:
                # calculate the length (in minute) of per segment instance
                phone_data_yield_per_segment["length"] = phone_data_yield_per_segment["timestamps_segment"].str.split(",").apply(lambda x: int((int(x[1])-int(x[0])) / (1000 * 60)))
                # calculate the number of sensors logged at least one row of data per minute.
                phone_data_yield_per_segment = phone_data_yield_per_segment.groupby(["local_segment", "length", "local_date", "local_hour", "local_minute"])[["sensor", "local_date_time"]].max().reset_index()
                # extract local start datetime of the segment from "local_segment" column
                phone_data_yield_per_segment["local_segment_start_datetimes"] = pd.to_datetime(phone_data_yield_per_segment["local_segment"].apply(lambda x: x.split("#")[1].split(",")[0]))
                # calculate the number of minutes after local start datetime of the segment
                phone_data_yield_per_segment["minutes_after_segment_start"] = ((phone_data_yield_per_segment["local_date_time"] - phone_data_yield_per_segment["local_segment_start_datetimes"]) / pd.Timedelta(minutes=1)).astype("int")

                # impute missing rows with 0: build every (segment start, minute offset)
                # pair from 0 to the segment length inclusive, then reindex onto it
                columns_for_full_index = phone_data_yield_per_segment[["local_segment_start_datetimes", "length"]].drop_duplicates(keep="first")
                columns_for_full_index = columns_for_full_index.apply(lambda row: [[row["local_segment_start_datetimes"], x] for x in range(row["length"] + 1)], axis=1)
                full_index = []
                for columns in columns_for_full_index:
                    # extend in place; rebuilding the list with "+" each time is quadratic
                    full_index.extend(columns)
                full_index = pd.MultiIndex.from_tuples(full_index, names=("local_segment_start_datetimes", "minutes_after_segment_start"))
                phone_data_yield_per_segment = phone_data_yield_per_segment.set_index(["local_segment_start_datetimes", "minutes_after_segment_start"]).reindex(full_index).reset_index().fillna(0)

                # transpose the dataframe per local start datetime of the segment and discard the useless index layer
                phone_data_yield_per_segment = phone_data_yield_per_segment.groupby("local_segment_start_datetimes")[["minutes_after_segment_start", "sensor"]].apply(lambda x: x.set_index("minutes_after_segment_start").transpose())
                phone_data_yield_per_segment.index = phone_data_yield_per_segment.index.get_level_values("local_segment_start_datetimes")

                # get heatmap
                getSensorsPerMinPerSegmentHeatmap(phone_data_yield_per_segment, pid, time_segment, html_file)

View File

@ -8,15 +8,18 @@ phone_data_yield = pd.read_csv(snakemake.input[0])
if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")
# plot ratio valid yielded minutes histogram
fig_ratiovalidyieldedminutes = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedminutes", color="local_segment_label")
fig_ratiovalidyieldedminutes.update_layout(title="Ratio Valid Yielded Minutes Histogram")
# plot ratio valid yielded hours histogram
fig_ratiovalidyieldedhours = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedhours", color="local_segment_label")
fig_ratiovalidyieldedhours.update_layout(title="Ratio Valid Yielded Hours Histogram")
with open(snakemake.output[0], "a") as html_file:
html_file = open(snakemake.output[0], "a", encoding="utf-8")
if phone_data_yield.empty:
html_file.write("There is no sensor data for the sensors in [PHONE_DATA_YIELD][SENSORS].")
else:
# plot ratio valid yielded minutes histogram
fig_ratiovalidyieldedminutes = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedminutes", color="local_segment_label")
fig_ratiovalidyieldedminutes.update_layout(title="Histogram of valid yielded minutes ratio per time segment.")
html_file.write(fig_ratiovalidyieldedminutes.to_html(full_html=False, include_plotlyjs="cdn"))
# plot ratio valid yielded hours histogram
fig_ratiovalidyieldedhours = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedhours", color="local_segment_label")
fig_ratiovalidyieldedhours.update_layout(title="Histogram of valid yielded hours ratio per time segment.")
html_file.write(fig_ratiovalidyieldedhours.to_html(full_html=False, include_plotlyjs="cdn"))
html_file.close()

View File

@ -1,10 +1,10 @@
---
title: "Heatmap Sensed Bins Report"
title: "Sensor Row Count per Time Segment For All Participants"
author:
- "MoSHI Pipeline"
- "RAPIDS"
date: "`r format(Sys.time(), '%d %B, %Y')`"
params:
rmd: "heatmap_sensed_bins_all_participants.Rmd"
rmd: "merge_heatmap_sensor_row_count_per_time_segment.Rmd"
output:
html_document:
highlight: tango
@ -17,14 +17,17 @@ output:
smooth_scroll: yes
---
<style>
.main-container {min-width:800px; max-width:100%;}
</style>
```{r include=FALSE}
source("renv/activate.R")
```
## All phone sensors
```{r, echo=FALSE}
heatmaps <- snakemake@input[["heatmap_sensed_bins"]]
heatmaps <- snakemake@input[["heatmap_sensor_row_count_per_time_segment"]]
heatmaps.html <- vector(mode="list", length(heatmaps))
for(pid in 1:length(heatmaps)){

View File

@ -1,10 +1,10 @@
---
title: "Heatmap Rows Report"
title: "Sensors per Minute per Time Segment for All Participants"
author:
- "MoSHI Pipeline"
- "RAPIDS"
date: "`r format(Sys.time(), '%d %B, %Y')`"
params:
rmd: "heatmap_days_by_sensors_all_participants.Rmd"
rmd: "merge_heatmap_sensors_per_minute_per_time_segment.Rmd"
output:
html_document:
highlight: tango
@ -17,14 +17,17 @@ output:
smooth_scroll: yes
---
<style>
.main-container {min-width:800px; max-width:100%;}
</style>
```{r include=FALSE}
source("renv/activate.R")
```
## All phone sensors
```{r, echo=FALSE}
heatmaps <- snakemake@input[["heatmap_rows"]]
heatmaps <- snakemake@input[["heatmap_sensors_per_minute_per_time_segment"]]
heatmaps.html <- vector(mode="list", length(heatmaps))
for(pid in 1:length(heatmaps)){

View File

@ -1,102 +0,0 @@
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.graph_objects as go
from dateutil import tz
import datetime
def getOneRow(data_per_participant, last_certain_dates, col_name, row, expected_num_of_days, only_show_valid_days):
    """Extend ``row`` with one participant's per-day values of ``col_name``.

    ``data_per_participant`` is a CSV path read with "local_date" as index.
    For ``col_name == "num_sensors"`` the column is first computed as the
    row-wise maximum of the file's columns. When ``only_show_valid_days`` is
    set and ``col_name`` is "valid_sensed_hours", the hours of days whose
    "is_valid_sensed_day" is False are replaced with NaN.

    If ``expected_num_of_days`` is -1, the data is resampled to daily sums
    and a new list ``row + values`` is returned. Otherwise one value per
    entry of ``last_certain_dates`` (0 when the date is absent) is appended
    to ``row`` in place and ``row`` itself is returned.
    """
    data = pd.read_csv(data_per_participant, index_col=["local_date"])

    if col_name == "num_sensors":
        data["num_sensors"] = data.max(axis=1)

    hide_invalid_days = only_show_valid_days and col_name == "valid_sensed_hours"
    if hide_invalid_days:
        # replace invalid days' valid sensed hours with np.nan to let our heatmap only shows valid days
        invalid_days = data[data["is_valid_sensed_day"] == False].index
        data.loc[invalid_days, "valid_sensed_hours"] = np.nan

    if expected_num_of_days != -1:
        # only show last certain days
        for date in last_certain_dates:
            row.append(data.loc[date][col_name] if date in data.index else 0)
        return row

    # show all days
    data.index = pd.to_datetime(data.index)
    start_date = data.index.min()
    # upsample data into one day bins
    data = data.resample("1D").sum()
    # Replace dates with the number of days since the first recorded date.
    data["date_idx"] = (data.index - start_date).days
    data.set_index("date_idx", inplace=True, drop=True)
    return row + data[col_name].tolist()
# Build the cross-participant compliance heatmap and write it to output_path as HTML.
# z values come from valid_sensed_hours and hover text from sensors_with_data; both
# frames are indexed by the columns named in last_certain_dates and must also carry
# "pid" and "label" columns, which form the y-axis labels ("pid.label").
# expected_num_of_days == -1 means "all days": x values are then used as given
# (day indexes) instead of being reformatted from "YYYY-MM-DD" to "YYYY/MM/DD".
def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_certain_dates, bin_size, min_bins_per_hour, expected_num_of_days, output_path):
plot = go.Figure(data=go.Heatmap(z=valid_sensed_hours[last_certain_dates].values,
x=[date.replace("-", "/") for date in last_certain_dates] if expected_num_of_days != -1 else last_certain_dates,
y=[pid + "." + label for pid, label in zip(sensors_with_data["pid"].to_list(), sensors_with_data["label"].to_list())],
text=sensors_with_data[last_certain_dates].values,
hovertemplate="Date: %{x}<br>Participant: %{y}<br>Valid sensed hours: %{z}<br>Number of sensors with data: %{text}<extra></extra>" if expected_num_of_days != -1 else "Day index: %{x}<br>Participant: %{y}<br>Valid sensed hours: %{z}<br>Number of sensors with data: %{text}<extra></extra>",
colorscale="Viridis",
colorbar={"tick0": 0,"dtick": 1},
showscale=True))
# The title differs only in its first sentence: "last N days" versus "all days".
if expected_num_of_days != -1:
plot.update_layout(title="Overall compliance heatmap for last " + str(expected_num_of_days) + " days.<br>Bin's color shows valid sensed hours for that day.<br>A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes.<br>You can hover over every day to see the number of sensors with data in that day.")
else:
plot.update_layout(title="Overall compliance heatmap for all days.<br>Bin's color shows valid sensed hours for that day.<br>A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes.<br>You can hover over every day to see the number of sensors with data in that day.")
# NOTE(review): indentation is not visible in this view, so it is unclear whether the
# two x-axis updates below belong to the else branch only (the "Day indexes" title
# suggests so) or run for both cases - confirm against the original file.
plot["layout"]["xaxis"].update(side="bottom")
plot["layout"].update(xaxis_title="Day indexes")
# Extra top margin leaves room for the multi-line title.
plot["layout"].update(margin=dict(t=160))
pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
# Inputs and parameters provided by Snakemake. The three input lists are
# iterated in lockstep below, one entry per participant.
phone_sensed_bins = snakemake.input["phone_sensed_bins"]
phone_valid_sensed_days = snakemake.input["phone_valid_sensed_days"]
pid_files = snakemake.input["pid_files"]
only_show_valid_days = snakemake.params["only_show_valid_days"]
local_timezone = snakemake.params["local_timezone"]
bin_size = snakemake.params["bin_size"]
min_bins_per_hour = snakemake.params["min_bins_per_hour"]
expected_num_of_days = int(snakemake.params["expected_num_of_days"])

if expected_num_of_days < -1:
    raise ValueError("EXPECTED_NUM_OF_DAYS of OVERALL_COMPLIANCE_HEATMAP section in config.yaml must be larger or equal to -1.")

last_certain_dates = []
if expected_num_of_days != -1:
    # get the list of dates to show: the last expected_num_of_days days,
    # oldest first, ending with today in the configured timezone
    cur_date = datetime.datetime.now().astimezone(tz.gettz(local_timezone)).date()
    last_certain_dates = [
        (cur_date - datetime.timedelta(days=date_offset)).strftime("%Y-%m-%d")
        for date_offset in range(expected_num_of_days - 1, -1, -1)
    ]

sensors_with_data_records, valid_sensed_hours_records = [], []
for sensors_with_data_individual, valid_sensed_hours_individual, pid_file in zip(phone_sensed_bins, phone_valid_sensed_days, pid_files):
    with open(pid_file, encoding="ISO-8859-1") as external_file:
        external_file_content = external_file.readlines()
    device_id = external_file_content[0].split(",")[-1].strip()
    label = external_file_content[2].strip()
    pid = pid_file.split("/")[-1]
    # Each record starts with [pid, label, device_id] followed by one value per day.
    sensors_with_data_records.append(getOneRow(sensors_with_data_individual, last_certain_dates, "num_sensors", [pid, label, device_id], expected_num_of_days, only_show_valid_days))
    valid_sensed_hours_records.append(getOneRow(valid_sensed_hours_individual, last_certain_dates, "valid_sensed_hours", [pid, label, device_id], expected_num_of_days, only_show_valid_days))

if expected_num_of_days == -1:
    # get the date_idx of all days; the first three fields of a record are
    # pid, label and device_id. default=3 keeps this from raising when there
    # are no participants, so the "no sensor data" message below is reachable.
    total_num_of_days = max((len(record) for record in sensors_with_data_records), default=3) - 3
    last_certain_dates = list(range(total_num_of_days))

# Zeros become NaN so that days without data are left uncoloured.
sensors_with_data = pd.DataFrame(data=sensors_with_data_records, columns=["pid", "label", "device_id"] + last_certain_dates).replace(0, np.nan)
valid_sensed_hours = pd.DataFrame(data=valid_sensed_hours_records, columns=["pid", "label", "device_id"] + last_certain_dates).replace(0, np.nan)

if sensors_with_data.empty:
    # The context manager closes the file even if the write fails.
    with open(snakemake.output[0], "w") as empty_html:
        empty_html.write("There is no sensor data for all participants")
else:
    getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_certain_dates, bin_size, min_bins_per_hour, expected_num_of_days, snakemake.output[0])