Update 4 plots for time_segments
parent
3560217e3b
commit
9a0e57301b
21
Snakefile
21
Snakefile
|
@ -231,20 +231,19 @@ for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys():
|
||||||
if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]:
|
if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]:
|
||||||
files_to_compute.append("reports/data_exploration/histogram_phone_data_yield.html")
|
files_to_compute.append("reports/data_exploration/histogram_phone_data_yield.html")
|
||||||
|
|
||||||
# visualization for data exploration
|
if config["HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT"]["PLOT"]:
|
||||||
# if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
|
files_to_compute.extend(expand("reports/interim/{pid}/heatmap_sensors_per_minute_per_time_segment.html", pid=config["PIDS"]))
|
||||||
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
files_to_compute.append("reports/data_exploration/heatmap_sensors_per_minute_per_time_segment.html")
|
||||||
|
|
||||||
# if config["HEATMAP_DAYS_BY_SENSORS"]["PLOT"]:
|
if config["HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT"]["PLOT"]:
|
||||||
# files_to_compute.extend(expand("reports/interim/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"], min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
files_to_compute.extend(expand("reports/interim/{pid}/heatmap_sensor_row_count_per_time_segment.html", pid=config["PIDS"]))
|
||||||
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_days_by_sensors_all_participants.html", min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
files_to_compute.append("reports/data_exploration/heatmap_sensor_row_count_per_time_segment.html")
|
||||||
|
|
||||||
# if config["HEATMAP_SENSED_BINS"]["PLOT"]:
|
if config["HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT"]["PLOT"]:
|
||||||
# files_to_compute.extend(expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"]))
|
files_to_compute.append("reports/data_exploration/heatmap_phone_data_yield_per_participant_per_time_segment.html")
|
||||||
# files_to_compute.extend(["reports/data_exploration/heatmap_sensed_bins_all_participants.html"])
|
|
||||||
|
|
||||||
# if config["OVERALL_COMPLIANCE_HEATMAP"]["PLOT"]:
|
if config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["PLOT"]:
|
||||||
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/overall_compliance_heatmap.html", min_valid_hours_per_day=config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
files_to_compute.append("reports/data_exploration/heatmap_feature_correlation_matrix.html")
|
||||||
|
|
||||||
|
|
||||||
rule all:
|
rule all:
|
||||||
|
|
39
config.yaml
39
config.yaml
|
@ -259,9 +259,6 @@ PHONE_WIFI_VISIBLE:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
########################################################################################################################
|
########################################################################################################################
|
||||||
# FITBIT #
|
# FITBIT #
|
||||||
########################################################################################################################
|
########################################################################################################################
|
||||||
|
@ -350,7 +347,6 @@ FITBIT_STEPS_INTRADAY:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
########################################################################################################################
|
########################################################################################################################
|
||||||
# PLOTS #
|
# PLOTS #
|
||||||
########################################################################################################################
|
########################################################################################################################
|
||||||
|
@ -358,32 +354,19 @@ FITBIT_STEPS_INTRADAY:
|
||||||
HISTOGRAM_PHONE_DATA_YIELD:
|
HISTOGRAM_PHONE_DATA_YIELD:
|
||||||
PLOT: False
|
PLOT: False
|
||||||
|
|
||||||
HEATMAP_FEATURES_CORRELATIONS:
|
HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT:
|
||||||
|
PLOT: False
|
||||||
|
|
||||||
|
HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT:
|
||||||
|
PLOT: False
|
||||||
|
SENSORS: [PHONE_ACCELEROMETER, PHONE_ACTIVITY_RECOGNITION, PHONE_APPLICATIONS_FOREGROUND, PHONE_BATTERY, PHONE_BLUETOOTH, PHONE_CALLS, PHONE_CONVERSATION, PHONE_LIGHT, PHONE_LOCATIONS, PHONE_MESSAGES, PHONE_SCREEN, PHONE_WIFI_CONNECTED, PHONE_WIFI_VISIBLE]
|
||||||
|
|
||||||
|
HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT:
|
||||||
|
PLOT: False
|
||||||
|
|
||||||
|
HEATMAP_FEATURE_CORRELATION_MATRIX:
|
||||||
PLOT: False
|
PLOT: False
|
||||||
MIN_ROWS_RATIO: 0.5
|
MIN_ROWS_RATIO: 0.5
|
||||||
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
|
|
||||||
MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
|
|
||||||
PHONE_FEATURES: [accelerometer, activity_recognition, applications_foreground, battery, calls_incoming, calls_missed, calls_outgoing, conversation, light, location_doryab, messages_received, messages_sent, screen]
|
|
||||||
FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep]
|
|
||||||
CORR_THRESHOLD: 0.1
|
CORR_THRESHOLD: 0.1
|
||||||
CORR_METHOD: "pearson" # choose from {"pearson", "kendall", "spearman"}
|
CORR_METHOD: "pearson" # choose from {"pearson", "kendall", "spearman"}
|
||||||
|
|
||||||
HEATMAP_DAYS_BY_SENSORS:
|
|
||||||
PLOT: False
|
|
||||||
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
|
|
||||||
MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
|
|
||||||
EXPECTED_NUM_OF_DAYS: -1
|
|
||||||
DB_TABLES: [accelerometer, applications_foreground, battery, bluetooth, calls, light, locations, messages, screen, wifi, sensor_wifi, plugin_google_activity_recognition, plugin_ios_activity_recognition, plugin_studentlife_audio_android, plugin_studentlife_audio]
|
|
||||||
|
|
||||||
HEATMAP_SENSED_BINS:
|
|
||||||
PLOT: False
|
|
||||||
BIN_SIZE: #*bin_size
|
|
||||||
|
|
||||||
OVERALL_COMPLIANCE_HEATMAP:
|
|
||||||
PLOT: False
|
|
||||||
ONLY_SHOW_VALID_DAYS: False
|
|
||||||
EXPECTED_NUM_OF_DAYS: -1
|
|
||||||
BIN_SIZE: #*bin_size
|
|
||||||
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
|
|
||||||
MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
|
|
||||||
|
|
||||||
|
|
|
@ -214,6 +214,20 @@ for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys():
|
||||||
if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]:
|
if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]:
|
||||||
files_to_compute.append("reports/data_exploration/histogram_phone_data_yield.html")
|
files_to_compute.append("reports/data_exploration/histogram_phone_data_yield.html")
|
||||||
|
|
||||||
|
# Each block below registers the report files Snakemake must build when the
# corresponding PLOT flag is enabled in the config.

# Sensors-per-minute heatmap: one interim report per pid in config["PIDS"],
# plus a single report under reports/data_exploration/.
if config["HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT"]["PLOT"]:
    files_to_compute.extend(expand("reports/interim/{pid}/heatmap_sensors_per_minute_per_time_segment.html", pid=config["PIDS"]))
    files_to_compute.append("reports/data_exploration/heatmap_sensors_per_minute_per_time_segment.html")

# Sensor row-count heatmap: same layout (one interim report per pid plus one
# report under reports/data_exploration/).
if config["HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT"]["PLOT"]:
    files_to_compute.extend(expand("reports/interim/{pid}/heatmap_sensor_row_count_per_time_segment.html", pid=config["PIDS"]))
    files_to_compute.append("reports/data_exploration/heatmap_sensor_row_count_per_time_segment.html")

# Phone data yield heatmap: a single report, no per-participant files.
if config["HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT"]["PLOT"]:
    files_to_compute.append("reports/data_exploration/heatmap_phone_data_yield_per_participant_per_time_segment.html")

# Feature correlation matrix heatmap: a single report.
if config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["PLOT"]:
    files_to_compute.append("reports/data_exploration/heatmap_feature_correlation_matrix.html")
|
||||||
|
|
||||||
# Analysis Workflow Example
|
# Analysis Workflow Example
|
||||||
models, scalers = [], []
|
models, scalers = [], []
|
||||||
for model_name in config["PARAMS_FOR_ANALYSIS"]["MODEL_NAMES"]:
|
for model_name in config["PARAMS_FOR_ANALYSIS"]["MODEL_NAMES"]:
|
||||||
|
|
|
@ -323,6 +323,22 @@ FITBIT_STEPS_INTRADAY:
|
||||||
HISTOGRAM_PHONE_DATA_YIELD:
|
HISTOGRAM_PHONE_DATA_YIELD:
|
||||||
PLOT: True
|
PLOT: True
|
||||||
|
|
||||||
|
HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT:
|
||||||
|
PLOT: True
|
||||||
|
|
||||||
|
HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT:
|
||||||
|
PLOT: True
|
||||||
|
SENSORS: [PHONE_ACCELEROMETER, PHONE_ACTIVITY_RECOGNITION, PHONE_APPLICATIONS_FOREGROUND, PHONE_BATTERY, PHONE_BLUETOOTH, PHONE_CALLS, PHONE_CONVERSATION, PHONE_LIGHT, PHONE_LOCATIONS, PHONE_MESSAGES, PHONE_SCREEN, PHONE_WIFI_CONNECTED, PHONE_WIFI_VISIBLE]
|
||||||
|
|
||||||
|
HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT:
|
||||||
|
PLOT: True
|
||||||
|
|
||||||
|
HEATMAP_FEATURE_CORRELATION_MATRIX:
|
||||||
|
PLOT: True
|
||||||
|
MIN_ROWS_RATIO: 0.5
|
||||||
|
CORR_THRESHOLD: 0.1
|
||||||
|
CORR_METHOD: "pearson" # choose from {"pearson", "kendall", "spearman"}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
########################################################################################################################
|
########################################################################################################################
|
||||||
|
|
|
@ -1,19 +1,3 @@
|
||||||
# Common.smk ##########################################################################################################
|
|
||||||
|
|
||||||
def infer_participant_platform(participant_file):
    """Return the phone platform declared on line 2 of a participant file.

    Line 2 is read as a comma-separated list of platforms. If its first entry
    is "multiple", or it lists both "android" and "ios", the participant is
    treated as "android". Otherwise the first entry is used.

    Args:
        participant_file: path to the participant file (read as ISO-8859-1).

    Returns:
        Either "android" or "ios".

    Raises:
        ValueError: if the resolved platform is neither "android" nor "ios".
    """
    with open(participant_file, encoding="ISO-8859-1") as external_file:
        external_file_content = external_file.readlines()

    platforms = external_file_content[1].strip().split(",")
    if platforms[0] == "multiple" or (len(platforms) > 1 and "android" in platforms and "ios" in platforms):
        platform = "android"
    else:
        platform = platforms[0]

    if platform not in ["android", "ios"]:
        # `platforms` is a list: join it back into the text the user typed so the
        # intended ValueError is raised instead of a str + list TypeError.
        raise ValueError("Platform (line 2) in a participant file should be 'android', 'ios', or 'multiple'. You typed '" + ",".join(platforms) + "'")

    return platform
|
|
||||||
|
|
||||||
# Features.smk #########################################################################################################
|
# Features.smk #########################################################################################################
|
||||||
def find_features_files(wildcards):
|
def find_features_files(wildcards):
|
||||||
feature_files = []
|
feature_files = []
|
||||||
|
@ -38,14 +22,3 @@ def input_merge_sensor_features_for_individual_participants(wildcards):
|
||||||
break
|
break
|
||||||
return feature_files
|
return feature_files
|
||||||
|
|
||||||
# Reports.smk ###########################################################################################################
|
|
||||||
|
|
||||||
def optional_heatmap_days_by_sensors_input(wildcards):
    """Snakemake input function: raw sensor CSVs to plot for one participant.

    Starts from config["HEATMAP_DAYS_BY_SENSORS"]["DB_TABLES"] and drops the
    conversation and activity recognition tables that belong to the platform
    the participant does not use.
    """
    platform = infer_participant_platform("data/external/"+wildcards.pid)

    # Pick which platform's tables have to be discarded.
    if platform == "android":
        other_platform = "IOS"  # for android, discard any ios tables that may exist
    elif platform == "ios":
        other_platform = "ANDROID"  # for ios, discard any android tables that may exist

    tables_to_discard = [
        config["CONVERSATION"]["DB_TABLE"][other_platform],
        config["ACTIVITY_RECOGNITION"]["DB_TABLE"][other_platform],
    ]

    tables_platform = []
    for table in config["HEATMAP_DAYS_BY_SENSORS"]["DB_TABLES"]:
        if table not in tables_to_discard:
            tables_platform.append(table)

    # {{pid}} stays a wildcard; only {table} is expanded here.
    return expand("data/raw/{{pid}}/{table}_with_datetime.csv", table = tables_platform)
|
|
||||||
|
|
|
@ -6,74 +6,66 @@ rule histogram_phone_data_yield:
|
||||||
script:
|
script:
|
||||||
"../src/visualization/histogram_phone_data_yield.py"
|
"../src/visualization/histogram_phone_data_yield.py"
|
||||||
|
|
||||||
|
rule heatmap_sensors_per_minute_per_time_segment:
|
||||||
|
|
||||||
|
|
||||||
rule heatmap_features_correlations:
|
|
||||||
input:
|
input:
|
||||||
features = expand("data/processed/{pid}/{sensor}_{time_segment}.csv", pid=config["PIDS"], sensor=config["HEATMAP_FEATURES_CORRELATIONS"]["PHONE_FEATURES"]+config["HEATMAP_FEATURES_CORRELATIONS"]["FITBIT_FEATURES"], time_segment=config["TIME_SEGMENTS"]),
|
phone_data_yield = "data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv",
|
||||||
phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}hours_{{min_valid_bins_per_hour}}bins.csv", pid=config["PIDS"])
|
participant_file = "data/external/participant_files/{pid}.yaml",
|
||||||
|
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
|
||||||
params:
|
params:
|
||||||
min_rows_ratio = config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_ROWS_RATIO"],
|
pid = "{pid}"
|
||||||
corr_threshold = config["HEATMAP_FEATURES_CORRELATIONS"]["CORR_THRESHOLD"],
|
|
||||||
corr_method = config["HEATMAP_FEATURES_CORRELATIONS"]["CORR_METHOD"]
|
|
||||||
output:
|
output:
|
||||||
"reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html"
|
"reports/interim/{pid}/heatmap_sensors_per_minute_per_time_segment.html"
|
||||||
script:
|
script:
|
||||||
"../src/visualization/heatmap_features_correlations.py"
|
"../src/visualization/heatmap_sensors_per_minute_per_time_segment.py"
|
||||||
|
|
||||||
rule heatmap_days_by_sensors:
|
rule merge_heatmap_sensors_per_minute_per_time_segment:
|
||||||
input:
|
input:
|
||||||
sensors = optional_heatmap_days_by_sensors_input,
|
heatmap_sensors_per_minute_per_time_segment = expand("reports/interim/{pid}/heatmap_sensors_per_minute_per_time_segment.html", pid=config["PIDS"])
|
||||||
phone_valid_sensed_days = "data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins.csv"
|
output:
|
||||||
|
"reports/data_exploration/heatmap_sensors_per_minute_per_time_segment.html"
|
||||||
|
script:
|
||||||
|
"../src/visualization/merge_heatmap_sensors_per_minute_per_time_segment.Rmd"
|
||||||
|
|
||||||
|
rule heatmap_sensor_row_count_per_time_segment:
|
||||||
|
input:
|
||||||
|
all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor = map(str.lower, config["HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT"]["SENSORS"])),
|
||||||
|
phone_data_yield = "data/processed/features/{pid}/phone_data_yield.csv",
|
||||||
|
participant_file = "data/external/participant_files/{pid}.yaml",
|
||||||
|
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
|
||||||
params:
|
params:
|
||||||
pid = "{pid}",
|
pid = "{pid}"
|
||||||
expected_num_of_days = config["HEATMAP_DAYS_BY_SENSORS"]["EXPECTED_NUM_OF_DAYS"]
|
|
||||||
output:
|
output:
|
||||||
"reports/interim/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/{pid}/heatmap_days_by_sensors.html"
|
"reports/interim/{pid}/heatmap_sensor_row_count_per_time_segment.html"
|
||||||
script:
|
script:
|
||||||
"../src/visualization/heatmap_days_by_sensors.py"
|
"../src/visualization/heatmap_sensor_row_count_per_time_segment.py"
|
||||||
|
|
||||||
rule heatmap_days_by_sensors_all_participants:
|
rule merge_heatmap_sensor_row_count_per_time_segment:
|
||||||
input:
|
input:
|
||||||
heatmap_rows = expand("reports/interim/{{min_valid_hours_per_day}}hours_{{min_valid_bins_per_hour}}bins/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"])
|
heatmap_sensor_row_count_per_time_segment = expand("reports/interim/{pid}/heatmap_sensor_row_count_per_time_segment.html", pid=config["PIDS"])
|
||||||
output:
|
output:
|
||||||
"reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_days_by_sensors_all_participants.html"
|
"reports/data_exploration/heatmap_sensor_row_count_per_time_segment.html"
|
||||||
script:
|
script:
|
||||||
"../src/visualization/heatmap_days_by_sensors_all_participants.Rmd"
|
"../src/visualization/merge_heatmap_sensor_row_count_per_time_segment.Rmd"
|
||||||
|
|
||||||
rule heatmap_sensed_bins:
|
rule heatmap_phone_data_yield_per_participant_per_time_segment:
|
||||||
input:
|
input:
|
||||||
sensor = "data/interim/{pid}/phone_sensed_bins.csv",
|
phone_data_yield = expand("data/processed/features/{pid}/phone_data_yield.csv", pid=config["PIDS"]),
|
||||||
pid_file = "data/external/{pid}"
|
participant_file = expand("data/external/participant_files/{pid}.yaml", pid=config["PIDS"]),
|
||||||
|
time_segments_labels = expand("data/interim/time_segments/{pid}_time_segments_labels.csv", pid=config["PIDS"])
|
||||||
|
output:
|
||||||
|
"reports/data_exploration/heatmap_phone_data_yield_per_participant_per_time_segment.html"
|
||||||
|
script:
|
||||||
|
"../src/visualization/heatmap_phone_data_yield_per_participant_per_time_segment.py"
|
||||||
|
|
||||||
|
rule heatmap_feature_correlation_matrix:
|
||||||
|
input:
|
||||||
|
all_sensor_features = "data/processed/features/all_participants/all_sensor_features.csv" # before data cleaning
|
||||||
params:
|
params:
|
||||||
pid = "{pid}",
|
min_rows_ratio = config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["MIN_ROWS_RATIO"],
|
||||||
bin_size = config["HEATMAP_SENSED_BINS"]["BIN_SIZE"]
|
corr_threshold = config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["CORR_THRESHOLD"],
|
||||||
|
corr_method = config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["CORR_METHOD"]
|
||||||
output:
|
output:
|
||||||
"reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html"
|
"reports/data_exploration/heatmap_feature_correlation_matrix.html"
|
||||||
script:
|
script:
|
||||||
"../src/visualization/heatmap_sensed_bins.py"
|
"../src/visualization/heatmap_feature_correlation_matrix.py"
|
||||||
|
|
||||||
# Builds one report from the sensed-bins heatmaps of all participants.
# The Rmd script is not shown here; it receives the per-participant HTML files
# listed under `input`.
rule heatmap_sensed_bins_all_participants:
    input:
        # one interim heatmap per pid in config["PIDS"]
        heatmap_sensed_bins = expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"])
    output:
        "reports/data_exploration/heatmap_sensed_bins_all_participants.html"
    script:
        "../src/visualization/heatmap_sensed_bins_all_participants.Rmd"
|
|
||||||
|
|
||||||
# Builds a single compliance heatmap over all participants in config["PIDS"].
# The output path keeps {min_valid_hours_per_day} and {min_valid_bins_per_hour}
# as wildcards; they are escaped with double braces inside expand() so only
# {pid} is expanded there.
rule overall_compliance_heatmap:
    input:
        phone_sensed_bins = expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]),
        phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}hours_{{min_valid_bins_per_hour}}bins.csv", pid=config["PIDS"]),
        pid_files = expand("data/external/{pid}", pid=config["PIDS"])
    params:
        only_show_valid_days = config["OVERALL_COMPLIANCE_HEATMAP"]["ONLY_SHOW_VALID_DAYS"],
        local_timezone = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"],
        expected_num_of_days = config["OVERALL_COMPLIANCE_HEATMAP"]["EXPECTED_NUM_OF_DAYS"],
        bin_size = config["OVERALL_COMPLIANCE_HEATMAP"]["BIN_SIZE"],
        # forwarded from the output wildcard, so the script receives it as a string
        min_bins_per_hour = "{min_valid_bins_per_hour}"
    output:
        "reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/overall_compliance_heatmap.html"
    script:
        "../src/visualization/overall_compliance_heatmap.py"
|
|
||||||
|
|
|
@ -1,34 +0,0 @@
|
||||||
import pandas as pd
|
|
||||||
import datetime
|
|
||||||
import plotly.io as pio
|
|
||||||
import plotly.graph_objects as go
|
|
||||||
|
|
||||||
def getBatteryConsumptionRatesBarChart(battery_data, pid):
    """Return a horizontal bar chart of daily battery consumption rates.

    One bar is drawn per row of `battery_data`: the bar length comes from the
    "battery_daily_avgconsumptionrate" column and the bar label from
    "local_date" formatted as YYYY/MM/DD.

    Note: the title also uses `label` and `device_id`, which are not parameters;
    they are read from module-level variables at call time.
    """
    day_labels = battery_data["local_date"].apply(lambda x: x.strftime("%Y/%m/%d")).tolist()
    bars = go.Bar(
        x=battery_data["battery_daily_avgconsumptionrate"],
        y=day_labels,
        orientation='h',
    )

    chart_title = "Daily battery consumption rates bar chart for " + pid + "<br>Label: " + label + ", device_id: " + device_id

    plot = go.Figure(bars)
    plot.update_layout(title=chart_title, xaxis_title="battery drains % per hour")
    return plot
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Script body: plot one participant's daily battery consumption rates.
# Reads snakemake.input["sensor"], snakemake.input["pid_file"] and
# snakemake.params["pid"]; writes the HTML report to snakemake.output[0].
battery_data = pd.read_csv(snakemake.input["sensor"], parse_dates=["local_date"])
pid = snakemake.params["pid"]

with open(snakemake.input["pid_file"], encoding="ISO-8859-1") as external_file:
    external_file_content = external_file.readlines()

# readlines() keeps the line terminator, so strip it before the values are
# embedded in the report text.
device_id = external_file_content[0].split(",")[-1].strip()
label = external_file_content[2].strip()

if battery_data.empty:
    # No data: still produce the output file so the workflow target exists.
    with open(snakemake.output[0], "w") as empty_html:
        empty_html.write("There is no battery data for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
else:
    # Reindex to one row per calendar day; days without data become 0.
    battery_data.set_index(["local_date"], inplace=True)
    battery_data = battery_data.resample("1D").asfreq().fillna(0).reset_index()
    plot = getBatteryConsumptionRatesBarChart(battery_data, pid)
    pio.write_html(plot, file=snakemake.output[0], auto_open=False, include_plotlyjs="cdn")
|
|
|
@ -1,39 +0,0 @@
|
||||||
---
|
|
||||||
title: "Compliance Report"
|
|
||||||
author:
|
|
||||||
- "MoSHI Pipeline"
|
|
||||||
date: "`r format(Sys.time(), '%d %B, %Y')`"
|
|
||||||
params:
|
|
||||||
rmd: "compliance_report.Rmd"
|
|
||||||
output:
|
|
||||||
html_document:
|
|
||||||
highlight: tango
|
|
||||||
number_sections: no
|
|
||||||
theme: default
|
|
||||||
toc: yes
|
|
||||||
toc_depth: 3
|
|
||||||
toc_float:
|
|
||||||
collapsed: no
|
|
||||||
smooth_scroll: yes
|
|
||||||
---
|
|
||||||
|
|
||||||
```{r include=FALSE}
|
|
||||||
source("renv/activate.R")
|
|
||||||
```
|
|
||||||
|
|
||||||
## Overall phone compliance
|
|
||||||
|
|
||||||
```{r, echo=FALSE}
|
|
||||||
htmltools::includeHTML(snakemake@input[["compliance_heatmap"]])
|
|
||||||
```
|
|
||||||
|
|
||||||
## Per sensor compliance
|
|
||||||
```{r, echo=FALSE}
|
|
||||||
# Embed every per-sensor heatmap HTML file passed in by Snakemake.
heatmaps <- snakemake@input[["sensor_heatmaps"]]
heatmaps.html <- vector(mode="list", length(heatmaps))

# seq_along() yields an empty sequence when there are no heatmaps, whereas
# 1:length(heatmaps) would iterate over 1 and 0 and fail on a missing file.
for(sensor_id in seq_along(heatmaps)){
    heatmaps.html[[sensor_id]] <- htmltools::includeHTML(heatmaps[sensor_id])
}
htmltools::tagList(heatmaps.html)
|
|
||||||
```
|
|
|
@ -1,74 +0,0 @@
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
|
||||||
import plotly.io as pio
|
|
||||||
import plotly.graph_objects as go
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
|
|
||||||
def getRowCountHeatmap(row_count_sensors_normalized, row_count_sensors, pid, output_path):
    """Write a sensors-by-days row count heatmap for one participant to `output_path`.

    Cell colors come from `row_count_sensors_normalized`, while the hover text
    shows the raw counts in `row_count_sensors`. Both frames are transposed so
    sensors (columns) end up on the y-axis. The index of `row_count_sensors` is
    iterated as (date, day index) pairs to build the x-axis labels.
    """
    day_labels = []
    for local_date, date_idx in (idx[:2] for idx in row_count_sensors.index):
        day_labels.append(datetime.strftime(local_date, "%Y/%m/%d")+"("+str(date_idx)+")")

    heatmap = go.Heatmap(
        z=row_count_sensors_normalized.T.values.tolist(),
        x=day_labels,
        y=row_count_sensors.columns.tolist(),
        hovertext=row_count_sensors.T.values.tolist(),
        hovertemplate="Date: %{x}<br>Sensor: %{y}<br>Row count: %{hovertext}<extra></extra>",
        colorscale="Viridis",
    )

    plot = go.Figure(data=heatmap)
    plot.update_layout(title="Row count heatmap for " + pid)
    pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Script body: per-day row counts of every sensor for one participant.
# Reads snakemake.input["phone_valid_sensed_days"], snakemake.input["sensors"],
# snakemake.params["pid"] and snakemake.params["expected_num_of_days"];
# writes the HTML report to snakemake.output[0].
phone_valid_sensed_days = pd.read_csv(snakemake.input["phone_valid_sensed_days"], parse_dates=["local_date"], index_col=["local_date"])
phone_valid_sensed_days = phone_valid_sensed_days[phone_valid_sensed_days["is_valid_sensed_day"] == True]

# One column per sensor, indexed by local_date, holding the number of rows sensed that day.
row_count_sensors = pd.DataFrame()
for sensor_path in snakemake.input["sensors"]:
    sensor_name = sensor_path.split("/")[-1].replace("_with_datetime.csv", "")
    # plugin_studentlife_audio_android or plugin_studentlife_audio => conversion; plugin_google_activity_recognition or plugin_ios_activity_recognition => AR; applications_foreground => apps
    sensor_name = sensor_name.replace("plugin_studentlife_audio_android", "conversion").replace("plugin_studentlife_audio", "conversion") \
        .replace("plugin_google_activity_recognition", "AR").replace("plugin_ios_activity_recognition", "AR") \
        .replace("applications_foreground", "apps")

    sensor_data = pd.read_csv(sensor_path, encoding="ISO-8859-1", parse_dates=["local_date"], dtype={"label": str})
    if sensor_data.empty:
        # keep an (empty) column so the sensor still shows up in the heatmap
        row_count_sensor = pd.DataFrame(columns=[sensor_name])
    else:
        row_count_sensor = sensor_data[["timestamp", "local_date"]].groupby(["local_date"]).count().rename(columns={"timestamp": sensor_name})
    row_count_sensors = row_count_sensors.join(row_count_sensor, how="outer")

row_count_sensors.index = pd.to_datetime(row_count_sensors.index)
row_count_sensors = row_count_sensors.join(phone_valid_sensed_days[["valid_sensed_hours"]], how="outer")

if row_count_sensors.empty:
    # No data: still produce the output file so the workflow target exists.
    with open(snakemake.output[0], "w") as empty_html:
        empty_html.write("There are no records of sensors in database.")
else:
    # set date_idx based on the first date
    reference_date = row_count_sensors.index.min()
    last_date = row_count_sensors.index.max()
    row_count_sensors["date_idx"] = (row_count_sensors.index - reference_date).days
    row_count_sensors["local_date"] = row_count_sensors.index
    row_count_sensors.set_index(["local_date", "date_idx"], inplace=True)

    expected_num_of_days = int(snakemake.params["expected_num_of_days"])
    if expected_num_of_days < -1:
        raise ValueError("EXPECTED_NUM_OF_DAYS of HEATMAP_DAYS_BY_SENSORS section in config.yaml must be larger or equal to -1.")
    # if expected_num_of_days = -1, return all dates. Both the first and the
    # last date are part of the range, hence the + 1 (without it the last date
    # would be dropped by the right merge below).
    if expected_num_of_days == -1:
        expected_num_of_days = (last_date - reference_date).days + 1

    # add empty rows to make sure different participants have the same date_idx range
    date_idx_range = list(range(expected_num_of_days))
    date_range = [reference_date + timedelta(days=idx) for idx in date_idx_range]
    all_dates = pd.DataFrame({"local_date": date_range, "date_idx": date_idx_range})
    all_dates.set_index(["local_date", "date_idx"], inplace=True)

    row_count_sensors = row_count_sensors.merge(all_dates, left_index=True, right_index=True, how="right")

    # normalize each sensor (column): min-max scaling, or x / min when the column is constant
    if row_count_sensors.count().max() > 1:
        row_count_sensors_normalized = row_count_sensors.fillna(np.nan).apply(lambda x: (x - np.nanmin(x)) / (np.nanmax(x) - np.nanmin(x)) if np.nanmax(x) != np.nanmin(x) else (x / np.nanmin(x)), axis=0)
    else:
        row_count_sensors_normalized = row_count_sensors

    # Use the pid passed by the rule instead of parsing it from the last sensor
    # path, which is undefined when there are no sensor inputs.
    pid = snakemake.params["pid"]
    getRowCountHeatmap(row_count_sensors_normalized, row_count_sensors, pid, snakemake.output[0])
|
|
|
@ -0,0 +1,48 @@
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
|
||||||
|
|
||||||
|
def getCorrMatrixHeatmap(corr_matrix, time_segment, html_file):
    """Append a correlation-matrix heatmap for one time segment to `html_file`.

    `corr_matrix` supplies both the cell values and, through its columns, the
    labels of both axes. The figure is written as an HTML fragment
    (not a full page) that loads plotly.js from the CDN.
    """
    axis_labels = corr_matrix.columns
    heatmap = go.Heatmap(
        z=corr_matrix.values.tolist(),
        x=axis_labels,
        y=axis_labels,
        colorscale="Viridis",
    )

    fig = go.Figure(data=heatmap)
    fig.update_layout(title="Correlation matrix of features of " + time_segment + " segments.")

    fragment = fig.to_html(full_html=False, include_plotlyjs="cdn")
    html_file.write(fragment)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Script body: one correlation-matrix heatmap per time segment label.
# Reads snakemake.input["all_sensor_features"] and the min_rows_ratio,
# corr_threshold and corr_method params; appends the plots to snakemake.output[0].
min_rows_ratio = snakemake.params["min_rows_ratio"]
corr_threshold = snakemake.params["corr_threshold"]
corr_method = snakemake.params["corr_method"]

features = pd.read_csv(snakemake.input["all_sensor_features"])
# unique() keeps the labels in order of first appearance, so the sections of the
# report come out in the same order on every run (iterating a set of strings does not).
time_segments = features["local_segment_label"].unique()

# The context manager closes the report even if a plot fails halfway through.
with open(snakemake.output[0], "a", encoding="utf-8") as html_file:
    if features.empty:
        html_file.write("There are no features for any participant.")
    else:

        for time_segment in time_segments:
            features_per_segment = features[features["local_segment_label"] == time_segment]
            if features_per_segment.empty:
                # str() because a missing label is not a string (and never equals itself above)
                html_file.write("There are no features for " + str(time_segment) + " segments.<br>")
            else:
                # drop useless columns
                features_per_segment = features_per_segment.drop(["pid", "local_segment", "local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"], axis=1).astype(float)
                # get correlation matrix
                corr_matrix = features_per_segment.corr(method=corr_method, min_periods=min_rows_ratio * features_per_segment.shape[0])
                # blank out coefficients whose absolute value is below corr_threshold
                corr_matrix[(corr_matrix > -corr_threshold) & (corr_matrix < corr_threshold)] = np.nan

                # plot heatmap
                getCorrMatrixHeatmap(corr_matrix, time_segment, html_file)
|
|
@ -1,59 +0,0 @@
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
|
||||||
import plotly.io as pio
|
|
||||||
import plotly.graph_objects as go
|
|
||||||
|
|
||||||
|
|
||||||
def getCorrMatrixHeatmap(corr_matrix, output_path):
    """Write a heatmap of `corr_matrix` to `output_path` as an HTML file.

    The columns of `corr_matrix` label both axes. plotly.js is loaded from the
    CDN and the file is not opened after writing.
    """
    axis_labels = corr_matrix.columns
    heatmap = go.Heatmap(
        z=corr_matrix.values.tolist(),
        x=axis_labels,
        y=axis_labels,
        colorscale="Viridis",
    )

    plot = go.Figure(data=heatmap)
    plot.update_layout(title="Correlation Matrix Heatmap")
    pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
|
|
||||||
|
|
||||||
|
|
||||||
# Script body: correlation heatmap of all features over all participants,
# restricted to each participant's valid sensed days.
# Reads snakemake.input["features"], snakemake.input["phone_valid_sensed_days"] and the
# min_rows_ratio, corr_threshold and corr_method params; writes snakemake.output[0].
min_rows_ratio = snakemake.params["min_rows_ratio"]
corr_threshold = snakemake.params["corr_threshold"]

# merge features
# `features_all_sensors` accumulates the current participant's feature files
# (outer-merged on local_date); `features` collects the finished participants.
features, features_all_sensors = pd.DataFrame(columns=["local_date"]), pd.DataFrame(columns=["local_date"])
pids = set()
last_pid = None
for path in snakemake.input["features"]:
    # the participant id is the third component of the input path
    pid = path.split("/")[2]
    if pid not in pids:
        # First file of a new participant: flush the previous participant's frame.
        # On the very first iteration this appends an empty frame.
        # NOTE(review): this relies on each participant's files being contiguous
        # in the input list -- confirm the ordering produced by the rule.
        pids.add(pid)
        features_all_sensors["pid"] = last_pid
        features = pd.concat([features, features_all_sensors], axis=0, ignore_index=True, sort=False)
        features_all_sensors = pd.DataFrame(columns=["local_date"])
    features_per_sensor = pd.read_csv(path)
    features_all_sensors = features_all_sensors.merge(features_per_sensor, on="local_date", how="outer")
    last_pid = pid

# flush the last participant, which the loop never appends
features_all_sensors["pid"] = last_pid
features = pd.concat([features, features_all_sensors], axis=0, ignore_index=True, sort=False)
features.set_index(["pid", "local_date"], inplace=True)

# select days based on the input of "phone_valid_sensed_days"
selected_participants_and_days = pd.DataFrame()
for path in snakemake.input["phone_valid_sensed_days"]:
    pid = path.split("/")[2]
    phone_valid_sensed_days = pd.read_csv(path)
    phone_valid_sensed_days = phone_valid_sensed_days[phone_valid_sensed_days["is_valid_sensed_day"] == True]
    phone_valid_sensed_days["pid"] = pid
    selected_participants_and_days = pd.concat([selected_participants_and_days, phone_valid_sensed_days], axis=0)

# keep only the (pid, local_date) rows that are valid sensed days
selected_participants_and_days.set_index(["pid", "local_date"], inplace=True)
features = features.loc[features.index.intersection(selected_participants_and_days.index), :]

# get correlation matrix
features = features.astype(float)
# min_periods scales with the number of rows kept above
corr_matrix = features.corr(method=snakemake.params["corr_method"], min_periods=min_rows_ratio * features.shape[0])

# replace correlation coefficients whose absolute value is below corr_threshold with NA
corr_matrix[(corr_matrix > -corr_threshold) & (corr_matrix < corr_threshold)] = np.nan

# plot heatmap
getCorrMatrixHeatmap(corr_matrix, snakemake.output[0])
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def getPhoneDataYieldHeatmap(data_for_plot, y_axis_labels, time_segment, type, html_file):
    """Append one phone-data-yield heatmap to an already open HTML file.

    data_for_plot -- DataFrame; its cell values become the heatmap colors (and
                     hover text) and its column labels become the x values.
    y_axis_labels -- labels for the heatmap rows.
    time_segment  -- time segment label, only used in the title.
    type          -- unit name inserted into the title and hover template
                     (note: the parameter name shadows the builtin ``type``).
    html_file     -- writable text handle that receives the HTML fragment.
    """
    cell_values = data_for_plot.values.tolist()
    hover_template = "Time since first segment: %{x}<br>Participant: %{y}<br>Ratiovalidyielded" + type + ": %{z}<extra></extra>"
    title = "Heatmap of valid yielded " + type + " ratio for " + time_segment + " segments.<br>y-axis shows participant information (format: pid.label).<br>x-axis shows the time since their first segment.<br>z-axis (color) shows valid yielded " + type + " ratio during a segment instance."

    # The color range is pinned to [0, 1] so every figure shares one scale.
    heatmap = go.Heatmap(
        z=cell_values,
        x=data_for_plot.columns.tolist(),
        y=y_axis_labels,
        hovertext=cell_values,
        hovertemplate=hover_template,
        zmin=0,
        zmax=1,
        colorscale="Viridis",
    )
    fig = go.Figure(data=heatmap)
    fig.update_layout(title=title)

    layout = fig["layout"]
    layout["xaxis"].update(side="bottom")
    # A large top margin leaves room for the multi-line title.
    layout.update(xaxis_title="Time Since First Segment", margin=dict(t=160))

    # Only a fragment is written so several figures can share one HTML file.
    html_file.write(fig.to_html(full_html=False, include_plotlyjs="cdn"))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Collect, for every time segment, one column of yield ratios per participant.
# Keys of both dicts are time segment labels; values are DataFrames indexed by
# the time elapsed since that participant's first segment of that label.
y_axis_labels, phone_data_yield_minutes, phone_data_yield_hours = [], {}, {}
for phone_data_yield_path, participant_file_path, time_segments_path in zip(snakemake.input["phone_data_yield"], snakemake.input["participant_file"], snakemake.input["time_segments_labels"]):

    # set pid.label as y_axis_label
    pid = phone_data_yield_path.split("/")[3]
    time_segments = pd.read_csv(time_segments_path, header=0)["label"]

    with open(participant_file_path, "r", encoding="utf-8") as f:
        participant_file = yaml.safe_load(f)
    label = participant_file["PHONE"]["LABEL"]

    y_axis_label = pid + "." + label
    y_axis_labels.append(y_axis_label)

    phone_data_yield = pd.read_csv(phone_data_yield_path, index_col=["local_segment_start_datetime"], parse_dates=["local_segment_start_datetime"])
    # make sure the phone_data_yield file contains "phone_data_yield_rapids_ratiovalidyieldedminutes" and "phone_data_yield_rapids_ratiovalidyieldedhours" columns
    if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
        raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")

    if phone_data_yield.empty:
        continue

    for time_segment in time_segments:
        phone_data_yield_per_segment = phone_data_yield[phone_data_yield["local_segment_label"] == time_segment]
        if phone_data_yield_per_segment.empty:
            continue

        # re-base the index to the time elapsed since this participant's first segment start; it becomes the x axis
        phone_data_yield_per_segment.index = phone_data_yield_per_segment.index - phone_data_yield_per_segment.index.min()

        phone_data_yield_minutes_per_segment = phone_data_yield_per_segment[["phone_data_yield_rapids_ratiovalidyieldedminutes"]].rename(columns={"phone_data_yield_rapids_ratiovalidyieldedminutes": y_axis_label})
        phone_data_yield_hours_per_segment = phone_data_yield_per_segment[["phone_data_yield_rapids_ratiovalidyieldedhours"]].rename(columns={"phone_data_yield_rapids_ratiovalidyieldedhours": y_axis_label})

        if time_segment not in phone_data_yield_minutes:
            phone_data_yield_minutes[time_segment] = phone_data_yield_minutes_per_segment
            phone_data_yield_hours[time_segment] = phone_data_yield_hours_per_segment
        else:
            # add this participant as a new column, aligning on the elapsed-time index
            phone_data_yield_minutes[time_segment] = pd.concat([phone_data_yield_minutes[time_segment], phone_data_yield_minutes_per_segment], axis=1, sort=True)
            phone_data_yield_hours[time_segment] = pd.concat([phone_data_yield_hours[time_segment], phone_data_yield_hours_per_segment], axis=1, sort=True)


# The context manager guarantees the report is flushed and closed even if a
# plot fails part-way through.
with open(snakemake.output[0], "a", encoding="utf-8") as html_file:
    if not phone_data_yield_minutes:
        html_file.write("There is no sensor data for the sensors in [PHONE_DATA_YIELD][SENSORS].")
    for time_segment in phone_data_yield_minutes:
        # rows = participants (in y_axis_labels order; participants without data become all-NaN rows)
        minutes_data_for_plot = phone_data_yield_minutes[time_segment].transpose().reindex(pd.Index(y_axis_labels)).round(3)
        hours_data_for_plot = phone_data_yield_hours[time_segment].transpose().reindex(pd.Index(y_axis_labels)).round(3)

        getPhoneDataYieldHeatmap(minutes_data_for_plot, y_axis_labels, time_segment, "minutes", html_file)
        getPhoneDataYieldHeatmap(hours_data_for_plot, y_axis_labels, time_segment, "hours", html_file)
|
|
@ -1,68 +0,0 @@
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import plotly.io as pio
|
|
||||||
import plotly.graph_objects as go
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
def getComplianceMatrix(dates, compliance_bins):
    """Return one list of bin counts per date.

    dates           -- iterable of values compared against the "local_date" column.
    compliance_bins -- DataFrame with at least "local_date" and "count" columns.

    The result has one entry per element of ``dates`` (in the same order); each
    entry lists the "count" values of the rows whose "local_date" equals that
    date, and is empty when no row matches.
    """
    return [
        compliance_bins[compliance_bins["local_date"] == day]["count"].tolist()
        for day in dates
    ]
|
|
||||||
|
|
||||||
|
|
||||||
def getRowCountHeatmap(dates, row_count_per_bin, sensor_name, pid, output_path, bin_size):
    """Write a day-by-time-of-day row count heatmap to ``output_path``.

    dates             -- date-like objects, one per heatmap row.
    row_count_per_bin -- matrix of counts passed to the heatmap as ``z``.
    sensor_name, pid  -- only used in the figure title.
    output_path       -- destination HTML file.
    bin_size          -- bin width in minutes; determines the x-axis labels.

    The title also reads the module-level names ``label`` and ``device_id``.
    """
    bins_per_hour = int(60 / bin_size)

    # One "HH:MM" label for every bin of a 24-hour day.
    x_axis_labels = []
    for bin_idx in range(24 * bins_per_hour):
        hour, bin_in_hour = divmod(bin_idx, bins_per_hour)
        x_axis_labels.append("{0:0=2d}".format(hour) + ":" + "{0:0=2d}".format(bin_in_hour * bin_size))

    y_axis_labels = [datetime.datetime.strftime(date, '%Y/%m/%d') for date in dates]
    title = "Row count heatmap for " + sensor_name + " of " + pid + "<br>Label: " + label + ", device_id: " + device_id

    heatmap = go.Heatmap(z=row_count_per_bin, x=x_axis_labels, y=y_axis_labels, colorscale="Viridis")
    plot = go.Figure(data=heatmap)
    plot.update_layout(title=title)
    pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Script body: bin one sensor's rows by time of day and plot one heatmap row per date.
sensor_data = pd.read_csv(snakemake.input["sensor"], encoding="ISO-8859-1")
sensor_name = snakemake.params["table"]
pid = snakemake.params["pid"]
bin_size = snakemake.params["bin_size"]

# The participant file is read positionally: the device id is the last
# comma-separated field of the first line and the label is the third line.
with open(snakemake.input["pid_file"], encoding="ISO-8859-1") as external_file:
    external_file_content = external_file.readlines()
device_id = external_file_content[0].split(",")[-1]
label = external_file_content[2]

# check if we have sensor data
if sensor_data.empty:
    with open(snakemake.output[0], "w") as empty_html:
        empty_html.write("There is no " + sensor_name + " data for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
else:
    start_date = sensor_data["local_date"][0]
    end_date = sensor_data.at[sensor_data.index[-1], "local_date"]

    sensor_data["local_date_time"] = pd.to_datetime(sensor_data["local_date_time"])
    # .copy() so the column assignment below does not write into a slice of the original frame
    sensor_data = sensor_data[["local_date_time"]].copy()
    sensor_data["count"] = 1

    # Add first and last day boundaries for resampling (zero-count rows at
    # 00:00:00 of the first day and 23:59:59 of the last day).
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    day_boundaries = pd.DataFrame({
        "local_date_time": [
            datetime.datetime.strptime(start_date + " 00:00:00", "%Y-%m-%d %H:%M:%S"),
            datetime.datetime.strptime(end_date + " 23:59:59", "%Y-%m-%d %H:%M:%S"),
        ],
        "count": [0, 0],
    })
    sensor_data = pd.concat([sensor_data, day_boundaries])

    # Resample into bins with the size of bin_size
    resampled_bins = pd.DataFrame(sensor_data.resample(str(bin_size) + "T", on="local_date_time")["count"].sum())

    # Extract list of dates for creating the heatmap
    resampled_bins.reset_index(inplace=True)
    resampled_bins["local_date"] = resampled_bins["local_date_time"].dt.date
    dates = resampled_bins["local_date"].drop_duplicates().tolist()

    # Create heatmap; zero counts are replaced with NaN before plotting
    row_count_per_bin = getComplianceMatrix(dates, resampled_bins)
    row_count_per_bin = np.asarray(row_count_per_bin)
    row_count_per_bin = np.where(row_count_per_bin == 0, np.nan, row_count_per_bin)
    getRowCountHeatmap(dates, row_count_per_bin, sensor_name, pid, snakemake.output[0], bin_size)
|
|
|
@ -1,50 +0,0 @@
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import plotly.io as pio
|
|
||||||
import plotly.graph_objects as go
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
def getDatesComplianceMatrix(phone_sensed_bins):
    """Split a DataFrame into its index and a list of row-value lists.

    Returns a tuple ``(dates, compliance_matrix)`` where ``dates`` is the
    frame's index and ``compliance_matrix`` holds, for every index label in
    order, the values of that row as a plain list.
    """
    dates = phone_sensed_bins.index
    compliance_matrix = [phone_sensed_bins.loc[day, :].tolist() for day in dates]
    return dates, compliance_matrix
|
|
||||||
|
|
||||||
def getComplianceHeatmap(dates, compliance_matrix, pid, output_path, bin_size):
    """Write the sensed-bins heatmap for one participant to ``output_path``.

    dates             -- date-like objects, one per heatmap row.
    compliance_matrix -- matrix passed to the heatmap as ``z``.
    pid               -- participant id, only used in the title.
    output_path       -- destination HTML file.
    bin_size          -- bin width in minutes; determines the x-axis labels.

    The title also reads the module-level names ``label`` and ``device_id``.
    """
    bins_per_hour = int(60 / bin_size)

    # One "HH:MM" label for every bin of a 24-hour day.
    x_axis_labels = []
    for bin_idx in range(24 * bins_per_hour):
        hour, bin_in_hour = divmod(bin_idx, bins_per_hour)
        x_axis_labels.append("{0:0=2d}".format(hour) + ":" + "{0:0=2d}".format(bin_in_hour * bin_size))

    y_axis_labels = [datetime.datetime.strftime(date, '%Y/%m/%d') for date in dates]
    title = "Heatmap sensed bins.<br>Five-minute bins showing how many sensors logged at least one row of data in that period for " + pid + "<br>Label: " + label + ", device_id: " + device_id

    heatmap = go.Heatmap(
        z=compliance_matrix,
        x=x_axis_labels,
        y=y_axis_labels,
        colorscale='Viridis',
        colorbar={'tick0': 0, 'dtick': 1},  # integer ticks on the color bar
    )
    plot = go.Figure(data=heatmap)
    plot.update_layout(title=title)
    pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
|
|
||||||
|
|
||||||
# Script body: plot the sensed-bins heatmap for the current participant.
# get current patient id
pid = snakemake.params["pid"]
bin_size = snakemake.params["bin_size"]

# The participant file is read positionally: the device id is the last
# comma-separated field of the first line and the label is the third line.
with open(snakemake.input["pid_file"], encoding="ISO-8859-1") as external_file:
    external_file_content = external_file.readlines()
device_id = external_file_content[0].split(",")[-1]
label = external_file_content[2]

phone_sensed_bins = pd.read_csv(snakemake.input["sensor"], parse_dates=["local_date"], index_col="local_date")

if phone_sensed_bins.empty:
    # the context manager closes the report even if the write fails
    with open(snakemake.output[0], "w", encoding="ISO-8859-1") as empty_html:
        empty_html.write("There is no sensor data for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
else:
    # resample to impute missing dates (they become all-zero rows)
    phone_sensed_bins = phone_sensed_bins.resample("1D").asfreq().fillna(0)
    # get dates and compliance_matrix
    dates, compliance_matrix = getDatesComplianceMatrix(phone_sensed_bins)
    # convert compliance_matrix from list to np.array and replace 0 with np.nan
    compliance_matrix = np.asarray(compliance_matrix)
    compliance_matrix = np.where(compliance_matrix == 0, np.nan, compliance_matrix)
    # get heatmap
    getComplianceHeatmap(dates, compliance_matrix, pid, snakemake.output[0], bin_size)
|
|
|
@ -0,0 +1,89 @@
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
from importlib import util
|
||||||
|
from pathlib import Path
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
|
def getRowCountHeatmap(data_for_plot, scaled_data_for_plot, pid, time_segment, html_file):
    """Append one sensor row-count heatmap to an already open HTML file.

    data_for_plot        -- DataFrame providing the x values (columns), the
                            y values (index) and the hover text (cell values).
    scaled_data_for_plot -- DataFrame whose cell values drive the colors.
    pid, time_segment    -- only used in the figure title.
    html_file            -- writable text handle that receives the HTML fragment.

    The title also reads the module-level name ``label``.
    """
    hover_template = "Segment start: %{x}<br>Sensor: %{y}<br>Row count: %{hovertext}<extra></extra>"
    title = "Heatmap of sensor row count for " + time_segment + " segments. Pid: " + pid +". Label: " + label + "<br>y-axis shows the included sensors.<br>x-axis shows the start (date and time) of a time segment.<br>z-axis (color) shows row count per sensor per segment instance."

    # Colors come from the scaled values (range pinned to [0, 1]) while the
    # hover text shows the unscaled values.
    heatmap = go.Heatmap(
        z=scaled_data_for_plot.values.tolist(),
        x=data_for_plot.columns,
        y=data_for_plot.index,
        hovertext=data_for_plot.values.tolist(),
        hovertemplate=hover_template,
        zmin=0,
        zmax=1,
        colorscale='Viridis',
    )
    fig = go.Figure(data=heatmap)
    fig.update_layout(title=title)
    # A large top margin leaves room for the multi-line title.
    fig["layout"].update(margin=dict(t=160))

    # Only a fragment is written so several figures can share one HTML file.
    html_file.write(fig.to_html(full_html=False, include_plotlyjs="cdn"))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# import filter_data_by_segment from src/features/utils/utils.py
spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "features" / "utils" / "utils.py"))
mod = util.module_from_spec(spec)
spec.loader.exec_module(mod)
filter_data_by_segment = getattr(mod, "filter_data_by_segment")


phone_data_yield = pd.read_csv(snakemake.input["phone_data_yield"], index_col=["local_segment_start_datetime"], parse_dates=["local_segment_start_datetime"])
# make sure the phone_data_yield file contains "phone_data_yield_rapids_ratiovalidyieldedminutes" and "phone_data_yield_rapids_ratiovalidyieldedhours" columns
if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
    raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")
phone_data_yield = phone_data_yield[["local_segment_label", "phone_data_yield_rapids_ratiovalidyieldedminutes", "phone_data_yield_rapids_ratiovalidyieldedhours"]]

time_segments = pd.read_csv(snakemake.input["time_segments_labels"], header=0)["label"]
pid = snakemake.params["pid"]

with open(snakemake.input["participant_file"], "r", encoding="utf-8") as f:
    participant_file = yaml.safe_load(f)
label = participant_file["PHONE"]["LABEL"]

sensor_names = []
# one independent, initially empty frame per time segment (no shared object between keys)
sensors_row_count = {time_segment: pd.DataFrame() for time_segment in time_segments}

for sensor_path in snakemake.input["all_sensors"]:
    sensor_data = pd.read_csv(sensor_path, usecols=["assigned_segments"])
    # the sensor name is the file name without the "_with_datetime.csv" suffix
    sensor_name = sensor_path.split("/")[-1].replace("_with_datetime.csv", "")
    sensor_names.append(sensor_name)

    if sensor_data.empty:
        continue

    for time_segment in time_segments:
        sensor_data_per_segment = filter_data_by_segment(sensor_data, time_segment)
        if sensor_data_per_segment.empty:
            continue

        # extract local start datetime of the segment from "local_segment" column
        sensor_data_per_segment["local_segment_start_datetime"] = pd.to_datetime(sensor_data_per_segment["local_segment"].apply(lambda x: x.split("#")[1].split(",")[0]))
        # number of rows of this sensor per segment instance, stored as a column named after the sensor
        sensor_row_count = sensor_data_per_segment.groupby("local_segment_start_datetime")[["local_segment"]].count().rename(columns={"local_segment": sensor_name})
        sensors_row_count[time_segment] = pd.concat([sensors_row_count[time_segment], sensor_row_count], axis=1, sort=False)

# add phone data yield features and plot heatmap
# The two yield features are appended last; the scaling step below relies on
# them being the final two entries of sensor_names.
sensor_names.extend(["ratiovalidyieldedminutes", "ratiovalidyieldedhours"])
# The context manager guarantees the report is flushed and closed even if a
# plot fails part-way through.
with open(snakemake.output[0], "a", encoding="utf-8") as html_file:
    for time_segment in time_segments:
        if not phone_data_yield.empty:
            phone_data_yield_per_segment = phone_data_yield[phone_data_yield["local_segment_label"] == time_segment].rename(columns={"phone_data_yield_rapids_ratiovalidyieldedminutes": "ratiovalidyieldedminutes","phone_data_yield_rapids_ratiovalidyieldedhours": "ratiovalidyieldedhours"}).round(3)
            if not phone_data_yield_per_segment.empty:
                sensors_row_count[time_segment] = pd.concat([sensors_row_count[time_segment], phone_data_yield_per_segment], axis=1, sort=True)

        # consider all the sensors (rows = sensor_names; names without data become all-NaN rows,
        # columns not listed in sensor_names are dropped by the reindex)
        data_for_plot = sensors_row_count[time_segment].transpose().reindex(pd.Index(sensor_names))

        if data_for_plot.empty:
            html_file.write("There are no records of selected sensors in database for " + time_segment + " segments. Pid: " + pid + ". Label: " + label + ".<br>")
        else:
            # except for phone data yield sensor, scale each sensor (row) to the range of [0, 1]
            # (a row whose values are all equal is divided by that value instead of min-max scaled)
            scaled_data_for_plot = data_for_plot.copy()
            scaled_data_for_plot.loc[sensor_names[:-2]] = scaled_data_for_plot.fillna(np.nan).loc[sensor_names[:-2]].apply(lambda x: (x - np.nanmin(x)) / (np.nanmax(x) - np.nanmin(x)) if np.nanmax(x) != np.nanmin(x) else (x / np.nanmin(x)), axis=1)

            getRowCountHeatmap(data_for_plot, scaled_data_for_plot, pid, time_segment, html_file)
|
|
@ -0,0 +1,100 @@
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
from importlib import util
|
||||||
|
from pathlib import Path
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
|
def colors2colorscale(colors):
    """Build a stepped (discrete) Plotly colorscale from a list of colors.

    colors -- indexable sequence of color specifications (names or hex strings).

    Returns a list of ``[position, color]`` pairs with positions in [0, 1].
    For ``n`` colors, color ``i`` (for ``i < n - 1``) fills the interval
    ``[i / (n - 1), (i + 1) / (n - 1)]`` and the last color is anchored at
    position 1. An empty input yields an empty list.

    A single color is returned as a flat two-point scale (anchored at both 0
    and 1); the previous result ``[[1, color]]`` lacked the anchor at 0 that a
    Plotly colorscale needs.
    """
    length = len(colors)
    if length == 0:
        return []
    if length == 1:
        return [[0, colors[0]], [1, colors[0]]]

    colorscale = []
    last = length - 1
    for i in range(last):
        # repeating the color at both ends of its interval produces a hard step instead of a gradient
        colorscale.extend([[i / last, colors[i]], [(i + 1) / last, colors[i]]])
    colorscale.append([1, colors[last]])
    return colorscale
|
||||||
|
|
||||||
|
def getSensorsPerMinPerSegmentHeatmap(phone_data_yield, pid, time_segment, html_file):
    """Append one sensors-per-minute heatmap to an already open HTML file.

    phone_data_yield  -- DataFrame; cell values drive the colors, the index
                         gives the y values and the column labels are minute
                         offsets that are converted to Timedelta x values.
    pid, time_segment -- only used in the figure title.
    html_file         -- writable text handle that receives the HTML fragment.

    Reads the module-level names ``colors`` (fed to ``colors2colorscale``) and
    ``label`` (used in the title).
    """
    x_axis_labels = [pd.Timedelta(minutes=minute) for minute in phone_data_yield.columns]

    # The color range is fixed to 0..16 with 16 tick labels (0..15), each placed
    # at the middle of a unit-wide color band.
    colorbar = dict(thickness=25, tickvals=[1/2 + x for x in range(16)], ticktext=[x for x in range(16)])
    title = "Number of sensors with any data per minute for " + time_segment + " segments. Pid: "+pid+". Label: " + label + "<br>y-axis shows the start (date and time) of a time segment.<br>x-axis shows the time since the start of the time segment.<br>z-axis (color) shows how many sensors logged at least one row of data per minute."

    heatmap = go.Heatmap(
        z=phone_data_yield.values.tolist(),
        x=x_axis_labels,
        y=phone_data_yield.index,
        zmin=0,
        zmax=16,
        colorscale=colors2colorscale(colors),
        colorbar=colorbar,
    )
    fig = go.Figure(data=heatmap)
    fig.update_layout(title=title)
    # A large top margin leaves room for the multi-line title.
    fig["layout"].update(margin=dict(t=160))

    # Only a fragment is written so several figures can share one HTML file.
    html_file.write(fig.to_html(full_html=False, include_plotlyjs="cdn"))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# import filter_data_by_segment from src/features/utils/utils.py
spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "features" / "utils" / "utils.py"))
mod = util.module_from_spec(spec)
spec.loader.exec_module(mod)
filter_data_by_segment = getattr(mod, "filter_data_by_segment")


# 17 colors: one per value 0..16 of the fixed heatmap color range
colors = ["red", "#3D0751", "#423176", "#414381", "#3F5688", "#42678B", "#42768C", "#45868B", "#4A968A", "#53A485", "#5FB57E", "#76C170", "#91CF63", "#B4DA55", "#D9E152", "#F8E755", "#DEE00F"]
pid = snakemake.params["pid"]
time_segments_labels = pd.read_csv(snakemake.input["time_segments_labels"], header=0)

with open(snakemake.input["participant_file"], "r", encoding="utf-8") as f:
    participant_file = yaml.safe_load(f)
label = participant_file["PHONE"]["LABEL"]

phone_data_yield = pd.read_csv(snakemake.input["phone_data_yield"], parse_dates=["local_date_time"])

# The context manager guarantees the report is flushed and closed even if a
# plot fails part-way through.
with open(snakemake.output[0], "a", encoding="utf-8") as html_file:
    if phone_data_yield.empty:
        html_file.write("There is no sensor data for " + pid + " (pid) and " + label + " (label).")
    else:
        for time_segment in time_segments_labels["label"]:
            phone_data_yield_per_segment = filter_data_by_segment(phone_data_yield, time_segment)

            if phone_data_yield_per_segment.empty:
                html_file.write("There is no sensor data of " + time_segment + " segments for " + pid + " (pid) and " + label + " (label).<br>")
                continue

            # calculate the length (in minute) of per segment instance
            # ("timestamps_segment" holds "start,end" values that are divided by 1000 * 60 to get minutes)
            phone_data_yield_per_segment["length"] = phone_data_yield_per_segment["timestamps_segment"].str.split(",").apply(lambda x: int((int(x[1])-int(x[0])) / (1000 * 60)))
            # calculate the number of sensors logged at least one row of data per minute.
            phone_data_yield_per_segment = phone_data_yield_per_segment.groupby(["local_segment", "length", "local_date", "local_hour", "local_minute"])[["sensor", "local_date_time"]].max().reset_index()
            # extract local start datetime of the segment from "local_segment" column
            phone_data_yield_per_segment["local_segment_start_datetimes"] = pd.to_datetime(phone_data_yield_per_segment["local_segment"].apply(lambda x: x.split("#")[1].split(",")[0]))
            # calculate the number of minutes after local start datetime of the segment
            phone_data_yield_per_segment["minutes_after_segment_start"] = ((phone_data_yield_per_segment["local_date_time"] - phone_data_yield_per_segment["local_segment_start_datetimes"]) / pd.Timedelta(minutes=1)).astype("int")

            # impute missing rows with 0: build every (segment start, minute offset) pair
            # for offsets 0..length of each segment instance and reindex onto it
            columns_for_full_index = phone_data_yield_per_segment[["local_segment_start_datetimes", "length"]].drop_duplicates(keep="first")
            columns_for_full_index = columns_for_full_index.apply(lambda row: [[row["local_segment_start_datetimes"], x] for x in range(row["length"] + 1)], axis=1)
            full_index = []
            for columns in columns_for_full_index:
                # extend in place instead of rebuilding the list on every iteration
                full_index.extend(columns)
            full_index = pd.MultiIndex.from_tuples(full_index, names=("local_segment_start_datetimes", "minutes_after_segment_start"))
            phone_data_yield_per_segment = phone_data_yield_per_segment.set_index(["local_segment_start_datetimes", "minutes_after_segment_start"]).reindex(full_index).reset_index().fillna(0)

            # transpose the dataframe per local start datetime of the segment and discard the useless index layer
            phone_data_yield_per_segment = phone_data_yield_per_segment.groupby("local_segment_start_datetimes")[["minutes_after_segment_start", "sensor"]].apply(lambda x: x.set_index("minutes_after_segment_start").transpose())
            phone_data_yield_per_segment.index = phone_data_yield_per_segment.index.get_level_values("local_segment_start_datetimes")

            # get heatmap
            getSensorsPerMinPerSegmentHeatmap(phone_data_yield_per_segment, pid, time_segment, html_file)
|
|
@ -8,15 +8,18 @@ phone_data_yield = pd.read_csv(snakemake.input[0])
|
||||||
if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
|
if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
|
||||||
raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")
|
raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")
|
||||||
|
|
||||||
|
html_file = open(snakemake.output[0], "a", encoding="utf-8")
|
||||||
|
if phone_data_yield.empty:
|
||||||
|
html_file.write("There is no sensor data for the sensors in [PHONE_DATA_YIELD][SENSORS].")
|
||||||
|
else:
|
||||||
# plot ratio valid yielded minutes histogram
|
# plot ratio valid yielded minutes histogram
|
||||||
fig_ratiovalidyieldedminutes = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedminutes", color="local_segment_label")
|
fig_ratiovalidyieldedminutes = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedminutes", color="local_segment_label")
|
||||||
fig_ratiovalidyieldedminutes.update_layout(title="Ratio Valid Yielded Minutes Histogram")
|
fig_ratiovalidyieldedminutes.update_layout(title="Histogram of valid yielded minutes ratio per time segment.")
|
||||||
|
html_file.write(fig_ratiovalidyieldedminutes.to_html(full_html=False, include_plotlyjs="cdn"))
|
||||||
|
|
||||||
# plot ratio valid yielded hours histogram
|
# plot ratio valid yielded hours histogram
|
||||||
fig_ratiovalidyieldedhours = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedhours", color="local_segment_label")
|
fig_ratiovalidyieldedhours = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedhours", color="local_segment_label")
|
||||||
fig_ratiovalidyieldedhours.update_layout(title="Ratio Valid Yielded Hours Histogram")
|
fig_ratiovalidyieldedhours.update_layout(title="Histogram of valid yielded hours ratio per time segment.")
|
||||||
|
|
||||||
|
|
||||||
with open(snakemake.output[0], "a") as html_file:
|
|
||||||
html_file.write(fig_ratiovalidyieldedminutes.to_html(full_html=False, include_plotlyjs="cdn"))
|
|
||||||
html_file.write(fig_ratiovalidyieldedhours.to_html(full_html=False, include_plotlyjs="cdn"))
|
html_file.write(fig_ratiovalidyieldedhours.to_html(full_html=False, include_plotlyjs="cdn"))
|
||||||
|
|
||||||
|
html_file.close()
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
---
|
---
|
||||||
title: "Heatmap Sensed Bins Report"
|
title: "Sensor Row Count per Time Segment For All Participants"
|
||||||
author:
|
author:
|
||||||
- "MoSHI Pipeline"
|
- "RAPIDS"
|
||||||
date: "`r format(Sys.time(), '%d %B, %Y')`"
|
date: "`r format(Sys.time(), '%d %B, %Y')`"
|
||||||
params:
|
params:
|
||||||
rmd: "heatmap_sensed_bins_all_participants.Rmd"
|
rmd: "merge_heatmap_sensor_row_count_per_time_segment.Rmd"
|
||||||
output:
|
output:
|
||||||
html_document:
|
html_document:
|
||||||
highlight: tango
|
highlight: tango
|
||||||
|
@ -17,14 +17,17 @@ output:
|
||||||
smooth_scroll: yes
|
smooth_scroll: yes
|
||||||
---
|
---
|
||||||
|
|
||||||
|
<style>
|
||||||
|
.main-container {min-width:800px; max-width:100%;}
|
||||||
|
</style>
|
||||||
|
|
||||||
```{r include=FALSE}
|
```{r include=FALSE}
|
||||||
source("renv/activate.R")
|
source("renv/activate.R")
|
||||||
```
|
```
|
||||||
|
|
||||||
## All phone sensors
|
|
||||||
|
|
||||||
```{r, echo=FALSE}
|
```{r, echo=FALSE}
|
||||||
heatmaps <- snakemake@input[["heatmap_sensed_bins"]]
|
heatmaps <- snakemake@input[["heatmap_sensor_row_count_per_time_segment"]]
|
||||||
heatmaps.html <- vector(mode="list", length(heatmaps))
|
heatmaps.html <- vector(mode="list", length(heatmaps))
|
||||||
|
|
||||||
for(pid in 1:length(heatmaps)){
|
for(pid in 1:length(heatmaps)){
|
|
@ -1,10 +1,10 @@
|
||||||
---
|
---
|
||||||
title: "Heatmap Rows Report"
|
title: "Sensors per Minute per Time Segment for All Participants"
|
||||||
author:
|
author:
|
||||||
- "MoSHI Pipeline"
|
- "RAPIDS"
|
||||||
date: "`r format(Sys.time(), '%d %B, %Y')`"
|
date: "`r format(Sys.time(), '%d %B, %Y')`"
|
||||||
params:
|
params:
|
||||||
rmd: "heatmap_days_by_sensors_all_participants.Rmd"
|
rmd: "merge_heatmap_sensors_per_minute_per_time_segment.Rmd"
|
||||||
output:
|
output:
|
||||||
html_document:
|
html_document:
|
||||||
highlight: tango
|
highlight: tango
|
||||||
|
@ -17,14 +17,17 @@ output:
|
||||||
smooth_scroll: yes
|
smooth_scroll: yes
|
||||||
---
|
---
|
||||||
|
|
||||||
|
<style>
|
||||||
|
.main-container {min-width:800px; max-width:100%;}
|
||||||
|
</style>
|
||||||
|
|
||||||
```{r include=FALSE}
|
```{r include=FALSE}
|
||||||
source("renv/activate.R")
|
source("renv/activate.R")
|
||||||
```
|
```
|
||||||
|
|
||||||
## All phone sensors
|
|
||||||
|
|
||||||
```{r, echo=FALSE}
|
```{r, echo=FALSE}
|
||||||
heatmaps <- snakemake@input[["heatmap_rows"]]
|
heatmaps <- snakemake@input[["heatmap_sensors_per_minute_per_time_segment"]]
|
||||||
heatmaps.html <- vector(mode="list", length(heatmaps))
|
heatmaps.html <- vector(mode="list", length(heatmaps))
|
||||||
|
|
||||||
for(pid in 1:length(heatmaps)){
|
for(pid in 1:length(heatmaps)){
|
|
@ -1,102 +0,0 @@
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import plotly.io as pio
|
|
||||||
import plotly.graph_objects as go
|
|
||||||
from dateutil import tz
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
def getOneRow(data_per_participant, last_certain_dates, col_name, row, expected_num_of_days, only_show_valid_days):
    """Extend ``row`` with one participant's values of ``col_name`` and return it.

    data_per_participant -- CSV path or buffer with a "local_date" column, used
                            as the index.
    last_certain_dates   -- dates to look up when ``expected_num_of_days`` is
                            not -1.
    col_name             -- column to extract; "num_sensors" is computed here
                            as the row-wise maximum of all columns.
    row                  -- list of values collected so far.
    expected_num_of_days -- -1 selects every day between the first and last
                            date; any other value selects ``last_certain_dates``.
    only_show_valid_days -- when true and ``col_name`` is "valid_sensed_hours",
                            days with ``is_valid_sensed_day == False`` get NaN
                            before the values are extracted.

    With -1 a new list is returned and ``row`` is left untouched; otherwise
    ``row`` itself is extended in place and returned.
    """
    data = pd.read_csv(data_per_participant, index_col=["local_date"])

    if col_name == "num_sensors":
        data["num_sensors"] = data.max(axis=1)

    hide_invalid_days = only_show_valid_days and col_name == "valid_sensed_hours"
    if hide_invalid_days:
        # replace invalid days' valid sensed hours with np.nan to let our heatmap only shows valid days
        invalid_days = data[data["is_valid_sensed_day"] == False].index
        data.loc[invalid_days, "valid_sensed_hours"] = np.nan

    if expected_num_of_days != -1:
        # only show last certain days; dates missing from the file contribute 0
        row.extend(data.loc[date][col_name] if date in data.index else 0 for date in last_certain_dates)
        return row

    # show all days
    data.index = pd.to_datetime(data.index)
    start_date = data.index.min()
    # upsample data into one day bins
    data = data.resample("1D").sum()
    # re-index by the number of days since the first date
    data["date_idx"] = (data.index - start_date).days
    data.set_index("date_idx", inplace=True, drop=True)
    return row + data[col_name].tolist()
|
|
||||||
|
|
||||||
def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_certain_dates, bin_size, min_bins_per_hour, expected_num_of_days, output_path):
    """Render the overall compliance heatmap and write it to ``output_path`` as HTML.

    The colour of each cell comes from ``valid_sensed_hours`` and the hover
    text from ``sensors_with_data``; both frames are indexed by the labels in
    ``last_certain_dates``. Rows are labelled ``<pid>.<label>`` using the
    ``pid`` and ``label`` columns of ``sensors_with_data``.

    ``expected_num_of_days == -1`` selects the "all days" wording (x values
    are used as given); any other value selects the "last N days" wording and
    shows the x values with ``-`` replaced by ``/``.
    """
    showing_all_days = expected_num_of_days == -1

    if showing_all_days:
        x_labels = last_certain_dates
        hover_prefix = "Day index: %{x}"
        period = "all days"
    else:
        x_labels = [day.replace("-", "/") for day in last_certain_dates]
        hover_prefix = "Date: %{x}"
        period = "last " + str(expected_num_of_days) + " days"

    participant_ids = sensors_with_data["pid"].to_list()
    participant_labels = sensors_with_data["label"].to_list()
    y_labels = [
        participant_id + "." + participant_label
        for participant_id, participant_label in zip(participant_ids, participant_labels)
    ]

    hover = hover_prefix + "<br>Participant: %{y}<br>Valid sensed hours: %{z}<br>Number of sensors with data: %{text}<extra></extra>"

    bins_per_hour = int(60 / bin_size)
    title = (
        "Overall compliance heatmap for " + period + "."
        + "<br>Bin's color shows valid sensed hours for that day."
        + "<br>A valid hour has at least one row of any sensor in " + str(min_bins_per_hour)
        + " out of " + str(bins_per_hour) + " bins of " + str(bin_size) + " minutes."
        + "<br>You can hover over every day to see the number of sensors with data in that day."
    )

    heatmap = go.Heatmap(
        z=valid_sensed_hours[last_certain_dates].values,
        x=x_labels,
        y=y_labels,
        text=sensors_with_data[last_certain_dates].values,
        hovertemplate=hover,
        colorscale="Viridis",
        colorbar={"tick0": 0, "dtick": 1},
        showscale=True,
    )
    plot = go.Figure(data=heatmap)
    plot.update_layout(title=title)

    layout = plot["layout"]
    layout["xaxis"].update(side="bottom")
    layout.update(xaxis_title="Day indexes")
    layout.update(margin=dict(t=160))

    pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
|
|
||||||
|
|
||||||
|
|
||||||
# Inputs and parameters supplied by the Snakemake rule that runs this script.
phone_sensed_bins = snakemake.input["phone_sensed_bins"]
phone_valid_sensed_days = snakemake.input["phone_valid_sensed_days"]
pid_files = snakemake.input["pid_files"]
only_show_valid_days = snakemake.params["only_show_valid_days"]
local_timezone = snakemake.params["local_timezone"]
bin_size = snakemake.params["bin_size"]
min_bins_per_hour = snakemake.params["min_bins_per_hour"]
expected_num_of_days = int(snakemake.params["expected_num_of_days"])

if expected_num_of_days < -1:
    raise ValueError("EXPECTED_NUM_OF_DAYS of OVERALL_COMPLIANCE_HEATMAP section in config.yaml must be larger or equal to -1.")

last_certain_dates = []
if expected_num_of_days != -1:
    # Dates to show, oldest first, ending with today in the local timezone.
    cur_date = datetime.datetime.now().astimezone(tz.gettz(local_timezone)).date()
    last_certain_dates = [
        (cur_date - datetime.timedelta(days=date_offset)).strftime("%Y-%m-%d")
        for date_offset in range(expected_num_of_days - 1, -1, -1)
    ]

sensors_with_data_records, valid_sensed_hours_records = [], []
for sensors_with_data_individual, valid_sensed_hours_individual, pid_file in zip(phone_sensed_bins, phone_valid_sensed_days, pid_files):

    with open(pid_file, encoding="ISO-8859-1") as external_file:
        external_file_content = external_file.readlines()
    # First line: the device id is its last comma-separated field.
    device_id = external_file_content[0].split(",")[-1].strip()
    # Third line: the participant label.
    label = external_file_content[2].strip()
    # The participant id is the file name of the pid file.
    pid = pid_file.split("/")[-1]

    sensors_with_data_records.append(getOneRow(sensors_with_data_individual, last_certain_dates, "num_sensors", [pid, label, device_id], expected_num_of_days, only_show_valid_days))
    valid_sensed_hours_records.append(getOneRow(valid_sensed_hours_individual, last_certain_dates, "valid_sensed_hours", [pid, label, device_id], expected_num_of_days, only_show_valid_days))

if expected_num_of_days == -1:
    # Every record starts with three metadata cells (pid, label, device_id);
    # the rest are day values, so the longest record gives the day count.
    # default=3 keeps this at zero days when there are no participants, so
    # the "no sensor data" branch below is reached instead of max() raising.
    total_num_of_days = max((len(record) for record in sensors_with_data_records), default=3) - 3
    last_certain_dates = list(range(total_num_of_days))

# Zeros are turned into NaN before the frames are handed to the heatmap.
sensors_with_data = pd.DataFrame(data=sensors_with_data_records, columns=["pid", "label", "device_id"] + last_certain_dates).replace(0, np.nan)
valid_sensed_hours = pd.DataFrame(data=valid_sensed_hours_records, columns=["pid", "label", "device_id"] + last_certain_dates).replace(0, np.nan)

if sensors_with_data.empty:
    # Still produce the expected output file so the Snakemake rule succeeds.
    with open(snakemake.output[0], "w") as empty_html:
        empty_html.write("There is no sensor data for all participants")
else:
    getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_certain_dates, bin_size, min_bins_per_hour, expected_num_of_days, snakemake.output[0])
|
|
Loading…
Reference in New Issue