Update 4 plots for time_segments

pull/103/head
Meng Li 2020-12-03 21:00:32 -05:00
parent 3560217e3b
commit 9a0e57301b
21 changed files with 447 additions and 565 deletions

View File

@ -231,20 +231,19 @@ for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys():
if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]:
files_to_compute.append("reports/data_exploration/histogram_phone_data_yield.html")
# visualization for data exploration
# if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
# if config["HEATMAP_DAYS_BY_SENSORS"]["PLOT"]:
# files_to_compute.extend(expand("reports/interim/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"], min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_days_by_sensors_all_participants.html", min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
if config["HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT"]["PLOT"]:
files_to_compute.extend(expand("reports/interim/{pid}/heatmap_sensors_per_minute_per_time_segment.html", pid=config["PIDS"]))
files_to_compute.append("reports/data_exploration/heatmap_sensors_per_minute_per_time_segment.html")
# if config["HEATMAP_SENSED_BINS"]["PLOT"]:
# files_to_compute.extend(expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"]))
# files_to_compute.extend(["reports/data_exploration/heatmap_sensed_bins_all_participants.html"])
if config["HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT"]["PLOT"]:
files_to_compute.extend(expand("reports/interim/{pid}/heatmap_sensor_row_count_per_time_segment.html", pid=config["PIDS"]))
files_to_compute.append("reports/data_exploration/heatmap_sensor_row_count_per_time_segment.html")
# if config["OVERALL_COMPLIANCE_HEATMAP"]["PLOT"]:
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/overall_compliance_heatmap.html", min_valid_hours_per_day=config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
if config["HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT"]["PLOT"]:
files_to_compute.append("reports/data_exploration/heatmap_phone_data_yield_per_participant_per_time_segment.html")
if config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["PLOT"]:
files_to_compute.append("reports/data_exploration/heatmap_feature_correlation_matrix.html")
rule all:

View File

@ -259,9 +259,6 @@ PHONE_WIFI_VISIBLE:
########################################################################################################################
# FITBIT #
########################################################################################################################
@ -350,7 +347,6 @@ FITBIT_STEPS_INTRADAY:
########################################################################################################################
# PLOTS #
########################################################################################################################
@ -358,32 +354,19 @@ FITBIT_STEPS_INTRADAY:
HISTOGRAM_PHONE_DATA_YIELD:
PLOT: False
HEATMAP_FEATURES_CORRELATIONS:
HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT:
PLOT: False
HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT:
PLOT: False
SENSORS: [PHONE_ACCELEROMETER, PHONE_ACTIVITY_RECOGNITION, PHONE_APPLICATIONS_FOREGROUND, PHONE_BATTERY, PHONE_BLUETOOTH, PHONE_CALLS, PHONE_CONVERSATION, PHONE_LIGHT, PHONE_LOCATIONS, PHONE_MESSAGES, PHONE_SCREEN, PHONE_WIFI_CONNECTED, PHONE_WIFI_VISIBLE]
HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT:
PLOT: False
HEATMAP_FEATURE_CORRELATION_MATRIX:
PLOT: False
MIN_ROWS_RATIO: 0.5
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
PHONE_FEATURES: [accelerometer, activity_recognition, applications_foreground, battery, calls_incoming, calls_missed, calls_outgoing, conversation, light, location_doryab, messages_received, messages_sent, screen]
FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep]
CORR_THRESHOLD: 0.1
CORR_METHOD: "pearson" # choose from {"pearson", "kendall", "spearman"}
HEATMAP_DAYS_BY_SENSORS:
PLOT: False
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
EXPECTED_NUM_OF_DAYS: -1
DB_TABLES: [accelerometer, applications_foreground, battery, bluetooth, calls, light, locations, messages, screen, wifi, sensor_wifi, plugin_google_activity_recognition, plugin_ios_activity_recognition, plugin_studentlife_audio_android, plugin_studentlife_audio]
HEATMAP_SENSED_BINS:
PLOT: False
BIN_SIZE: #*bin_size
OVERALL_COMPLIANCE_HEATMAP:
PLOT: False
ONLY_SHOW_VALID_DAYS: False
EXPECTED_NUM_OF_DAYS: -1
BIN_SIZE: #*bin_size
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour

View File

@ -214,6 +214,20 @@ for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys():
if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]:
files_to_compute.append("reports/data_exploration/histogram_phone_data_yield.html")
if config["HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT"]["PLOT"]:
files_to_compute.extend(expand("reports/interim/{pid}/heatmap_sensors_per_minute_per_time_segment.html", pid=config["PIDS"]))
files_to_compute.append("reports/data_exploration/heatmap_sensors_per_minute_per_time_segment.html")
if config["HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT"]["PLOT"]:
files_to_compute.extend(expand("reports/interim/{pid}/heatmap_sensor_row_count_per_time_segment.html", pid=config["PIDS"]))
files_to_compute.append("reports/data_exploration/heatmap_sensor_row_count_per_time_segment.html")
if config["HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT"]["PLOT"]:
files_to_compute.append("reports/data_exploration/heatmap_phone_data_yield_per_participant_per_time_segment.html")
if config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["PLOT"]:
files_to_compute.append("reports/data_exploration/heatmap_feature_correlation_matrix.html")
# Analysis Workflow Example
models, scalers = [], []
for model_name in config["PARAMS_FOR_ANALYSIS"]["MODEL_NAMES"]:

View File

@ -323,6 +323,22 @@ FITBIT_STEPS_INTRADAY:
HISTOGRAM_PHONE_DATA_YIELD:
PLOT: True
HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT:
PLOT: True
HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT:
PLOT: True
SENSORS: [PHONE_ACCELEROMETER, PHONE_ACTIVITY_RECOGNITION, PHONE_APPLICATIONS_FOREGROUND, PHONE_BATTERY, PHONE_BLUETOOTH, PHONE_CALLS, PHONE_CONVERSATION, PHONE_LIGHT, PHONE_LOCATIONS, PHONE_MESSAGES, PHONE_SCREEN, PHONE_WIFI_CONNECTED, PHONE_WIFI_VISIBLE]
HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT:
PLOT: True
HEATMAP_FEATURE_CORRELATION_MATRIX:
PLOT: True
MIN_ROWS_RATIO: 0.5
CORR_THRESHOLD: 0.1
CORR_METHOD: "pearson" # choose from {"pearson", "kendall", "spearman"}
########################################################################################################################

View File

@ -1,19 +1,3 @@
# Common.smk ##########################################################################################################
def infer_participant_platform(participant_file):
    """Infer a participant's smartphone platform from their participant file.

    The second line of the file holds one or more comma-separated platforms.
    "multiple" (or an explicit mix of android and ios) resolves to "android".

    Args:
        participant_file: path to the participant file.

    Returns:
        str: "android" or "ios".

    Raises:
        ValueError: if line 2 is not 'android', 'ios', or 'multiple'.
    """
    with open(participant_file, encoding="ISO-8859-1") as external_file:
        external_file_content = external_file.readlines()
    platforms = external_file_content[1].strip().split(",")
    if platforms[0] == "multiple" or (len(platforms) > 1 and "android" in platforms and "ios" in platforms):
        platform = "android"
    else:
        platform = platforms[0]

    if platform not in ["android", "ios"]:
        # BUG FIX: `platforms` is a list; the original concatenated it to a str,
        # which raised TypeError instead of the intended ValueError message.
        raise ValueError("Platform (line 2) in a participant file should be 'android', 'ios', or 'multiple'. You typed '" + ",".join(platforms) + "'")
    return platform
# Features.smk #########################################################################################################
def find_features_files(wildcards):
feature_files = []
@ -38,14 +22,3 @@ def input_merge_sensor_features_for_individual_participants(wildcards):
break
return feature_files
# Reports.smk ###########################################################################################################
def optional_heatmap_days_by_sensors_input(wildcards):
    """Return the platform-appropriate `*_with_datetime.csv` inputs for a participant.

    Conversation and activity-recognition data live in platform-specific tables,
    so the other platform's tables are filtered out of the configured DB_TABLES.
    """
    platform = infer_participant_platform("data/external/" + wildcards.pid)
    # Tables to discard per platform: the opposite platform's conversation
    # and activity-recognition tables.
    discard = {
        "android": [config["CONVERSATION"]["DB_TABLE"]["IOS"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]],
        "ios": [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"]],
    }[platform]
    tables_platform = [table for table in config["HEATMAP_DAYS_BY_SENSORS"]["DB_TABLES"] if table not in discard]
    return expand("data/raw/{{pid}}/{table}_with_datetime.csv", table = tables_platform)

View File

@ -6,74 +6,66 @@ rule histogram_phone_data_yield:
script:
"../src/visualization/histogram_phone_data_yield.py"
rule heatmap_features_correlations:
rule heatmap_sensors_per_minute_per_time_segment:
input:
features = expand("data/processed/{pid}/{sensor}_{time_segment}.csv", pid=config["PIDS"], sensor=config["HEATMAP_FEATURES_CORRELATIONS"]["PHONE_FEATURES"]+config["HEATMAP_FEATURES_CORRELATIONS"]["FITBIT_FEATURES"], time_segment=config["TIME_SEGMENTS"]),
phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}hours_{{min_valid_bins_per_hour}}bins.csv", pid=config["PIDS"])
phone_data_yield = "data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv",
participant_file = "data/external/participant_files/{pid}.yaml",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
min_rows_ratio = config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_ROWS_RATIO"],
corr_threshold = config["HEATMAP_FEATURES_CORRELATIONS"]["CORR_THRESHOLD"],
corr_method = config["HEATMAP_FEATURES_CORRELATIONS"]["CORR_METHOD"]
pid = "{pid}"
output:
"reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html"
"reports/interim/{pid}/heatmap_sensors_per_minute_per_time_segment.html"
script:
"../src/visualization/heatmap_features_correlations.py"
"../src/visualization/heatmap_sensors_per_minute_per_time_segment.py"
rule heatmap_days_by_sensors:
rule merge_heatmap_sensors_per_minute_per_time_segment:
input:
sensors = optional_heatmap_days_by_sensors_input,
phone_valid_sensed_days = "data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins.csv"
heatmap_sensors_per_minute_per_time_segment = expand("reports/interim/{pid}/heatmap_sensors_per_minute_per_time_segment.html", pid=config["PIDS"])
output:
"reports/data_exploration/heatmap_sensors_per_minute_per_time_segment.html"
script:
"../src/visualization/merge_heatmap_sensors_per_minute_per_time_segment.Rmd"
rule heatmap_sensor_row_count_per_time_segment:
input:
all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor = map(str.lower, config["HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT"]["SENSORS"])),
phone_data_yield = "data/processed/features/{pid}/phone_data_yield.csv",
participant_file = "data/external/participant_files/{pid}.yaml",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
pid = "{pid}",
expected_num_of_days = config["HEATMAP_DAYS_BY_SENSORS"]["EXPECTED_NUM_OF_DAYS"]
pid = "{pid}"
output:
"reports/interim/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/{pid}/heatmap_days_by_sensors.html"
"reports/interim/{pid}/heatmap_sensor_row_count_per_time_segment.html"
script:
"../src/visualization/heatmap_days_by_sensors.py"
"../src/visualization/heatmap_sensor_row_count_per_time_segment.py"
rule heatmap_days_by_sensors_all_participants:
rule merge_heatmap_sensor_row_count_per_time_segment:
input:
heatmap_rows = expand("reports/interim/{{min_valid_hours_per_day}}hours_{{min_valid_bins_per_hour}}bins/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"])
heatmap_sensor_row_count_per_time_segment = expand("reports/interim/{pid}/heatmap_sensor_row_count_per_time_segment.html", pid=config["PIDS"])
output:
"reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_days_by_sensors_all_participants.html"
"reports/data_exploration/heatmap_sensor_row_count_per_time_segment.html"
script:
"../src/visualization/heatmap_days_by_sensors_all_participants.Rmd"
"../src/visualization/merge_heatmap_sensor_row_count_per_time_segment.Rmd"
rule heatmap_sensed_bins:
rule heatmap_phone_data_yield_per_participant_per_time_segment:
input:
sensor = "data/interim/{pid}/phone_sensed_bins.csv",
pid_file = "data/external/{pid}"
phone_data_yield = expand("data/processed/features/{pid}/phone_data_yield.csv", pid=config["PIDS"]),
participant_file = expand("data/external/participant_files/{pid}.yaml", pid=config["PIDS"]),
time_segments_labels = expand("data/interim/time_segments/{pid}_time_segments_labels.csv", pid=config["PIDS"])
output:
"reports/data_exploration/heatmap_phone_data_yield_per_participant_per_time_segment.html"
script:
"../src/visualization/heatmap_phone_data_yield_per_participant_per_time_segment.py"
rule heatmap_feature_correlation_matrix:
input:
all_sensor_features = "data/processed/features/all_participants/all_sensor_features.csv" # before data cleaning
params:
pid = "{pid}",
bin_size = config["HEATMAP_SENSED_BINS"]["BIN_SIZE"]
min_rows_ratio = config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["MIN_ROWS_RATIO"],
corr_threshold = config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["CORR_THRESHOLD"],
corr_method = config["HEATMAP_FEATURE_CORRELATION_MATRIX"]["CORR_METHOD"]
output:
"reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html"
"reports/data_exploration/heatmap_feature_correlation_matrix.html"
script:
"../src/visualization/heatmap_sensed_bins.py"
"../src/visualization/heatmap_feature_correlation_matrix.py"
rule heatmap_sensed_bins_all_participants:
input:
heatmap_sensed_bins = expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"])
output:
"reports/data_exploration/heatmap_sensed_bins_all_participants.html"
script:
"../src/visualization/heatmap_sensed_bins_all_participants.Rmd"
rule overall_compliance_heatmap:
input:
phone_sensed_bins = expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]),
phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}hours_{{min_valid_bins_per_hour}}bins.csv", pid=config["PIDS"]),
pid_files = expand("data/external/{pid}", pid=config["PIDS"])
params:
only_show_valid_days = config["OVERALL_COMPLIANCE_HEATMAP"]["ONLY_SHOW_VALID_DAYS"],
local_timezone = config["PHONE_DATA_CONFIGURATION"]["TIMEZONE"]["VALUE"],
expected_num_of_days = config["OVERALL_COMPLIANCE_HEATMAP"]["EXPECTED_NUM_OF_DAYS"],
bin_size = config["OVERALL_COMPLIANCE_HEATMAP"]["BIN_SIZE"],
min_bins_per_hour = "{min_valid_bins_per_hour}"
output:
"reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/overall_compliance_heatmap.html"
script:
"../src/visualization/overall_compliance_heatmap.py"

View File

@ -1,34 +0,0 @@
import pandas as pd
import datetime
import plotly.io as pio
import plotly.graph_objects as go
def getBatteryConsumptionRatesBarChart(battery_data, pid):
    """Build a horizontal bar chart of daily battery consumption rates.

    NOTE(review): the title reads the module-level globals `label` and
    `device_id`, which must be defined before this function is called.
    """
    day_labels = [day.strftime("%Y/%m/%d") for day in battery_data["local_date"]]
    bar = go.Bar(
        x=battery_data["battery_daily_avgconsumptionrate"],
        y=day_labels,
        orientation='h')
    figure = go.Figure(bar)
    figure.update_layout(
        title="Daily battery consumption rates bar chart for " + pid + "<br>Label: " + label + ", device_id: " + device_id,
        xaxis_title="battery drains % per hour",
    )
    return figure
# --- Snakemake script body: plot one participant's daily battery consumption ---
battery_data = pd.read_csv(snakemake.input["sensor"], parse_dates=["local_date"])
pid = snakemake.params["pid"]
# Participant metadata: line 1 ends with the device_id, line 3 holds the label.
with open(snakemake.input["pid_file"], encoding="ISO-8859-1") as external_file:
    external_file_content = external_file.readlines()
device_id = external_file_content[0].split(",")[-1]
label = external_file_content[2]
if battery_data.empty:
    # No data: write a small HTML placeholder instead of a plot.
    empty_html = open(snakemake.output[0], "w")
    empty_html.write("There is no battery data for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
    empty_html.close()
else:
    # Resample to one row per day, filling missing days with 0 so every
    # calendar day appears in the chart.
    battery_data.set_index(["local_date"], inplace=True)
    battery_data = battery_data.resample("1D").asfreq().fillna(0).reset_index()
    plot = getBatteryConsumptionRatesBarChart(battery_data, pid)
    pio.write_html(plot, file=snakemake.output[0], auto_open=False, include_plotlyjs="cdn")

View File

@ -1,39 +0,0 @@
---
title: "Compliance Report"
author:
- "MoSHI Pipeline"
date: "`r format(Sys.time(), '%d %B, %Y')`"
params:
rmd: "compliance_report.Rmd"
output:
html_document:
highlight: tango
number_sections: no
theme: default
toc: yes
toc_depth: 3
toc_float:
collapsed: no
smooth_scroll: yes
---
```{r include=FALSE}
source("renv/activate.R")
```
## Overall phone compliance
```{r, echo=FALSE}
htmltools::includeHTML(snakemake@input[["compliance_heatmap"]])
```
## Per sensor compliance
```{r, echo=FALSE}
heatmaps <- snakemake@input[["sensor_heatmaps"]]
heatmaps.html <- vector(mode="list", length(heatmaps))
for(sensor_id in 1:length(heatmaps)){
heatmaps.html[[sensor_id]] <- htmltools::includeHTML(heatmaps[sensor_id])
}
htmltools::tagList(heatmaps.html)
```

View File

@ -1,74 +0,0 @@
import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
from datetime import datetime, timedelta
def getRowCountHeatmap(row_count_sensors_normalized, row_count_sensors, pid, output_path):
    """Write an HTML heatmap of per-day sensor row counts.

    Colors come from the normalized counts; hover text shows the raw counts.
    The x axis is "YYYY/MM/DD(date_idx)" built from the two-level index.
    """
    x_labels = [datetime.strftime(idx[0], "%Y/%m/%d") + "(" + str(idx[1]) + ")" for idx in row_count_sensors.index]
    heatmap = go.Heatmap(
        z=row_count_sensors_normalized.T.values.tolist(),
        x=x_labels,
        y=row_count_sensors.columns.tolist(),
        hovertext=row_count_sensors.T.values.tolist(),
        hovertemplate="Date: %{x}<br>Sensor: %{y}<br>Row count: %{hovertext}<extra></extra>",
        colorscale="Viridis")
    figure = go.Figure(data=heatmap)
    figure.update_layout(title="Row count heatmap for " + pid)
    pio.write_html(figure, file=output_path, auto_open=False, include_plotlyjs="cdn")
# --- Snakemake script body: per-day row counts for each sensor of one participant ---
phone_valid_sensed_days = pd.read_csv(snakemake.input["phone_valid_sensed_days"], parse_dates=["local_date"], index_col=["local_date"])
phone_valid_sensed_days = phone_valid_sensed_days[phone_valid_sensed_days["is_valid_sensed_day"] == True]
row_count_sensors = pd.DataFrame()
for sensor_path in snakemake.input["sensors"]:
    # Derive a short display name from the file name.
    sensor_name = sensor_path.split("/")[-1].replace("_with_datetime.csv", "")
    # plugin_studentlife_audio_android or plugin_studentlife_audio => conversion; plugin_google_activity_recognition or plugin_ios_activity_recognition => AR; applications_foreground => apps
    # NOTE(review): "conversion" here (and in the replace below) looks like a
    # typo for "conversation" — display-only, confirm before renaming.
    sensor_name = sensor_name.replace("plugin_studentlife_audio_android", "conversion").replace("plugin_studentlife_audio", "conversion") \
        .replace("plugin_google_activity_recognition", "AR").replace("plugin_ios_activity_recognition", "AR") \
        .replace("applications_foreground", "apps")
    sensor_data = pd.read_csv(sensor_path, encoding="ISO-8859-1", parse_dates=["local_date"], dtype={"label": str})
    if sensor_data.empty:
        row_count_sensor = pd.DataFrame(columns=[sensor_name])
    else:
        # One row per local_date: count of records for this sensor.
        row_count_sensor = sensor_data[["timestamp", "local_date"]].groupby(["local_date"]).count().rename(columns={"timestamp": sensor_name})
    # Outer join so dates missing from one sensor are kept for the others.
    row_count_sensors = row_count_sensors.join(row_count_sensor, how="outer")
row_count_sensors.index = pd.to_datetime(row_count_sensors.index)
row_count_sensors = row_count_sensors.join(phone_valid_sensed_days[["valid_sensed_hours"]], how="outer")
if row_count_sensors.empty:
    empty_html = open(snakemake.output[0], "w")
    empty_html.write("There are no records of sensors in database.")
    empty_html.close()
else:
    # set date_idx based on the first date
    reference_date = row_count_sensors.index.min()
    last_date = row_count_sensors.index.max()
    row_count_sensors["date_idx"] = (row_count_sensors.index - reference_date).days
    row_count_sensors["local_date"] = row_count_sensors.index
    row_count_sensors.set_index(["local_date", "date_idx"], inplace=True)
    expected_num_of_days = int(snakemake.params["expected_num_of_days"])
    if expected_num_of_days < -1:
        raise ValueError("EXPECTED_NUM_OF_DAYS of HEATMAP_DAYS_BY_SENSORS section in config.yaml must be larger or equal to -1.")
    # if expected_num_of_days = -1, return all dates
    expected_num_of_days = (last_date - reference_date).days if expected_num_of_days == -1 else expected_num_of_days
    # add empty rows to make sure different participants have the same date_idx range
    date_idx_range = [idx for idx in range(expected_num_of_days)]
    date_range = [reference_date + timedelta(days=idx) for idx in date_idx_range]
    all_dates = pd.DataFrame({"local_date": date_range, "date_idx": date_idx_range})
    all_dates.set_index(["local_date", "date_idx"], inplace=True)
    row_count_sensors = row_count_sensors.merge(all_dates, left_index=True, right_index=True, how="right")
    # normalize each sensor (column)
    if row_count_sensors.count().max() > 1:
        row_count_sensors_normalized = row_count_sensors.fillna(np.nan).apply(lambda x: (x - np.nanmin(x)) / (np.nanmax(x) - np.nanmin(x)) if np.nanmax(x) != np.nanmin(x) else (x / np.nanmin(x)), axis=0)
    else:
        row_count_sensors_normalized = row_count_sensors
    # `sensor_path` still holds the last path from the loop above; pid is the
    # third path component ("data/raw/{pid}/...").
    pid = sensor_path.split("/")[2]
    getRowCountHeatmap(row_count_sensors_normalized, row_count_sensors, pid, snakemake.output[0])

View File

@ -0,0 +1,48 @@
import numpy as np
import pandas as pd
import plotly.graph_objects as go
def getCorrMatrixHeatmap(corr_matrix, time_segment, html_file):
    """Append a feature-correlation heatmap for one time segment to html_file."""
    feature_names = corr_matrix.columns
    heatmap = go.Heatmap(
        z=corr_matrix.values.tolist(),
        x=feature_names,
        y=feature_names,
        colorscale="Viridis")
    figure = go.Figure(data=heatmap)
    figure.update_layout(title="Correlation matrix of features of " + time_segment + " segments.")
    # Embed as a fragment so several segments can share one HTML file.
    html_file.write(figure.to_html(full_html=False, include_plotlyjs="cdn"))
# --- Snakemake script body: one correlation heatmap per time segment ---
min_rows_ratio = snakemake.params["min_rows_ratio"]
corr_threshold = snakemake.params["corr_threshold"]
corr_method = snakemake.params["corr_method"]
features = pd.read_csv(snakemake.input["all_sensor_features"])
time_segments = set(features["local_segment_label"])
# Append mode: each segment's heatmap is written as an HTML fragment.
html_file = open(snakemake.output[0], "a", encoding="utf-8")
if features.empty:
    html_file.write("There are no features for any participant.")
else:
    for time_segment in time_segments:
        features_per_segment = features[features["local_segment_label"] == time_segment]
        if features_per_segment.empty:
            html_file.write("There are no features for " + time_segment + " segments.<br>")
        else:
            # drop useless columns
            features_per_segment = features_per_segment.drop(["pid", "local_segment", "local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"], axis=1).astype(float)
            # get correlation matrix
            # NOTE(review): min_periods is passed as a float (ratio * row count);
            # pandas documents it as an int — confirm this is intended.
            corr_matrix = features_per_segment.corr(method=corr_method, min_periods=min_rows_ratio * features_per_segment.shape[0])
            # replace correlation coefficients less than corr_threshold with NA
            corr_matrix[(corr_matrix > -corr_threshold) & (corr_matrix < corr_threshold)] = np.nan
            # plot heatmap
            getCorrMatrixHeatmap(corr_matrix, time_segment, html_file)
html_file.close()

View File

@ -1,59 +0,0 @@
import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
def getCorrMatrixHeatmap(corr_matrix, output_path):
    """Write a correlation-matrix heatmap to a standalone HTML file."""
    colnames = corr_matrix.columns
    heatmap = go.Heatmap(
        z=corr_matrix.values.tolist(),
        x=colnames,
        y=colnames,
        colorscale="Viridis")
    figure = go.Figure(data=heatmap)
    figure.update_layout(title="Correlation Matrix Heatmap")
    pio.write_html(figure, file=output_path, auto_open=False, include_plotlyjs="cdn")
# --- Snakemake script body (legacy): correlation matrix across all participants ---
min_rows_ratio = snakemake.params["min_rows_ratio"]
corr_threshold = snakemake.params["corr_threshold"]
# merge features
# Per-sensor feature files for one pid are merged on local_date into
# features_all_sensors; when the pid changes, the finished block is appended
# to `features` tagged with that pid.
features, features_all_sensors = pd.DataFrame(columns=["local_date"]), pd.DataFrame(columns=["local_date"])
pids = set()
last_pid = None
for path in snakemake.input["features"]:
    # pid is the third path component ("data/processed/{pid}/...").
    pid = path.split("/")[2]
    if pid not in pids:
        pids.add(pid)
        # Flush the previous pid's block (the very first flush is an empty
        # frame with pid=None and is effectively a no-op).
        features_all_sensors["pid"] = last_pid
        features = pd.concat([features, features_all_sensors], axis=0, ignore_index=True, sort=False)
        features_all_sensors = pd.DataFrame(columns=["local_date"])
    features_per_sensor = pd.read_csv(path)
    features_all_sensors = features_all_sensors.merge(features_per_sensor, on="local_date", how="outer")
    last_pid = pid
# Flush the final pid's block.
features_all_sensors["pid"] = last_pid
features = pd.concat([features, features_all_sensors], axis=0, ignore_index=True, sort=False)
features.set_index(["pid", "local_date"], inplace=True)
# select days based on the input of "phone_valid_sensed_days"
selected_participants_and_days = pd.DataFrame()
for path in snakemake.input["phone_valid_sensed_days"]:
    pid = path.split("/")[2]
    phone_valid_sensed_days = pd.read_csv(path)
    phone_valid_sensed_days = phone_valid_sensed_days[phone_valid_sensed_days["is_valid_sensed_day"] == True]
    phone_valid_sensed_days["pid"] = pid
    selected_participants_and_days = pd.concat([selected_participants_and_days, phone_valid_sensed_days], axis=0)
selected_participants_and_days.set_index(["pid", "local_date"], inplace=True)
# Keep only (pid, day) pairs that passed the valid-sensed-day filter.
features = features.loc[features.index.intersection(selected_participants_and_days.index), :]
# get correlation matrix
features = features.astype(float)
# NOTE(review): min_periods is passed as a float (ratio * row count);
# pandas documents it as an int — confirm this is intended.
corr_matrix = features.corr(method=snakemake.params["corr_method"], min_periods=min_rows_ratio * features.shape[0])
# replace correlation coefficients less than corr_threshold with NA
corr_matrix[(corr_matrix > -corr_threshold) & (corr_matrix < corr_threshold)] = np.nan
# plot heatmap
getCorrMatrixHeatmap(corr_matrix, snakemake.output[0])

View File

@ -0,0 +1,85 @@
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import yaml
def getPhoneDataYieldHeatmap(data_for_plot, y_axis_labels, time_segment, type, html_file):
    """Append a data-yield ratio heatmap (participants x time) to html_file.

    `type` names the ratio being plotted ("minutes" or "hours") and is used in
    the title and hover text only. (The parameter shadows the builtin `type`,
    but is kept for interface compatibility.)
    """
    z_values = data_for_plot.values.tolist()
    heatmap = go.Heatmap(
        z=z_values,
        x=data_for_plot.columns.tolist(),
        y=y_axis_labels,
        hovertext=z_values,
        hovertemplate="Time since first segment: %{x}<br>Participant: %{y}<br>Ratiovalidyielded" + type + ": %{z}<extra></extra>",
        zmin=0, zmax=1,
        colorscale="Viridis")
    figure = go.Figure(data=heatmap)
    title = ("Heatmap of valid yielded " + type + " ratio for " + time_segment
             + " segments.<br>y-axis shows participant information (format: pid.label)."
             + "<br>x-axis shows the time since their first segment."
             + "<br>z-axis (color) shows valid yielded " + type + " ratio during a segment instance.")
    figure.update_layout(title=title)
    figure["layout"]["xaxis"].update(side="bottom")
    figure["layout"].update(xaxis_title="Time Since First Segment")
    figure["layout"].update(margin=dict(t=160))
    html_file.write(figure.to_html(full_html=False, include_plotlyjs="cdn"))
# --- Snakemake script body: yield heatmaps across all participants per segment ---
y_axis_labels, phone_data_yield_minutes, phone_data_yield_hours = [], {}, {}
for phone_data_yield_path, participant_file_path, time_segments_path in zip(snakemake.input["phone_data_yield"], snakemake.input["participant_file"], snakemake.input["time_segments_labels"]):
    # set pid.label as y_axis_label
    pid = phone_data_yield_path.split("/")[3]
    time_segments = pd.read_csv(time_segments_path, header=0)["label"]
    with open(participant_file_path, "r", encoding="utf-8") as f:
        participant_file = yaml.safe_load(f)
    label = participant_file["PHONE"]["LABEL"]
    y_axis_label = pid + "." + label
    y_axis_labels.append(y_axis_label)
    phone_data_yield = pd.read_csv(phone_data_yield_path, index_col=["local_segment_start_datetime"], parse_dates=["local_segment_start_datetime"])
    # make sure the phone_data_yield file contains "phone_data_yield_rapids_ratiovalidyieldedminutes" and "phone_data_yield_rapids_ratiovalidyieldedhours" columns
    if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
        raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")
    if not phone_data_yield.empty:
        for time_segment in time_segments:
            phone_data_yield_per_segment = phone_data_yield[phone_data_yield["local_segment_label"] == time_segment]
            if not phone_data_yield_per_segment.empty:
                # set number of minutes after the first start date time of local segments as x_axis_label
                # NOTE(review): assigning to .index on a filtered frame may
                # trigger pandas SettingWithCopy warnings — confirm intent.
                phone_data_yield_per_segment.index = phone_data_yield_per_segment.index - phone_data_yield_per_segment.index.min()
                phone_data_yield_minutes_per_segment = phone_data_yield_per_segment[["phone_data_yield_rapids_ratiovalidyieldedminutes"]].rename(columns={"phone_data_yield_rapids_ratiovalidyieldedminutes": y_axis_label})
                phone_data_yield_hours_per_segment = phone_data_yield_per_segment[["phone_data_yield_rapids_ratiovalidyieldedhours"]].rename(columns={"phone_data_yield_rapids_ratiovalidyieldedhours": y_axis_label})
                # Accumulate one column per participant for each segment label.
                if time_segment not in phone_data_yield_minutes.keys():
                    phone_data_yield_minutes[time_segment] = phone_data_yield_minutes_per_segment
                    phone_data_yield_hours[time_segment] = phone_data_yield_hours_per_segment
                else:
                    phone_data_yield_minutes[time_segment] = pd.concat([phone_data_yield_minutes[time_segment], phone_data_yield_minutes_per_segment], axis=1, sort=True)
                    phone_data_yield_hours[time_segment] = pd.concat([phone_data_yield_hours[time_segment], phone_data_yield_hours_per_segment], axis=1, sort=True)
# Append mode: each segment's two heatmaps become fragments in one HTML file.
html_file = open(snakemake.output[0], "a", encoding="utf-8")
if len(phone_data_yield_minutes.keys()) == 0:
    html_file.write("There is no sensor data for the sensors in [PHONE_DATA_YIELD][SENSORS].")
for time_segment in phone_data_yield_minutes.keys():
    # Rows = participants (in first-seen order), columns = time since first segment.
    minutes_data_for_plot = phone_data_yield_minutes[time_segment].transpose().reindex(pd.Index(y_axis_labels)).round(3)
    hours_data_for_plot = phone_data_yield_hours[time_segment].transpose().reindex(pd.Index(y_axis_labels)).round(3)
    getPhoneDataYieldHeatmap(minutes_data_for_plot, y_axis_labels, time_segment, "minutes", html_file)
    getPhoneDataYieldHeatmap(hours_data_for_plot, y_axis_labels, time_segment, "hours", html_file)
html_file.close()

View File

@ -1,68 +0,0 @@
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.graph_objects as go
import datetime
def getComplianceMatrix(dates, compliance_bins):
    """Collect the per-bin counts for each requested date.

    Args:
        dates: iterable of date values to look up, in display order.
        compliance_bins: DataFrame with "local_date" and "count" columns.

    Returns:
        list[list]: one list of counts per date (empty list for absent dates).
    """
    return [
        compliance_bins.loc[compliance_bins["local_date"] == date, "count"].tolist()
        for date in dates
    ]
def getRowCountHeatmap(dates, row_count_per_bin, sensor_name, pid, output_path, bin_size):
    # Write a standalone HTML heatmap of row counts: time-of-day bins (x) by
    # date (y) for one sensor of one participant.
    # NOTE(review): reads module-level `label` and `device_id` assigned later in
    # this script — must be called after they are set.
    bins_per_hour = int(60 / bin_size)  # bin_size is the bin width in minutes
    # "HH:MM" start time of each bin across a 24-hour day
    x_axis_labels = ["{0:0=2d}".format(x // bins_per_hour) + ":" + \
        "{0:0=2d}".format(x % bins_per_hour * bin_size) for x in range(24 * bins_per_hour)]
    plot = go.Figure(data=go.Heatmap(z=row_count_per_bin,
                                     x=x_axis_labels,
                                     y=[datetime.datetime.strftime(date, '%Y/%m/%d') for date in dates],
                                     colorscale="Viridis"))
    plot.update_layout(title="Row count heatmap for " + sensor_name + " of " + pid + "<br>Label: " + label + ", device_id: " + device_id)
    pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
# --- Per-participant row-count heatmap for one sensor table (snakemake script) ---
sensor_data = pd.read_csv(snakemake.input["sensor"], encoding="ISO-8859-1")
sensor_name = snakemake.params["table"]
pid = snakemake.params["pid"]
bin_size = snakemake.params["bin_size"]  # bin width in minutes
# participant metadata file: line 0 ends with the device_id, line 2 is the label
with open(snakemake.input["pid_file"], encoding="ISO-8859-1") as external_file:
    external_file_content = external_file.readlines()
device_id = external_file_content[0].split(",")[-1]
label = external_file_content[2]
# check if we have sensor data
if sensor_data.empty:
    empty_html = open(snakemake.output[0], "w")
    empty_html.write("There is no " + sensor_name + " data for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
    empty_html.close()
else:
    start_date = sensor_data["local_date"][0]
    end_date = sensor_data.at[sensor_data.index[-1],"local_date"]
    sensor_data["local_date_time"] = pd.to_datetime(sensor_data["local_date_time"])
    sensor_data = sensor_data[["local_date_time"]]
    # each source row contributes 1 to its bin's count
    sensor_data["count"] = 1
    # Add first and last day boundaries for resampling
    sensor_data = sensor_data.append([pd.Series([datetime.datetime.strptime(start_date + " 00:00:00", "%Y-%m-%d %H:%M:%S"), 0], sensor_data.columns),
                                      pd.Series([datetime.datetime.strptime(end_date + " 23:59:59", "%Y-%m-%d %H:%M:%S"), 0], sensor_data.columns)])
    # Resample into bins with the size of bin_size
    resampled_bins = pd.DataFrame(sensor_data.resample(str(bin_size) + "T", on="local_date_time")["count"].sum())
    # Extract list of dates for creating the heatmap
    resampled_bins.reset_index(inplace=True)
    resampled_bins["local_date"] = resampled_bins["local_date_time"].dt.date
    dates = resampled_bins["local_date"].drop_duplicates().tolist()
    # Create heatmap
    row_count_per_bin = getComplianceMatrix(dates, resampled_bins)
    row_count_per_bin = np.asarray(row_count_per_bin)
    # render empty bins as gaps (NaN) instead of zero-colored cells
    row_count_per_bin = np.where(row_count_per_bin == 0, np.nan, row_count_per_bin)
    getRowCountHeatmap(dates, row_count_per_bin, sensor_name, pid, snakemake.output[0], bin_size)

View File

@ -1,50 +0,0 @@
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.graph_objects as go
import datetime
def getDatesComplianceMatrix(phone_sensed_bins):
    """Split a date-indexed bin table into heatmap inputs.

    Parameters
    ----------
    phone_sensed_bins : pandas.DataFrame
        Indexed by date; each row holds the per-bin values for that date.

    Returns
    -------
    tuple
        (the DataFrame's index of dates, list of per-date rows as plain lists).
    """
    dates = phone_sensed_bins.index
    # one heatmap row per date, in index order (comprehension replaces append loop)
    compliance_matrix = [phone_sensed_bins.loc[date, :].tolist() for date in dates]
    return dates, compliance_matrix
def getComplianceHeatmap(dates, compliance_matrix, pid, output_path, bin_size):
    # Write a standalone HTML heatmap: dates (y) by time-of-day bins (x),
    # colored by how many sensors logged at least one row in each bin.
    # NOTE(review): reads module-level `label` and `device_id` assigned later in
    # this script — must be called after they are set.
    bins_per_hour = int(60 / bin_size)  # bin_size is the bin width in minutes
    # "HH:MM" start time of each bin across a 24-hour day
    x_axis_labels = ["{0:0=2d}".format(x // bins_per_hour) + ":" + \
        "{0:0=2d}".format(x % bins_per_hour * bin_size) for x in range(24 * bins_per_hour)]
    plot = go.Figure(data=go.Heatmap(z=compliance_matrix,
                                     x=x_axis_labels,
                                     y=[datetime.datetime.strftime(date, '%Y/%m/%d') for date in dates],
                                     colorscale='Viridis',
                                     colorbar={'tick0': 0,'dtick': 1}))
    plot.update_layout(title="Heatmap sensed bins.<br>Five-minute bins showing how many sensors logged at least one row of data in that period for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
    pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
# --- Per-participant sensed-bins heatmap (snakemake script) ---
# get current patient id
pid = snakemake.params["pid"]
bin_size = snakemake.params["bin_size"]  # bin width in minutes
# participant metadata file: line 0 ends with the device_id, line 2 is the label
with open(snakemake.input["pid_file"], encoding="ISO-8859-1") as external_file:
    external_file_content = external_file.readlines()
device_id = external_file_content[0].split(",")[-1]
label = external_file_content[2]
phone_sensed_bins = pd.read_csv(snakemake.input["sensor"], parse_dates=["local_date"], index_col="local_date")
if phone_sensed_bins.empty:
    empty_html = open(snakemake.output[0], "w", encoding="ISO-8859-1")
    empty_html.write("There is no sensor data for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
    empty_html.close()
else:
    # resample to impute missing dates
    phone_sensed_bins = phone_sensed_bins.resample("1D").asfreq().fillna(0)
    # get dates and compliance_matrix
    dates, compliance_matrix = getDatesComplianceMatrix(phone_sensed_bins)
    # convert compliance_matrix from list to np.array and replace 0 with np.nan
    # so empty bins render as gaps rather than zero-colored cells
    compliance_matrix = np.asarray(compliance_matrix)
    compliance_matrix = np.where(compliance_matrix == 0, np.nan, compliance_matrix)
    # get heatmap
    getComplianceHeatmap(dates, compliance_matrix, pid, snakemake.output[0], bin_size)

View File

@ -0,0 +1,89 @@
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from importlib import util
from pathlib import Path
import yaml
def getRowCountHeatmap(data_for_plot, scaled_data_for_plot, pid, time_segment, html_file):
    # Append one heatmap to the already-open HTML report: sensors (y) by segment
    # start datetimes (x). Cell color comes from the scaled values (0..1) while
    # hover text shows the raw row counts.
    # NOTE(review): reads module-level `label` assigned later in this script.
    fig = go.Figure(data=go.Heatmap(z=scaled_data_for_plot.values.tolist(),
                                    x=data_for_plot.columns,
                                    y=data_for_plot.index,
                                    hovertext=data_for_plot.values.tolist(),
                                    hovertemplate="Segment start: %{x}<br>Sensor: %{y}<br>Row count: %{hovertext}<extra></extra>",
                                    zmin=0, zmax=1,
                                    colorscale='Viridis'))
    fig.update_layout(title="Heatmap of sensor row count for " + time_segment + " segments. Pid: " + pid +". Label: " + label + "<br>y-axis shows the included sensors.<br>x-axis shows the start (date and time) of a time segment.<br>z-axis (color) shows row count per sensor per segment instance.")
    # extra top margin so the multi-line title fits above the plot
    fig["layout"].update(margin=dict(t=160))
    html_file.write(fig.to_html(full_html=False, include_plotlyjs="cdn"))
# --- Sensor row count per time segment for one participant (snakemake script) ---
# import filter_data_by_segment from src/features/utils/utils.py
spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "features" / "utils" / "utils.py"))
mod = util.module_from_spec(spec)
spec.loader.exec_module(mod)
filter_data_by_segment = getattr(mod, "filter_data_by_segment")
phone_data_yield = pd.read_csv(snakemake.input["phone_data_yield"], index_col=["local_segment_start_datetime"], parse_dates=["local_segment_start_datetime"])
# make sure the phone_data_yield file contains "phone_data_yield_rapids_ratiovalidyieldedminutes" and "phone_data_yield_rapids_ratiovalidyieldedhours" columns
if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
    raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")
phone_data_yield = phone_data_yield[["local_segment_label", "phone_data_yield_rapids_ratiovalidyieldedminutes", "phone_data_yield_rapids_ratiovalidyieldedhours"]]
time_segments = pd.read_csv(snakemake.input["time_segments_labels"], header=0)["label"]
pid = snakemake.params["pid"]
with open(snakemake.input["participant_file"], "r", encoding="utf-8") as f:
    participant_file = yaml.safe_load(f)
label = participant_file["PHONE"]["LABEL"]
sensor_names = []
# one row-count frame per time segment; the shared initial empty DataFrame is
# safe because each key is re-bound to a new object by pd.concat below
sensors_row_count = dict(zip(time_segments, [pd.DataFrame()] * len(time_segments)))
for sensor_path in snakemake.input["all_sensors"]:
    sensor_data = pd.read_csv(sensor_path, usecols=["assigned_segments"])
    # sensor name comes from the file name, e.g. ".../phone_calls_with_datetime.csv" -> "phone_calls"
    sensor_name = sensor_path.split("/")[-1].replace("_with_datetime.csv", "")
    sensor_names.append(sensor_name)
    if not sensor_data.empty:
        for time_segment in time_segments:
            sensor_data_per_segment = filter_data_by_segment(sensor_data, time_segment)
            if not sensor_data_per_segment.empty:
                # extract local start datetime of the segment from "local_segment" column
                sensor_data_per_segment["local_segment_start_datetime"] = pd.to_datetime(sensor_data_per_segment["local_segment"].apply(lambda x: x.split("#")[1].split(",")[0]))
                # count rows per segment instance for this sensor
                sensor_row_count = sensor_data_per_segment.groupby("local_segment_start_datetime")[["local_segment"]].count().rename(columns={"local_segment": sensor_name})
                sensors_row_count[time_segment] = pd.concat([sensors_row_count[time_segment], sensor_row_count], axis=1, sort=False)
# add phone data yield features and plot heatmap
html_file = open(snakemake.output[0], "a", encoding="utf-8")
sensor_names.extend(["ratiovalidyieldedminutes", "ratiovalidyieldedhours"])
for time_segment in time_segments:
    if not phone_data_yield.empty:
        phone_data_yield_per_segment = phone_data_yield[phone_data_yield["local_segment_label"] == time_segment].rename(columns={"phone_data_yield_rapids_ratiovalidyieldedminutes": "ratiovalidyieldedminutes","phone_data_yield_rapids_ratiovalidyieldedhours": "ratiovalidyieldedhours"}).round(3)
        if not phone_data_yield_per_segment.empty:
            sensors_row_count[time_segment] = pd.concat([sensors_row_count[time_segment], phone_data_yield_per_segment], axis=1, sort=True)
    # consider all the sensors
    data_for_plot = sensors_row_count[time_segment].transpose().reindex(pd.Index(sensor_names))
    if data_for_plot.empty:
        html_file.write("There are no records of selected sensors in database for " + time_segment + " segments. Pid: " + pid + ". Label: " + label + ".<br>")
    else:
        # except for phone data yield sensor, scale each sensor (row) to the range of [0, 1];
        # constant rows fall back to x / min(x) to avoid dividing by zero
        scaled_data_for_plot = data_for_plot.copy()
        scaled_data_for_plot.loc[sensor_names[:-2]] = scaled_data_for_plot.fillna(np.nan).loc[sensor_names[:-2]].apply(lambda x: (x - np.nanmin(x)) / (np.nanmax(x) - np.nanmin(x)) if np.nanmax(x) != np.nanmin(x) else (x / np.nanmin(x)), axis=1)
        getRowCountHeatmap(data_for_plot, scaled_data_for_plot, pid, time_segment, html_file)
html_file.close()

View File

@ -0,0 +1,100 @@
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from importlib import util
from pathlib import Path
import yaml
def colors2colorscale(colors):
    """Convert a list of colors into a discrete plotly colorscale.

    Color i covers the band [i/(n-1), (i+1)/(n-1)] (two anchor points with the
    same color), so the scale renders as solid blocks rather than gradients;
    the last color is anchored only at position 1.

    Parameters
    ----------
    colors : list
        Color strings, ordered from low to high value.

    Returns
    -------
    list[list]
        [position, color] pairs accepted by plotly's `colorscale` argument.
    """
    length = len(colors)
    colorscale = []
    for i, color in enumerate(colors):
        if i != length - 1:
            # append in place instead of rebuilding the list each iteration
            colorscale.append([i / (length - 1), color])
            colorscale.append([(i + 1) / (length - 1), color])
        else:
            colorscale.append([1, color])
    return colorscale
def getSensorsPerMinPerSegmentHeatmap(phone_data_yield, pid, time_segment, html_file):
    # Append one heatmap to the already-open HTML report: segment instances (y)
    # by minutes since segment start (x), colored by the number of sensors with
    # data in that minute (discrete scale for counts 0..16).
    # NOTE(review): reads module-level `colors` and `label` defined in this script.
    x_axis_labels = [pd.Timedelta(minutes=x) for x in phone_data_yield.columns]
    fig = go.Figure(data=go.Heatmap(z=phone_data_yield.values.tolist(),
                                    x=x_axis_labels,
                                    y=phone_data_yield.index,
                                    zmin=0, zmax=16,
                                    colorscale=colors2colorscale(colors),
                                    # ticks centered on each discrete color band
                                    colorbar=dict(thickness=25, tickvals=[1/2 + x for x in range(16)],ticktext=[x for x in range(16)])))
    fig.update_layout(title="Number of sensors with any data per minute for " + time_segment + " segments. Pid: "+pid+". Label: " + label + "<br>y-axis shows the start (date and time) of a time segment.<br>x-axis shows the time since the start of the time segment.<br>z-axis (color) shows how many sensors logged at least one row of data per minute.")
    # extra top margin so the multi-line title fits above the plot
    fig["layout"].update(margin=dict(t=160))
    html_file.write(fig.to_html(full_html=False, include_plotlyjs="cdn"))
# --- Sensors-with-data per minute per time segment for one participant (snakemake script) ---
# import filter_data_by_segment from src/features/utils/utils.py
spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "features" / "utils" / "utils.py"))
mod = util.module_from_spec(spec)
spec.loader.exec_module(mod)
filter_data_by_segment = getattr(mod, "filter_data_by_segment")
# 17 discrete colors for sensor counts 0 (red) through 16
colors = ["red", "#3D0751", "#423176", "#414381", "#3F5688", "#42678B", "#42768C", "#45868B", "#4A968A", "#53A485", "#5FB57E", "#76C170", "#91CF63", "#B4DA55", "#D9E152", "#F8E755", "#DEE00F"]
pid = snakemake.params["pid"]
time_segments_labels = pd.read_csv(snakemake.input["time_segments_labels"], header=0)
with open(snakemake.input["participant_file"], "r", encoding="utf-8") as f:
    participant_file = yaml.safe_load(f)
label = participant_file["PHONE"]["LABEL"]
phone_data_yield = pd.read_csv(snakemake.input["phone_data_yield"], parse_dates=["local_date_time"])
html_file = open(snakemake.output[0], "a", encoding="utf-8")
if phone_data_yield.empty:
    html_file.write("There is no sensor data for " + pid + " (pid) and " + label + " (label).")
else:
    for time_segment in time_segments_labels["label"]:
        phone_data_yield_per_segment = filter_data_by_segment(phone_data_yield, time_segment)
        if phone_data_yield_per_segment.empty:
            html_file.write("There is no sensor data of " + time_segment + " segments for " + pid + " (pid) and " + label + " (label).<br>")
        else:
            # calculate the length (in minute) of per segment instance
            phone_data_yield_per_segment["length"] = phone_data_yield_per_segment["timestamps_segment"].str.split(",").apply(lambda x: int((int(x[1])-int(x[0])) / (1000 * 60)))
            # calculate the number of sensors logged at least one row of data per minute.
            phone_data_yield_per_segment = phone_data_yield_per_segment.groupby(["local_segment", "length", "local_date", "local_hour", "local_minute"])[["sensor", "local_date_time"]].max().reset_index()
            # extract local start datetime of the segment from "local_segment" column
            phone_data_yield_per_segment["local_segment_start_datetimes"] = pd.to_datetime(phone_data_yield_per_segment["local_segment"].apply(lambda x: x.split("#")[1].split(",")[0]))
            # calculate the number of minutes after local start datetime of the segment
            phone_data_yield_per_segment["minutes_after_segment_start"] = ((phone_data_yield_per_segment["local_date_time"] - phone_data_yield_per_segment["local_segment_start_datetimes"]) / pd.Timedelta(minutes=1)).astype("int")
            # impute missing rows with 0
            columns_for_full_index = phone_data_yield_per_segment[["local_segment_start_datetimes", "length"]].drop_duplicates(keep="first")
            columns_for_full_index = columns_for_full_index.apply(lambda row: [[row["local_segment_start_datetimes"], x] for x in range(row["length"] + 1)], axis=1)
            full_index = []
            for columns in columns_for_full_index:
                full_index = full_index + columns
            full_index = pd.MultiIndex.from_tuples(full_index, names=("local_segment_start_datetimes", "minutes_after_segment_start"))
            phone_data_yield_per_segment = phone_data_yield_per_segment.set_index(["local_segment_start_datetimes", "minutes_after_segment_start"]).reindex(full_index).reset_index().fillna(0)
            # transpose the dataframe per local start datetime of the segment and discard the useless index layer
            phone_data_yield_per_segment = phone_data_yield_per_segment.groupby("local_segment_start_datetimes")[["minutes_after_segment_start", "sensor"]].apply(lambda x: x.set_index("minutes_after_segment_start").transpose())
            phone_data_yield_per_segment.index = phone_data_yield_per_segment.index.get_level_values("local_segment_start_datetimes")
            # get heatmap
            getSensorsPerMinPerSegmentHeatmap(phone_data_yield_per_segment, pid, time_segment, html_file)
html_file.close()

View File

@ -8,15 +8,18 @@ phone_data_yield = pd.read_csv(snakemake.input[0])
if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")
# plot ratio valid yielded minutes histogram
fig_ratiovalidyieldedminutes = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedminutes", color="local_segment_label")
fig_ratiovalidyieldedminutes.update_layout(title="Ratio Valid Yielded Minutes Histogram")
# plot ratio valid yielded hours histogram
fig_ratiovalidyieldedhours = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedhours", color="local_segment_label")
fig_ratiovalidyieldedhours.update_layout(title="Ratio Valid Yielded Hours Histogram")
with open(snakemake.output[0], "a") as html_file:
# Append both histograms (or a no-data notice) to the report; `phone_data_yield`
# was loaded and validated earlier in this script.
html_file = open(snakemake.output[0], "a", encoding="utf-8")
if phone_data_yield.empty:
    html_file.write("There is no sensor data for the sensors in [PHONE_DATA_YIELD][SENSORS].")
else:
    # plot ratio valid yielded minutes histogram
    fig_ratiovalidyieldedminutes = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedminutes", color="local_segment_label")
    fig_ratiovalidyieldedminutes.update_layout(title="Histogram of valid yielded minutes ratio per time segment.")
    html_file.write(fig_ratiovalidyieldedminutes.to_html(full_html=False, include_plotlyjs="cdn"))
    # plot ratio valid yielded hours histogram
    fig_ratiovalidyieldedhours = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedhours", color="local_segment_label")
    fig_ratiovalidyieldedhours.update_layout(title="Histogram of valid yielded hours ratio per time segment.")
    html_file.write(fig_ratiovalidyieldedhours.to_html(full_html=False, include_plotlyjs="cdn"))
html_file.close()

View File

@ -1,10 +1,10 @@
---
title: "Heatmap Sensed Bins Report"
title: "Sensor Row Count per Time Segment For All Participants"
author:
- "MoSHI Pipeline"
- "RAPIDS"
date: "`r format(Sys.time(), '%d %B, %Y')`"
params:
rmd: "heatmap_sensed_bins_all_participants.Rmd"
rmd: "merge_heatmap_sensor_row_count_per_time_segment.Rmd"
output:
html_document:
highlight: tango
@ -17,14 +17,17 @@ output:
smooth_scroll: yes
---
<style>
.main-container {min-width:800px; max-width:100%;}
</style>
```{r include=FALSE}
source("renv/activate.R")
```
## All phone sensors
```{r, echo=FALSE}
heatmaps <- snakemake@input[["heatmap_sensed_bins"]]
heatmaps <- snakemake@input[["heatmap_sensor_row_count_per_time_segment"]]
heatmaps.html <- vector(mode="list", length(heatmaps))
for(pid in 1:length(heatmaps)){

View File

@ -1,10 +1,10 @@
---
title: "Heatmap Rows Report"
title: "Sensors per Minute per Time Segment for All Participants"
author:
- "MoSHI Pipeline"
- "RAPIDS"
date: "`r format(Sys.time(), '%d %B, %Y')`"
params:
rmd: "heatmap_days_by_sensors_all_participants.Rmd"
rmd: "merge_heatmap_sensors_per_minute_per_time_segment.Rmd"
output:
html_document:
highlight: tango
@ -17,14 +17,17 @@ output:
smooth_scroll: yes
---
<style>
.main-container {min-width:800px; max-width:100%;}
</style>
```{r include=FALSE}
source("renv/activate.R")
```
## All phone sensors
```{r, echo=FALSE}
heatmaps <- snakemake@input[["heatmap_rows"]]
heatmaps <- snakemake@input[["heatmap_sensors_per_minute_per_time_segment"]]
heatmaps.html <- vector(mode="list", length(heatmaps))
for(pid in 1:length(heatmaps)){

View File

@ -1,102 +0,0 @@
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.graph_objects as go
from dateutil import tz
import datetime
def getOneRow(data_per_participant, last_certain_dates, col_name, row, expected_num_of_days, only_show_valid_days):
    """Append one participant's per-day values of `col_name` onto `row`.

    With expected_num_of_days == -1 every day since the participant's first day
    is appended (resampled to daily bins, indexed by day offset); otherwise only
    the dates in `last_certain_dates` are appended, with 0 for missing dates.
    Returns the extended row (also mutated in place in the non -1 branch).
    """
    data = pd.read_csv(data_per_participant, index_col=["local_date"])
    if col_name == "num_sensors":
        # presumably each column holds a per-bin sensor count, so the row max is
        # the day's peak number of sensors with data — confirm against the input file
        data["num_sensors"] = data.max(axis=1)
    if only_show_valid_days and col_name == "valid_sensed_hours":
        # replace invalid days' valid sensed hours with np.nan to let our heatmap only shows valid days
        data.loc[data[data["is_valid_sensed_day"] == False].index, "valid_sensed_hours"] = np.nan
    if expected_num_of_days == -1:
        # show all days
        data.index = pd.to_datetime(data.index)
        start_date = data.index.min()
        # upsample data into one day bins
        data = data.resample("1D").sum()
        data["date_idx"] = (data.index - start_date).days
        data.set_index("date_idx", inplace=True, drop=True)
        row = row + data[col_name].tolist()
    else:
        # only show last certain days
        for date in last_certain_dates:
            if date in data.index:
                row.append(data.loc[date][col_name])
            else:
                row.append(0)
    return row
def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_certain_dates, bin_size, min_bins_per_hour, expected_num_of_days, output_path):
    # Write a standalone HTML heatmap: participants (y) by days or day indexes (x),
    # colored by valid sensed hours; hover text adds the number of sensors with data.
    plot = go.Figure(data=go.Heatmap(z=valid_sensed_hours[last_certain_dates].values,
                                     # real dates ("YYYY/MM/DD") when a fixed window was requested,
                                     # otherwise plain day indexes
                                     x=[date.replace("-", "/") for date in last_certain_dates] if expected_num_of_days != -1 else last_certain_dates,
                                     y=[pid + "." + label for pid, label in zip(sensors_with_data["pid"].to_list(), sensors_with_data["label"].to_list())],
                                     text=sensors_with_data[last_certain_dates].values,
                                     hovertemplate="Date: %{x}<br>Participant: %{y}<br>Valid sensed hours: %{z}<br>Number of sensors with data: %{text}<extra></extra>" if expected_num_of_days != -1 else "Day index: %{x}<br>Participant: %{y}<br>Valid sensed hours: %{z}<br>Number of sensors with data: %{text}<extra></extra>",
                                     colorscale="Viridis",
                                     colorbar={"tick0": 0,"dtick": 1},
                                     showscale=True))
    if expected_num_of_days != -1:
        plot.update_layout(title="Overall compliance heatmap for last " + str(expected_num_of_days) + " days.<br>Bin's color shows valid sensed hours for that day.<br>A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes.<br>You can hover over every day to see the number of sensors with data in that day.")
    else:
        plot.update_layout(title="Overall compliance heatmap for all days.<br>Bin's color shows valid sensed hours for that day.<br>A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes.<br>You can hover over every day to see the number of sensors with data in that day.")
    plot["layout"]["xaxis"].update(side="bottom")
    plot["layout"].update(xaxis_title="Day indexes")
    # extra top margin so the multi-line title fits above the plot
    plot["layout"].update(margin=dict(t=160))
    pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
# --- Overall compliance heatmap across all participants (snakemake script) ---
phone_sensed_bins = snakemake.input["phone_sensed_bins"]
phone_valid_sensed_days = snakemake.input["phone_valid_sensed_days"]
pid_files = snakemake.input["pid_files"]
only_show_valid_days = snakemake.params["only_show_valid_days"]
local_timezone = snakemake.params["local_timezone"]
bin_size = snakemake.params["bin_size"]
min_bins_per_hour = snakemake.params["min_bins_per_hour"]
expected_num_of_days = int(snakemake.params["expected_num_of_days"])
if expected_num_of_days < -1:
    # -1 means "all days"; anything smaller is a config error
    raise ValueError("EXPECTED_NUM_OF_DAYS of OVERALL_COMPLIANCE_HEATMAP section in config.yaml must be larger or equal to -1.")
last_certain_dates = []
if expected_num_of_days != -1:
    # get the list of dates to show: the last expected_num_of_days days up to
    # today in the configured local timezone, oldest first
    cur_date = datetime.datetime.now().astimezone(tz.gettz(local_timezone)).date()
    for date_offset in range(expected_num_of_days-1, -1, -1):
        last_certain_dates.append((cur_date - datetime.timedelta(days=date_offset)).strftime("%Y-%m-%d"))
sensors_with_data_records, valid_sensed_hours_records = [], []
for sensors_with_data_individual, valid_sensed_hours_individual, pid_file in zip(phone_sensed_bins, phone_valid_sensed_days, pid_files):
    # participant metadata file: line 0 ends with the device_id, line 2 is the label
    with open(pid_file, encoding="ISO-8859-1") as external_file:
        external_file_content = external_file.readlines()
    device_id = external_file_content[0].split(",")[-1].strip()
    label = external_file_content[2].strip()
    pid = pid_file.split("/")[-1]
    # each record row starts with [pid, label, device_id] followed by per-day values
    sensors_with_data_records.append(getOneRow(sensors_with_data_individual, last_certain_dates, "num_sensors", [pid, label, device_id], expected_num_of_days, only_show_valid_days))
    valid_sensed_hours_records.append(getOneRow(valid_sensed_hours_individual, last_certain_dates, "valid_sensed_hours", [pid, label, device_id], expected_num_of_days, only_show_valid_days))
if expected_num_of_days == -1:
    # get the date_idx of all days; the -3 skips the [pid, label, device_id] prefix
    total_num_of_days = max([len(x) for x in sensors_with_data_records]) - 3
    last_certain_dates = [date_idx for date_idx in range(total_num_of_days)]
sensors_with_data = pd.DataFrame(data=sensors_with_data_records, columns=["pid", "label", "device_id"] + last_certain_dates).replace(0, np.nan)
valid_sensed_hours = pd.DataFrame(data=valid_sensed_hours_records, columns=["pid", "label", "device_id"] + last_certain_dates).replace(0, np.nan)
if sensors_with_data.empty:
    empty_html = open(snakemake.output[0], "w")
    empty_html.write("There is no sensor data for all participants")
    empty_html.close()
else:
    getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_certain_dates, bin_size, min_bins_per_hour, expected_num_of_days, snakemake.output[0])