diff --git a/Snakefile b/Snakefile
index 916b098b..2a0944d3 100644
--- a/Snakefile
+++ b/Snakefile
@@ -127,6 +127,25 @@ if config["DORYAB_LOCATION"]["COMPUTE"]:
     files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["DORYAB_LOCATION"]["DB_TABLE"]))
     files_to_compute.extend(expand("data/processed/{pid}/location_doryab_{segment}.csv", pid=config["PIDS"], segment = config["DORYAB_LOCATION"]["DAY_SEGMENTS"]))
 
+# visualization for data exploration
+if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
+    files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}h/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"]))
+
+if config["HISTOGRAM_VALID_SENSED_HOURS"]["PLOT"]:
+    files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}h/histogram_valid_sensed_hours.html", min_valid_hours_per_day=config["HISTOGRAM_VALID_SENSED_HOURS"]["MIN_VALID_HOURS_PER_DAY"]))
+
+if config["HEATMAP_DAYS_BY_SENSORS"]["PLOT"]:
+    files_to_compute.extend(expand("reports/interim/{min_valid_hours_per_day}h/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"], min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"]))
+    files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}h/heatmap_days_by_sensors_all_participants.html", min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"]))
+
+if config["HEATMAP_SENSED_BINS"]["PLOT"]:
+    files_to_compute.extend(expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"]))
+    files_to_compute.extend(["reports/data_exploration/heatmap_sensed_bins_all_participants.html"])
+
+if config["OVERALL_COMPLIANCE_HEATMAP"]["PLOT"]:
+    files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}h/overall_compliance_heatmap.html", min_valid_hours_per_day=config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_HOURS_PER_DAY"]))
+
+# analysis example
 if config["PARAMS_FOR_ANALYSIS"]["COMPUTE"]:
     rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"]
     cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"]
diff --git a/config.yaml b/config.yaml
index 11266276..c294f9a4 100644
--- a/config.yaml
+++ b/config.yaml
@@ -29,15 +29,15 @@ READABLE_DATETIME:
 
 PHONE_VALID_SENSED_BINS:
     COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features
-    BIN_SIZE: 5 # (in minutes)
+    BIN_SIZE: &bin_size 5 # (in minutes)
     # Add as many sensor tables as you have, they all improve the computation of PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS.
     # If you are extracting screen or Barnett's location features, screen and locations tables are mandatory.
     TABLES: []
 
 PHONE_VALID_SENSED_DAYS:
     COMPUTE: False
-    MIN_VALID_HOURS_PER_DAY: [16, 18, 20] # (out of 24) MIN_HOURS_PER_DAY
-    MIN_VALID_BINS_PER_HOUR: 6 # (out of 60min/BIN_SIZE bins)
+    MIN_VALID_HOURS_PER_DAY: &min_valid_hours_per_day [16] # (out of 24) MIN_HOURS_PER_DAY
+    MIN_VALID_BINS_PER_HOUR: &min_valid_bins_per_hour 6 # (out of 60min/BIN_SIZE bins)
 
 # Communication SMS features config, TYPES and FEATURES keys need to match
 MESSAGES:
@@ -200,14 +200,43 @@ CONVERSATION:
     RECORDINGMINUTES: 1
     PAUSEDMINUTES : 3
 
+### Visualizations ################################################################
+HEATMAP_FEATURES_CORRELATIONS:
+    PLOT: False
+    MIN_ROWS_RATIO: 0.5
+    MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
+    PHONE_FEATURES: [accelerometer, activity_recognition, applications_foreground, battery, calls_incoming, calls_missed, calls_outgoing, conversation, light, location_doryab, messages_received, messages_sent, screen]
+    FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep]
+    CORR_THRESHOLD: 0.1
 
-### Analysis ################################################################
+HISTOGRAM_VALID_SENSED_HOURS:
+    PLOT: False
+    MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
+
+HEATMAP_DAYS_BY_SENSORS:
+    PLOT: False
+    MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
+    EXPECTED_NUM_OF_DAYS: -1
+    PHONE_SENSORS_TABLES: ["accelerometer", "applications_foreground", "battery", "calls", "light", "locations", "messages", "screen", "plugin_google_activity_recognition", "plugin_studentlife_audio_android"]
+
+HEATMAP_SENSED_BINS:
+    PLOT: False
+    BIN_SIZE: *bin_size
+
+OVERALL_COMPLIANCE_HEATMAP:
+    PLOT: False
+    BIN_SIZE: *bin_size
+    EXPECTED_NUM_OF_DAYS: 7
+    MIN_VALID_BINS_PER_HOUR: *min_valid_bins_per_hour
+    MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
+
+### Example Analysis ################################################################
 PARAMS_FOR_ANALYSIS:
     COMPUTE: False
     GROUNDTRUTH_TABLE: participant_info
     SOURCES: &sources ["phone_features", "fitbit_features", "phone_fitbit_features"]
     DAY_SEGMENTS: *day_segments
-    PHONE_FEATURES: [accelerometer, applications_foreground, battery, call_incoming, call_missed, call_outgoing, activity_recognition, light, location_barnett, screen, sms_received, sms_sent]
+    PHONE_FEATURES: [accelerometer, activity_recognition, applications_foreground, battery, calls_incoming, calls_missed, calls_outgoing, conversation, light, location_doryab, messages_received, messages_sent, screen]
     FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep]
     PHONE_FITBIT_FEATURES: "" # This array is merged in the input_merge_features_of_single_participant function in models.snakefile
     DEMOGRAPHIC_FEATURES: [age, gender, inpatientdays]
diff --git a/rules/reports.snakefile b/rules/reports.snakefile
index 871dd2c6..54d6fd36 100644
--- a/rules/reports.snakefile
+++ b/rules/reports.snakefile
@@ -1,61 +1,107 @@
-rule heatmap_rows:
+rule heatmap_features_correlations:
     input:
-        sensor = "data/raw/{pid}/{sensor}_with_datetime.csv",
-        pid_file = "data/external/{pid}"
+        features = expand("data/processed/{pid}/{sensor}_{day_segment}.csv", pid=config["PIDS"], sensor=config["HEATMAP_FEATURES_CORRELATIONS"]["PHONE_FEATURES"]+config["HEATMAP_FEATURES_CORRELATIONS"]["FITBIT_FEATURES"], day_segment=config["DAY_SEGMENTS"]),
+        phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}h.csv", pid=config["PIDS"])
     params:
-        table = "{sensor}",
-        pid = "{pid}",
-        bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
+        min_rows_ratio = config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_ROWS_RATIO"],
+        corr_threshold = config["HEATMAP_FEATURES_CORRELATIONS"]["CORR_THRESHOLD"] #0.75
     output:
-        "reports/figures/{pid}/{sensor}_heatmap_rows.html"
+        "reports/data_exploration/{min_valid_hours_per_day}h/heatmap_features_correlations.html"
     script:
-        "../src/visualization/heatmap_rows.py"
+        "../src/visualization/heatmap_features_correlations.py"
 
-rule compliance_heatmap:
+rule histogram_valid_sensed_hours:
+    input:
+        phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}h.csv", pid=config["PIDS"])
+    output:
+        "reports/data_exploration/{min_valid_hours_per_day}h/histogram_valid_sensed_hours.html"
+    script:
+        "../src/visualization/histogram_valid_sensed_hours.py"
+
+rule heatmap_days_by_sensors:
+    input:
+        sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["HEATMAP_DAYS_BY_SENSORS"]["PHONE_SENSORS_TABLES"]),
+        phone_valid_sensed_days = "data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}h.csv"
+    params:
+        pid = "{pid}",
+        expected_num_of_days = config["HEATMAP_DAYS_BY_SENSORS"]["EXPECTED_NUM_OF_DAYS"]
+    output:
+        "reports/interim/{min_valid_hours_per_day}h/{pid}/heatmap_days_by_sensors.html"
+    script:
+        "../src/visualization/heatmap_days_by_sensors.py"
+
+rule heatmap_days_by_sensors_all_participants:
+    input:
+        heatmap_rows = expand("reports/interim/{{min_valid_hours_per_day}}h/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"])
+    output:
+        "reports/data_exploration/{min_valid_hours_per_day}h/heatmap_days_by_sensors_all_participants.html"
+    script:
+        "../src/visualization/heatmap_days_by_sensors_all_participants.Rmd"
+
+rule heatmap_sensed_bins:
     input:
         sensor = "data/interim/{pid}/phone_sensed_bins.csv",
         pid_file = "data/external/{pid}"
     params:
         pid = "{pid}",
-        bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
+        bin_size = config["HEATMAP_SENSED_BINS"]["BIN_SIZE"]
     output:
-        "reports/figures/{pid}/compliance_heatmap.html"
+        "reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html"
     script:
-        "../src/visualization/compliance_heatmap.py"
+        "../src/visualization/heatmap_sensed_bins.py"
+
+rule heatmap_sensed_bins_all_participants:
+    input:
+        heatmap_sensed_bins = expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"])
+    output:
+        "reports/data_exploration/heatmap_sensed_bins_all_participants.html"
+    script:
+        "../src/visualization/heatmap_sensed_bins_all_participants.Rmd"
 
 rule overall_compliance_heatmap:
     input:
         phone_sensed_bins = expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]),
-        phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days.csv", pid=config["PIDS"]),
+        phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}h.csv", pid=config["PIDS"]),
        pid_files = expand("data/external/{pid}", pid=config["PIDS"])
     params:
         local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
-        bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"],
-        min_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]
+        expected_num_of_days = config["OVERALL_COMPLIANCE_HEATMAP"]["EXPECTED_NUM_OF_DAYS"],
+        bin_size = config["OVERALL_COMPLIANCE_HEATMAP"]["BIN_SIZE"],
+        min_bins_per_hour = config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_BINS_PER_HOUR"]
     output:
-        "reports/figures/overall_compliance_heatmap.html"
+        "reports/data_exploration/{min_valid_hours_per_day}h/overall_compliance_heatmap.html"
     script:
         "../src/visualization/overall_compliance_heatmap.py"
 
-rule battery_consumption_rates_barchart:
-    input:
-        sensor = "data/processed/{pid}/battery_daily.csv",
-        pid_file = "data/external/{pid}"
-    params:
-        pid = "{pid}"
-    output:
-        "reports/figures/{pid}/battery_consumption_rates_barchart.html"
-    script:
-        "../src/visualization/battery_consumption_rates_barchart.py"
+# rule heatmap_rows:
+#     input:
+#         sensor = "data/raw/{pid}/{sensor}_with_datetime.csv",
+#         pid_file = "data/external/{pid}"
+#     params:
+#         table = "{sensor}",
+#         pid = "{pid}",
+#         bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
+#     output:
+#         "reports/figures/{pid}/{sensor}_heatmap_rows.html"
+#     script:
+#         "../src/visualization/heatmap_rows.py"
 
-PHONE_SENSORS = []
-PHONE_SENSORS.extend([config["MESSAGES"]["DB_TABLE"], config["CALLS"]["DB_TABLE"], config["BARNETT_LOCATION"]["DB_TABLE"], config["BLUETOOTH"]["DB_TABLE"], config["BATTERY"]["DB_TABLE"], config["SCREEN"]["DB_TABLE"], config["LIGHT"]["DB_TABLE"], config["ACCELEROMETER"]["DB_TABLE"], config["APPLICATIONS_FOREGROUND"]["DB_TABLE"],config["WIFI"]["DB_TABLE"]])
+# rule battery_consumption_rates_barchart:
+#     input:
+#         sensor = "data/processed/{pid}/battery_daily.csv",
+#         pid_file = "data/external/{pid}"
+#     params:
+#         pid = "{pid}"
+#     output:
+#         "reports/figures/{pid}/battery_consumption_rates_barchart.html"
+#     script:
+#         "../src/visualization/battery_consumption_rates_barchart.py"
 
-rule compliance_report:
-    input:
-        sensor_heatmaps = expand("reports/figures/{{pid}}/{sensor}_heatmap_rows.html", sensor=PHONE_SENSORS),
-        compliance_heatmap = rules.compliance_heatmap.output
-    output:
-        "reports/compliance/{pid}/compliance_report.html",
-    script:
-        "../src/visualization/compliance_report.Rmd"
\ No newline at end of file
+# rule compliance_report:
+#     input:
+#         sensor_heatmaps = expand("reports/figures/{{pid}}/{sensor}_heatmap_rows.html", sensor=PHONE_SENSORS),
+#         compliance_heatmap = rules.compliance_heatmap.output
+#     output:
+#         "reports/compliance/{pid}/compliance_report.html",
+#     script:
+#         "../src/visualization/compliance_report.Rmd"
\ No newline at end of file
diff --git a/src/visualization/heatmap_days_by_sensors.py b/src/visualization/heatmap_days_by_sensors.py
new file mode 100644
index 00000000..9e9c233a
--- /dev/null
+++ b/src/visualization/heatmap_days_by_sensors.py
@@ -0,0 +1,56 @@
+import pandas as pd
+import plotly.io as pio
+import plotly.graph_objects as go
+from datetime import datetime, timedelta
+
+def getRowCountHeatmap(row_count_sensors_normalized, row_count_sensors, pid, output_path):
+    plot = go.Figure(data=go.Heatmap(z=row_count_sensors_normalized.T.values.tolist(),
+                                     x=[datetime.strftime(idx[0], "%Y/%m/%d")+"("+str(idx[1])+")" for idx in row_count_sensors.index],
+                                     y=row_count_sensors.columns.tolist(),
+                                     hovertext=row_count_sensors.T.values.tolist(),
+                                     hovertemplate="Date: %{x}<br>Sensor: %{y}<br>Row count: %{hovertext}",
+                                     colorscale="Viridis"))
+    plot.update_layout(title="Row count heatmap for " + pid)
+    pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
+
+
+
+phone_valid_sensed_days = pd.read_csv(snakemake.input["phone_valid_sensed_days"], parse_dates=["local_date"], index_col=["local_date"])
+phone_valid_sensed_days = phone_valid_sensed_days[phone_valid_sensed_days["is_valid_sensed_day"] == True]
+
+row_count_sensors = pd.DataFrame()
+for sensor_path in snakemake.input["sensors"]:
+    # plugin_studentlife_audio_android => conversation; plugin_google_activity_recognition => AR; applications_foreground => apps
+    sensor_name = sensor_path.split("/")[-1].replace("_with_datetime.csv", "").replace("plugin_studentlife_audio_android", "conversation").replace("plugin_google_activity_recognition", "AR").replace("applications_foreground", "apps")
+    sensor_data = pd.read_csv(sensor_path, encoding="ISO-8859-1", parse_dates=["local_date"], dtype={"label": str})
+    if sensor_data.empty:
+        row_count_sensor = pd.DataFrame(columns=[sensor_name])
+    else:
+        row_count_sensor = sensor_data[["timestamp", "local_date"]].groupby(["local_date"]).count().rename(columns={"timestamp": sensor_name})
+    row_count_sensors = row_count_sensors.join(row_count_sensor, how="outer")
+
+row_count_sensors.index = pd.to_datetime(row_count_sensors.index)
+row_count_sensors = row_count_sensors.join(phone_valid_sensed_days[["valid_sensed_hours"]], how="outer")
+
+# set date_idx based on the first date
+reference_date = row_count_sensors.index.min()
+row_count_sensors["date_idx"] = (row_count_sensors.index - reference_date).days
+row_count_sensors["local_date"] = row_count_sensors.index
+row_count_sensors.set_index(["local_date", "date_idx"], inplace=True)
+
+# add empty rows to make sure different participants have the same date_idx range
+date_idx_range = [idx for idx in range(int(snakemake.params["expected_num_of_days"]))]
+date_range = [reference_date + timedelta(days=idx) for idx in date_idx_range]
+all_dates = pd.DataFrame({"local_date": date_range, "date_idx": date_idx_range})
+all_dates.set_index(["local_date", "date_idx"], inplace=True)
+
+row_count_sensors = row_count_sensors.merge(all_dates, left_index=True, right_index=True, how="right")
+
+# normalize each sensor (column)
+if row_count_sensors.count().max() > 1:
+    row_count_sensors_normalized = (row_count_sensors-row_count_sensors.min())/(row_count_sensors.max()-row_count_sensors.min())
+else:
+    row_count_sensors_normalized = row_count_sensors
+
+pid = sensor_path.split("/")[2]
+getRowCountHeatmap(row_count_sensors_normalized, row_count_sensors, pid, snakemake.output[0])
diff --git a/src/visualization/heatmap_days_by_sensors_all_participants.Rmd b/src/visualization/heatmap_days_by_sensors_all_participants.Rmd
new file mode 100644
index 00000000..cb4303c2
--- /dev/null
+++ b/src/visualization/heatmap_days_by_sensors_all_participants.Rmd
@@ -0,0 +1,34 @@
+---
+title: "Heatmap Rows Report"
+author:
+  - "MoSHI Pipeline"
+date: "`r format(Sys.time(), '%d %B, %Y')`"
+params:
+  rmd: "heatmap_days_by_sensors_all_participants.Rmd"
+output:
+  html_document:
+    highlight: tango
+    number_sections: no
+    theme: default
+    toc: yes
+    toc_depth: 3
+    toc_float:
+      collapsed: no
+      smooth_scroll: yes
+---
+
+```{r include=FALSE}
+source("renv/activate.R")
+```
+
+## All phone sensors
+
+```{r, echo=FALSE}
+heatmaps <- snakemake@input[["heatmap_rows"]]
+heatmaps.html <- vector(mode="list", length(heatmaps))
+
+for(pid in 1:length(heatmaps)){
+  heatmaps.html[[pid]] <- htmltools::includeHTML(heatmaps[pid])
+}
+htmltools::tagList(heatmaps.html)
+```
diff --git a/src/visualization/heatmap_features_correlations.py b/src/visualization/heatmap_features_correlations.py
new file mode 100644
index 00000000..c8793765
--- /dev/null
+++ b/src/visualization/heatmap_features_correlations.py
@@ -0,0 +1,59 @@
+import numpy as np
+import pandas as pd
+import plotly.io as pio
+import plotly.graph_objects as go
+
+
+def getCorrMatrixHeatmap(corr_matrix, output_path):
+    colnames = corr_matrix.columns
+    plot = go.Figure(data=go.Heatmap(z=corr_matrix.values.tolist(),
+                                     x=colnames,
+                                     y=colnames,
+                                     colorscale="Viridis"))
+    plot.update_layout(title="Correlation Matrix Heatmap")
+    pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
+
+
+min_rows_ratio = snakemake.params["min_rows_ratio"]
+corr_threshold = snakemake.params["corr_threshold"]
+
+# merge features
+features, features_all_sensors = pd.DataFrame(columns=["local_date"]), pd.DataFrame(columns=["local_date"])
+pids = set()
+last_pid = None
+for path in snakemake.input["features"]:
+    pid = path.split("/")[2]
+    if pid not in pids:
+        pids.add(pid)
+        features_all_sensors["pid"] = last_pid
+        features = pd.concat([features, features_all_sensors], axis=0, ignore_index=True, sort=False)
+        features_all_sensors = pd.DataFrame(columns=["local_date"])
+    features_per_sensor = pd.read_csv(path)
+    features_all_sensors = features_all_sensors.merge(features_per_sensor, on="local_date", how="outer")
+    last_pid = pid
+
+features_all_sensors["pid"] = last_pid
+features = pd.concat([features, features_all_sensors], axis=0, ignore_index=True, sort=False)
+features.set_index(["pid", "local_date"], inplace=True)
+
+# select days based on the input of "phone_valid_sensed_days"
+selected_participants_and_days = pd.DataFrame()
+for path in snakemake.input["phone_valid_sensed_days"]:
+    pid = path.split("/")[2]
+    phone_valid_sensed_days = pd.read_csv(path)
+    phone_valid_sensed_days = phone_valid_sensed_days[phone_valid_sensed_days["is_valid_sensed_day"] == True]
+    phone_valid_sensed_days["pid"] = pid
+    selected_participants_and_days = pd.concat([selected_participants_and_days, phone_valid_sensed_days], axis=0)
+
+selected_participants_and_days.set_index(["pid", "local_date"], inplace=True)
+features = features.loc[features.index.intersection(selected_participants_and_days.index), :]
+
+# get correlation matrix
+features = features.astype(float)
+corr_matrix = features.corr(min_periods=min_rows_ratio * features.shape[0])
+
+# replace correlation coefficients less than corr_threshold with NA
+corr_matrix[(corr_matrix > -corr_threshold) & (corr_matrix < corr_threshold)] = np.nan
+
+# plot heatmap
+getCorrMatrixHeatmap(corr_matrix, snakemake.output[0])
diff --git a/src/visualization/compliance_heatmap.py b/src/visualization/heatmap_sensed_bins.py
similarity index 90%
rename from src/visualization/compliance_heatmap.py
rename to src/visualization/heatmap_sensed_bins.py
index 1c7fc903..26639400 100644
--- a/src/visualization/compliance_heatmap.py
+++ b/src/visualization/heatmap_sensed_bins.py
@@ -20,7 +20,7 @@ def getComplianceHeatmap(dates, compliance_matrix, pid, output_path, bin_size):
                                      y=[datetime.datetime.strftime(date, '%Y/%m/%d') for date in dates],
                                      colorscale='Viridis',
                                      colorbar={'tick0': 0,'dtick': 1}))
-    plot.update_layout(title="Compliance heatmap.<br>Five-minute bins showing how many sensors logged at least one row of data in that period for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
+    plot.update_layout(title="Heatmap sensed bins.<br>Five-minute bins showing how many sensors logged at least one row of data in that period for " + pid + "<br>Label: " + label + ", device_id: " + device_id)
     pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
 
 # get current patient id
diff --git a/src/visualization/heatmap_sensed_bins_all_participants.Rmd b/src/visualization/heatmap_sensed_bins_all_participants.Rmd
new file mode 100644
index 00000000..e6dbdbbf
--- /dev/null
+++ b/src/visualization/heatmap_sensed_bins_all_participants.Rmd
@@ -0,0 +1,34 @@
+---
+title: "Heatmap Sensed Bins Report"
+author:
+  - "MoSHI Pipeline"
+date: "`r format(Sys.time(), '%d %B, %Y')`"
+params:
+  rmd: "heatmap_sensed_bins_all_participants.Rmd"
+output:
+  html_document:
+    highlight: tango
+    number_sections: no
+    theme: default
+    toc: yes
+    toc_depth: 3
+    toc_float:
+      collapsed: no
+      smooth_scroll: yes
+---
+
+```{r include=FALSE}
+source("renv/activate.R")
+```
+
+## All phone sensors
+
+```{r, echo=FALSE}
+heatmaps <- snakemake@input[["heatmap_sensed_bins"]]
+heatmaps.html <- vector(mode="list", length(heatmaps))
+
+for(pid in 1:length(heatmaps)){
+  heatmaps.html[[pid]] <- htmltools::includeHTML(heatmaps[pid])
+}
+htmltools::tagList(heatmaps.html)
+```
diff --git a/src/visualization/histogram_valid_sensed_hours.py b/src/visualization/histogram_valid_sensed_hours.py
new file mode 100644
index 00000000..cb5c904b
--- /dev/null
+++ b/src/visualization/histogram_valid_sensed_hours.py
@@ -0,0 +1,16 @@
+import pandas as pd
+import plotly.express as px
+import plotly.io as pio
+
+
+# merge "phone_valid_sensed_days" for all participants
+selected_participants_and_days = pd.DataFrame()
+for path in snakemake.input["phone_valid_sensed_days"]:
+    phone_valid_sensed_days = pd.read_csv(path)
+    phone_valid_sensed_days = phone_valid_sensed_days[phone_valid_sensed_days["is_valid_sensed_day"] == True]
+    selected_participants_and_days = pd.concat([selected_participants_and_days, phone_valid_sensed_days], axis=0)
+
+# plot histogram
+fig = px.histogram(selected_participants_and_days, x="valid_sensed_hours")
+fig.update_layout(title="Phone Valid Hours Histogram")
+pio.write_html(fig, file=snakemake.output[0], auto_open=False, include_plotlyjs="cdn")
\ No newline at end of file
diff --git a/src/visualization/overall_compliance_heatmap.py b/src/visualization/overall_compliance_heatmap.py
index bd6324ab..40ca1783 100644
--- a/src/visualization/overall_compliance_heatmap.py
+++ b/src/visualization/overall_compliance_heatmap.py
@@ -16,7 +16,7 @@ def getOneRow(data_per_participant, last_seven_dates, col_name, row):
             row.append(0)
     return row
 
-def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, output_path):
+def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, expected_num_of_days, output_path):
     plot = ff.create_annotated_heatmap(z=sensors_with_data[last_seven_dates].values,
                                        x=[date.replace("-", "/") for date in last_seven_dates],
                                        y=[pid + "." + label for pid, label in zip(sensors_with_data["pid"].to_list(), sensors_with_data["label"].to_list())],
@@ -25,7 +25,7 @@ def getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seve
                                        colorscale="Viridis",
                                        colorbar={"tick0": 0,"dtick": 1},
                                        showscale=True)
-    plot.update_layout(title="Overall compliance heatmap for last seven days.<br>Bin's color shows how many sensors logged at least one row of data for that day.<br>Bin's text shows the valid hours of that day.(A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes)")
+    plot.update_layout(title="Overall compliance heatmap for last " + str(expected_num_of_days) + " days.<br>Bin's color shows how many sensors logged at least one row of data for that day.<br>Bin's text shows the valid hours of that day.(A valid hour has at least one row of any sensor in "+ str(min_bins_per_hour) +" out of " + str(int(60 / bin_size)) + " bins of " + str(bin_size) + " minutes)")
     plot["layout"]["xaxis"].update(side="bottom")
     pio.write_html(plot, file=output_path, auto_open=False, include_plotlyjs="cdn")
 
@@ -36,11 +36,12 @@ pid_files = snakemake.input["pid_files"]
 local_timezone = snakemake.params["local_timezone"]
 bin_size = snakemake.params["bin_size"]
 min_bins_per_hour = snakemake.params["min_bins_per_hour"]
+expected_num_of_days = int(snakemake.params["expected_num_of_days"])
 
 cur_date = datetime.datetime.now().astimezone(tz.gettz(local_timezone)).date()
 last_seven_dates = []
-for date_offset in range(6,-1,-1):
+for date_offset in range(expected_num_of_days-1, -1, -1):
     last_seven_dates.append((cur_date - datetime.timedelta(days=date_offset)).strftime("%Y-%m-%d"))
 
 
@@ -64,4 +65,4 @@ if sensors_with_data.empty:
     empty_html.write("There is no sensor data for all participants")
     empty_html.close()
 else:
-    getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, snakemake.output[0])
\ No newline at end of file
+    getOverallComplianceHeatmap(sensors_with_data, valid_sensed_hours, last_seven_dates, bin_size, min_bins_per_hour, expected_num_of_days, snakemake.output[0])
\ No newline at end of file