diff --git a/config.yaml b/config.yaml index 80a8ea8c..86e0461b 100644 --- a/config.yaml +++ b/config.yaml @@ -522,6 +522,7 @@ HISTOGRAM_PHONE_DATA_YIELD: # See https://www.rapids.science/latest/visualizations/data-quality-visualizations/#2-heatmaps-of-overall-data-yield HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT: PLOT: False + TIME: RELATIVE_TIME # ABSOLUTE_TIME or RELATIVE_TIME # See https://www.rapids.science/latest/visualizations/data-quality-visualizations/#3-heatmap-of-recorded-phone-sensors HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT: @@ -530,7 +531,7 @@ HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT: # See https://www.rapids.science/latest/visualizations/data-quality-visualizations/#4-heatmap-of-sensor-row-count HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT: PLOT: False - SENSORS: [PHONE_ACCELEROMETER, PHONE_ACTIVITY_RECOGNITION, PHONE_APPLICATIONS_FOREGROUND, PHONE_BATTERY, PHONE_BLUETOOTH, PHONE_CALLS, PHONE_CONVERSATION, PHONE_LIGHT, PHONE_LOCATIONS, PHONE_MESSAGES, PHONE_SCREEN, PHONE_WIFI_CONNECTED, PHONE_WIFI_VISIBLE] + SENSORS: [] # Features ------ diff --git a/docs/img/h-data-yield.html b/docs/img/h-data-yield.html index 082b5513..7952a329 100644 --- a/docs/img/h-data-yield.html +++ b/docs/img/h-data-yield.html @@ -1,39 +1,3 @@ -
- - - -
- -
- - - -
- -
\ No newline at end of file +
+
+
\ No newline at end of file diff --git a/docs/img/h-data-yield.png b/docs/img/h-data-yield.png index e2fcced0..859e7cf9 100644 Binary files a/docs/img/h-data-yield.png and b/docs/img/h-data-yield.png differ diff --git a/docs/img/hm-data-yield-participants-absolute-time.html b/docs/img/hm-data-yield-participants-absolute-time.html new file mode 100644 index 00000000..e902c07c --- /dev/null +++ b/docs/img/hm-data-yield-participants-absolute-time.html @@ -0,0 +1,11 @@ +
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file diff --git a/docs/img/hm-data-yield-participants-absolute-time.png b/docs/img/hm-data-yield-participants-absolute-time.png new file mode 100644 index 00000000..4129a8ee Binary files /dev/null and b/docs/img/hm-data-yield-participants-absolute-time.png differ diff --git a/docs/img/hm-data-yield-participants-relative-time.html b/docs/img/hm-data-yield-participants-relative-time.html new file mode 100644 index 00000000..7c7366f3 --- /dev/null +++ b/docs/img/hm-data-yield-participants-relative-time.html @@ -0,0 +1,11 @@ +
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file diff --git a/docs/img/hm-data-yield-participants-relative-time.png b/docs/img/hm-data-yield-participants-relative-time.png new file mode 100644 index 00000000..20f8caa3 Binary files /dev/null and b/docs/img/hm-data-yield-participants-relative-time.png differ diff --git a/docs/img/hm-data-yield-participants.png b/docs/img/hm-data-yield-participants.png deleted file mode 100644 index 6778c870..00000000 Binary files a/docs/img/hm-data-yield-participants.png and /dev/null differ diff --git a/docs/img/hm-phone-sensors.html b/docs/img/hm-phone-sensors.html index b8b3c2e0..d47455d4 100644 --- a/docs/img/hm-phone-sensors.html +++ b/docs/img/hm-phone-sensors.html @@ -377,7 +377,7 @@ summary {

Sensors per Minute per Time Segment for All Participants

RAPIDS

-

04 December, 2020

+

23 March, 2021

@@ -385,198 +385,18 @@ summary { -
- - - -
- -
- - - -
- -
- - - -
- -
- - - -
- -
- - - -
- -
-
- - - -
- -
- - - -
- -
- - - -
- -
- - - -
- -
- - - -
- -
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/img/hm-phone-sensors.png b/docs/img/hm-phone-sensors.png index d53905c3..3e19e4fe 100644 Binary files a/docs/img/hm-phone-sensors.png and b/docs/img/hm-phone-sensors.png differ diff --git a/docs/img/hm-sensor_rows.html b/docs/img/hm-sensor-rows.html similarity index 84% rename from docs/img/hm-sensor_rows.html rename to docs/img/hm-sensor-rows.html index f739577e..5e90eaf4 100644 --- a/docs/img/hm-sensor_rows.html +++ b/docs/img/hm-sensor-rows.html @@ -377,7 +377,7 @@ summary {

Sensor Row Count per Time Segment For All Participants

RAPIDS

-

04 December, 2020

+

23 March, 2021

@@ -385,198 +385,18 @@ summary { -
- - - -
- -
- - - -
- -
- - - -
- -
- - - -
- -
- - - -
- -
-
- - - -
- -
- - - -
- -
- - - -
- -
- - - -
- -
- - - -
- -
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/img/hm-sensor-rows.png b/docs/img/hm-sensor-rows.png new file mode 100644 index 00000000..c8c6cbcd Binary files /dev/null and b/docs/img/hm-sensor-rows.png differ diff --git a/docs/img/hm-sensor_rows.png b/docs/img/hm-sensor_rows.png deleted file mode 100644 index 9857f0a4..00000000 Binary files a/docs/img/hm-sensor_rows.png and /dev/null differ diff --git a/docs/visualizations/data-quality-visualizations.md b/docs/visualizations/data-quality-visualizations.md index 0871684d..e1beae3d 100644 --- a/docs/visualizations/data-quality-visualizations.md +++ b/docs/visualizations/data-quality-visualizations.md @@ -1,5 +1,5 @@ # Data Quality Visualizations -We showcase these visualizations with a test study that collected 14 days of smartphone and Fitbit data from two participants (t01 and t02) and extracted behavioral features within five time segments (daily, morning, afternoon, evening, and night). +We showcase these visualizations with a test study that collected 14 days of smartphone and Fitbit data from two participants (example01 and example02) and extracted behavioral features within five time segments (daily, morning, afternoon, evening, and night). !!! note [Time segments](../../setup/configuration#time-segments) (e.g. `daily`, `morning`, etc.) can have multiple instances (day 1, day 2, or morning 1, morning 2, etc.) @@ -14,23 +14,35 @@ These plots can be used as a rough indication of the smartphone monitoring cover
-
Histogram of the data yielded minute ratio for a single participant during five time segments (daily, afternoon, evening, and night)
+
Histogram of the data yielded minute ratio for a single participant during five time segments (daily, morning, afternoon, evening, and night)
## 2. Heatmaps of overall data yield These heatmaps are a break down per time segment and per participant of [Visualization 1](#1-histograms-of-phone-data-yield). Heatmap's rows represent participants, columns represent time segment instances and the cells’ color represent the valid yielded minute or hour ratio for a participant during a time segment instance. -As different participants might join a study on different dates and time segments can be of any length and start on any day, the x-axis is labelled with the time delta between the start of each time segment instance minus the start of the first instance. These plots provide a quick study overview of the monitoring coverage per person and per time segment. +As different participants might join a study on different dates and time segments can be of any length and start on any day, the x-axis can be labelled either with the absolute time of the start of each time segment instance or with the time delta between the start of each time segment instance and the start of the first instance. These plots provide a quick study overview of the monitoring coverage per person and per time segment. -The figure below shows the heatmap of the valid yielded minute ratio for participants t01 and t02 on daily segments and, as we inferred from the previous histogram, the lighter (yellow) color on most time segment instances (cells) indicate both phones sensed data without interruptions for most days (except for the first and last ones). - -!!! example - Click [here](../../img/hm-data-yield-participants.html) to see an example of these interactive visualizations in HTML format +The figure below shows the heatmap of the valid yielded minute ratio for participants example01 and example02 on daily segments and, as we inferred from the previous histogram, the lighter (yellow) color on most time segment instances (cells) indicates both phones sensed data without interruptions for most days (except for the first and last ones). -
- -
Overall compliance heatmap for all participants
-
+=== "[ABSOLUTE_TIME]" + + !!! example + Click [here](../../img/hm-data-yield-participants-absolute-time.html) to see an example of these interactive visualizations in HTML format + +
+ +
Overall compliance heatmap for all participants
+
+ +=== "[RELATIVE_TIME]" + + !!! example + Click [here](../../img/hm-data-yield-participants-relative-time.html) to see an example of these interactive visualizations in HTML format + +
+ +
Overall compliance heatmap for all participants
+
## 3. Heatmap of recorded phone sensors @@ -38,7 +50,7 @@ In these heatmaps rows represent time segment instances, columns represent minut RAPIDS creates a plot per participant and per time segment and can be used as a rough indication of whether time-based sensors were following their sensing schedule (e.g. if location was being sensed every 2 minutes). -The figure below shows this heatmap for phone sensors collected by participant t01 in daily time segments from Apr 23rd 2020 to May 4th 2020. We can infer that for most of the monitoring time, the participant’s phone logged data from at least 8 sensors each minute. +The figure below shows this heatmap for phone sensors collected by participant example01 in daily time segments from Apr 23rd 2020 to May 4th 2020. We can infer that for most of the monitoring time, the participant’s phone logged data from at least 7 sensors each minute. !!! example Click [here](../../img/hm-phone-sensors.html) to see an example of these interactive visualizations in HTML format @@ -53,12 +65,12 @@ These heatmaps are a per-sensor breakdown of [Visualization 1](#1-histograms-of- In these heatmaps rows represent phone or Fitbit sensors, columns represent time segment instances and cell’s color shows the normalized (0 to 1) row count of each sensor within a time segment instance. RAPIDS creates one heatmap per participant and they can be used to judge missing data on a per participant and per sensor basis. -The figure below shows data for 16 phone sensors (including data yield) of t01’s daily segments (only half of the sensor names and dates are visible in the screenshot but all can be accessed in the interactive plot). From the top two rows, we can see that the phone was sensing data for most of the monitoring period (as suggested by Figure 3 and Figure 4). 
We can also infer how phone usage influenced the different sensor streams; there are peaks of screen events during the first day (Apr 23rd), peaks of location coordinates on Apr 26th and Apr 30th, and no sent or received SMS except for Apr 23rd, Apr 29th and Apr 30th (unlabeled row between screen and locations). +The figure below shows data for 14 phone sensors (including data yield) of example01’s daily segments. From the top two rows, we can see that the phone was sensing data for most of the monitoring period (as suggested by Figure 3 and Figure 4). We can also infer how phone usage influenced the different sensor streams; there are peaks of screen events during the first day (Apr 23rd), peaks of location coordinates on Apr 26th and Apr 30th, and no sent or received SMS except for Apr 23rd, Apr 29th and Apr 30th (unlabeled row between screen and locations). !!! example - Click [here](../../img/hm-sensor_rows.html) to see an example of these interactive visualizations in HTML format + Click [here](../../img/hm-sensor-rows.html) to see an example of these interactive visualizations in HTML format
- +
Heatmap of the sensor row count per time segment of a single participant
diff --git a/example_profile/example_config.yaml b/example_profile/example_config.yaml index 8240187b..e1ab771d 100644 --- a/example_profile/example_config.yaml +++ b/example_profile/example_config.yaml @@ -523,6 +523,7 @@ HISTOGRAM_PHONE_DATA_YIELD: # See https://www.rapids.science/latest/visualizations/data-quality-visualizations/#2-heatmaps-of-overall-data-yield HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT: PLOT: True + TIME: RELATIVE_TIME # ABSOLUTE_TIME or RELATIVE_TIME # See https://www.rapids.science/latest/visualizations/data-quality-visualizations/#3-heatmap-of-recorded-phone-sensors HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT: @@ -530,14 +531,14 @@ HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT: # See https://www.rapids.science/latest/visualizations/data-quality-visualizations/#4-heatmap-of-sensor-row-count HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT: - PLOT: False + PLOT: True SENSORS: [PHONE_ACTIVITY_RECOGNITION, PHONE_APPLICATIONS_FOREGROUND, PHONE_BATTERY, PHONE_BLUETOOTH, PHONE_CALLS, PHONE_CONVERSATION, PHONE_LIGHT, PHONE_LOCATIONS, PHONE_MESSAGES, PHONE_SCREEN, PHONE_WIFI_CONNECTED, PHONE_WIFI_VISIBLE] # Features ------ # See https://www.rapids.science/latest/visualizations/feature-visualizations/#1-heatmap-correlation-matrix HEATMAP_FEATURE_CORRELATION_MATRIX: - PLOT: False + PLOT: True MIN_ROWS_RATIO: 0.5 CORR_THRESHOLD: 0.1 CORR_METHOD: "pearson" # choose from {"pearson", "kendall", "spearman"} diff --git a/rules/reports.smk b/rules/reports.smk index 9e30ed35..3a4fa6bd 100644 --- a/rules/reports.smk +++ b/rules/reports.smk @@ -52,6 +52,8 @@ rule heatmap_phone_data_yield_per_participant_per_time_segment: phone_data_yield = expand("data/processed/features/{pid}/phone_data_yield.csv", pid=config["PIDS"]), participant_file = expand("data/external/participant_files/{pid}.yaml", pid=config["PIDS"]), time_segments_labels = expand("data/interim/time_segments/{pid}_time_segments_labels.csv", pid=config["PIDS"]) + params: + time = 
config["HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT"]["TIME"] output: "reports/data_exploration/heatmap_phone_data_yield_per_participant_per_time_segment.html" script: diff --git a/src/visualization/heatmap_phone_data_yield_per_participant_per_time_segment.py b/src/visualization/heatmap_phone_data_yield_per_participant_per_time_segment.py index fd9595c5..8f639a62 100644 --- a/src/visualization/heatmap_phone_data_yield_per_participant_per_time_segment.py +++ b/src/visualization/heatmap_phone_data_yield_per_participant_per_time_segment.py @@ -7,20 +7,23 @@ import yaml -def getPhoneDataYieldHeatmap(data_for_plot, y_axis_labels, time_segment, type, html_file): +def getPhoneDataYieldHeatmap(data_for_plot, y_axis_labels, time_segment, type, time, html_file): fig = go.Figure(data=go.Heatmap(z=data_for_plot.values.tolist(), x=data_for_plot.columns.tolist(), y=y_axis_labels, hovertext=data_for_plot.values.tolist(), - hovertemplate="Time since first segment: %{x}
Participant: %{y}
Ratiovalidyielded" + type + ": %{z}", + hovertemplate="Time since first segment: %{x}
Participant: %{y}
Ratiovalidyielded" + type + ": %{z}" if time == "RELATIVE_TIME" else "Time: %{x}
Participant: %{y}
Ratiovalidyielded" + type + ": %{z}", zmin=0, zmax=1, colorscale="Viridis")) - fig.update_layout(title="Heatmap of valid yielded " + type + " ratio for " + time_segment + " segments.
y-axis shows participant information (format: pid.label).
x-axis shows the time since their first segment.
z-axis (color) shows valid yielded " + type + " ratio during a segment instance.") - + if time == "RELATIVE_TIME": + fig.update_layout(title="Heatmap of valid yielded " + type + " ratio for " + time_segment + " segments.
y-axis shows participant information (format: pid.label).
x-axis shows the time since their first segment.
z-axis (color) shows valid yielded " + type + " ratio during a segment instance.") + else: + fig.update_layout(title="Heatmap of valid yielded " + type + " ratio for " + time_segment + " segments.
y-axis shows participant information (format: pid.label).
x-axis shows the time.
z-axis (color) shows valid yielded " + type + " ratio during a segment instance.") + fig["layout"]["xaxis"].update(side="bottom") - fig["layout"].update(xaxis_title="Time Since First Segment") + fig["layout"].update(xaxis_title="Time Since First Segment" if time == "RELATIVE_TIME" else "Time") fig["layout"].update(margin=dict(t=160)) html_file.write(fig.to_html(full_html=False, include_plotlyjs="cdn")) @@ -30,7 +33,7 @@ def getPhoneDataYieldHeatmap(data_for_plot, y_axis_labels, time_segment, type, h - +time = snakemake.params["time"] y_axis_labels, phone_data_yield_minutes, phone_data_yield_hours = [], {}, {} for phone_data_yield_path, participant_file_path, time_segments_path in zip(snakemake.input["phone_data_yield"], snakemake.input["participant_file"], snakemake.input["time_segments_labels"]): @@ -58,8 +61,13 @@ for phone_data_yield_path, participant_file_path, time_segments_path in zip(snak if not phone_data_yield_per_segment.empty: - # set number of minutes after the first start date time of local segments as x_axis_label - phone_data_yield_per_segment.index = phone_data_yield_per_segment.index - phone_data_yield_per_segment.index.min() + if time == "RELATIVE_TIME": + # set number of minutes after the first start date time of local segments as x_axis_label + phone_data_yield_per_segment.index = phone_data_yield_per_segment.index - phone_data_yield_per_segment.index.min() + elif time == "ABSOLUTE_TIME": + pass + else: + raise ValueError("[HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT][TIME] can only be RELATIVE_TIME or ABSOLUTE_TIME") phone_data_yield_minutes_per_segment = phone_data_yield_per_segment[["phone_data_yield_rapids_ratiovalidyieldedminutes"]].rename(columns={"phone_data_yield_rapids_ratiovalidyieldedminutes": y_axis_label}) phone_data_yield_hours_per_segment = phone_data_yield_per_segment[["phone_data_yield_rapids_ratiovalidyieldedhours"]].rename(columns={"phone_data_yield_rapids_ratiovalidyieldedhours": y_axis_label}) @@ -79,7 +87,7 @@ 
for time_segment in phone_data_yield_minutes.keys(): minutes_data_for_plot = phone_data_yield_minutes[time_segment].transpose().reindex(pd.Index(y_axis_labels)).round(3) hours_data_for_plot = phone_data_yield_hours[time_segment].transpose().reindex(pd.Index(y_axis_labels)).round(3) - getPhoneDataYieldHeatmap(minutes_data_for_plot, y_axis_labels, time_segment, "minutes", html_file) - getPhoneDataYieldHeatmap(hours_data_for_plot, y_axis_labels, time_segment, "hours", html_file) + getPhoneDataYieldHeatmap(minutes_data_for_plot, y_axis_labels, time_segment, "minutes", time, html_file) + getPhoneDataYieldHeatmap(hours_data_for_plot, y_axis_labels, time_segment, "hours", time, html_file) html_file.close()