Update plot#1: histogram_phone_data_yield

pull/103/head
Meng Li 2020-12-01 14:21:33 -05:00
parent e4b83b489a
commit 0bd43c139c
7 changed files with 51 additions and 32 deletions

View File

@ -227,14 +227,14 @@ for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys():
# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) # files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") # files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
# Visualization for Data Exploration
if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]:
files_to_compute.append("reports/data_exploration/histogram_phone_data_yield.html")
# visualization for data exploration # visualization for data exploration
# if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]: # if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"])) # files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
# if config["HISTOGRAM_VALID_SENSED_HOURS"]["PLOT"]:
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/histogram_valid_sensed_hours.html", min_valid_hours_per_day=config["HISTOGRAM_VALID_SENSED_HOURS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
# if config["HEATMAP_DAYS_BY_SENSORS"]["PLOT"]: # if config["HEATMAP_DAYS_BY_SENSORS"]["PLOT"]:
# files_to_compute.extend(expand("reports/interim/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"], min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"])) # files_to_compute.extend(expand("reports/interim/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"], min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_days_by_sensors_all_participants.html", min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"])) # files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_days_by_sensors_all_participants.html", min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))

View File

@ -355,6 +355,9 @@ FITBIT_STEPS_INTRADAY:
# PLOTS # # PLOTS #
######################################################################################################################## ########################################################################################################################
HISTOGRAM_PHONE_DATA_YIELD:
PLOT: False
HEATMAP_FEATURES_CORRELATIONS: HEATMAP_FEATURES_CORRELATIONS:
PLOT: False PLOT: False
MIN_ROWS_RATIO: 0.5 MIN_ROWS_RATIO: 0.5
@ -365,11 +368,6 @@ HEATMAP_FEATURES_CORRELATIONS:
CORR_THRESHOLD: 0.1 CORR_THRESHOLD: 0.1
CORR_METHOD: "pearson" # choose from {"pearson", "kendall", "spearman"} CORR_METHOD: "pearson" # choose from {"pearson", "kendall", "spearman"}
HISTOGRAM_VALID_SENSED_HOURS:
PLOT: False
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
HEATMAP_DAYS_BY_SENSORS: HEATMAP_DAYS_BY_SENSORS:
PLOT: False PLOT: False
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day

View File

@ -210,6 +210,9 @@ for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys():
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
# Visualization for Data Exploration
if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]:
files_to_compute.append("reports/data_exploration/histogram_phone_data_yield.html")
# Analysis Workflow Example # Analysis Workflow Example
models, scalers = [], [] models, scalers = [], []

View File

@ -316,6 +316,15 @@ FITBIT_STEPS_INTRADAY:
########################################################################################################################
# PLOTS #
########################################################################################################################
HISTOGRAM_PHONE_DATA_YIELD:
PLOT: True
######################################################################################################################## ########################################################################################################################
# Analysis Workflow Example # # Analysis Workflow Example #
######################################################################################################################## ########################################################################################################################

View File

@ -1,3 +1,14 @@
# Builds the phone data yield histogram report for data exploration.
rule histogram_phone_data_yield:
    # Merged feature table covering all participants.
    input:
        "data/processed/features/all_participants/all_sensor_features.csv"
    # Single HTML report holding the rendered histograms.
    output:
        "reports/data_exploration/histogram_phone_data_yield.html"
    # Path is written relative to this rules file.
    script:
        "../src/visualization/histogram_phone_data_yield.py"
rule heatmap_features_correlations: rule heatmap_features_correlations:
input: input:
features = expand("data/processed/{pid}/{sensor}_{day_segment}.csv", pid=config["PIDS"], sensor=config["HEATMAP_FEATURES_CORRELATIONS"]["PHONE_FEATURES"]+config["HEATMAP_FEATURES_CORRELATIONS"]["FITBIT_FEATURES"], day_segment=config["DAY_SEGMENTS"]), features = expand("data/processed/{pid}/{sensor}_{day_segment}.csv", pid=config["PIDS"], sensor=config["HEATMAP_FEATURES_CORRELATIONS"]["PHONE_FEATURES"]+config["HEATMAP_FEATURES_CORRELATIONS"]["FITBIT_FEATURES"], day_segment=config["DAY_SEGMENTS"]),
@ -11,14 +22,6 @@ rule heatmap_features_correlations:
script: script:
"../src/visualization/heatmap_features_correlations.py" "../src/visualization/heatmap_features_correlations.py"
rule histogram_valid_sensed_hours:
input:
phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}hours_{{min_valid_bins_per_hour}}bins.csv", pid=config["PIDS"])
output:
"reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/histogram_valid_sensed_hours.html"
script:
"../src/visualization/histogram_valid_sensed_hours.py"
rule heatmap_days_by_sensors: rule heatmap_days_by_sensors:
input: input:
sensors = optional_heatmap_days_by_sensors_input, sensors = optional_heatmap_days_by_sensors_input,

View File

@ -0,0 +1,22 @@
import pandas as pd
import plotly.express as px
# Render histograms of the phone data yield ratios (minutes and hours) into a
# single HTML report. `snakemake` is the object Snakemake injects into scripts
# run through a rule's `script:` directive.
phone_data_yield = pd.read_csv(snakemake.input[0])

# Both ratio columns must be present; fail early with a hint about the config
# keys to check instead of letting plotly complain about a missing column.
required_columns = (
    "phone_data_yield_rapids_ratiovalidyieldedminutes",
    "phone_data_yield_rapids_ratiovalidyieldedhours",
)
if any(column not in phone_data_yield.columns for column in required_columns):
    raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")

# Histogram of the ratio of valid yielded minutes, colored by segment label.
fig_ratiovalidyieldedminutes = px.histogram(
    phone_data_yield,
    x="phone_data_yield_rapids_ratiovalidyieldedminutes",
    color="local_segment_label",
)
fig_ratiovalidyieldedminutes.update_layout(title="Ratio Valid Yielded Minutes Histogram")

# Histogram of the ratio of valid yielded hours, colored by segment label.
fig_ratiovalidyieldedhours = px.histogram(
    phone_data_yield,
    x="phone_data_yield_rapids_ratiovalidyieldedhours",
    color="local_segment_label",
)
fig_ratiovalidyieldedhours.update_layout(title="Ratio Valid Yielded Hours Histogram")

# Open with "w" (not "a") so a report left over from an earlier run is
# replaced rather than getting a second copy of the plots appended to it.
# The encoding is explicit so the write does not depend on the platform default.
with open(snakemake.output[0], "w", encoding="utf-8") as html_file:
    html_file.write(fig_ratiovalidyieldedminutes.to_html(full_html=False, include_plotlyjs="cdn"))
    html_file.write(fig_ratiovalidyieldedhours.to_html(full_html=False, include_plotlyjs="cdn"))

View File

@ -1,16 +0,0 @@
import pandas as pd
import plotly.express as px
import plotly.io as pio
# Plot a histogram of valid sensed hours across all participants.
# `snakemake` is the object Snakemake injects into scripts run through a
# rule's `script:` directive.

# Keep only the rows flagged as valid sensed days for each participant, and
# collect the per-participant frames so they can be merged in one pass
# (concatenating inside the loop re-copies the accumulated rows every time).
valid_days_per_participant = []
for path in snakemake.input["phone_valid_sensed_days"]:
    phone_valid_sensed_days = pd.read_csv(path)
    # Explicit comparison kept on purpose: it also works if the column was not
    # parsed as a boolean dtype.
    is_valid = phone_valid_sensed_days["is_valid_sensed_day"] == True  # noqa: E712
    valid_days_per_participant.append(phone_valid_sensed_days[is_valid])

# merge "phone_valid_sensed_days" for all participants
selected_participants_and_days = pd.concat(valid_days_per_participant, axis=0)

# plot histogram
fig = px.histogram(selected_participants_and_days, x="valid_sensed_hours")
fig.update_layout(title="Phone Valid Hours Histogram")
pio.write_html(fig, file=snakemake.output[0], auto_open=False, include_plotlyjs="cdn")