diff --git a/Snakefile b/Snakefile index dfd96877..688e7675 100644 --- a/Snakefile +++ b/Snakefile @@ -227,14 +227,14 @@ for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys(): # files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) # files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") +# Visualization for Data Exploration +if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]: + files_to_compute.append("reports/data_exploration/histogram_phone_data_yield.html") # visualization for data exploration # if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]: # files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"])) -# if config["HISTOGRAM_VALID_SENSED_HOURS"]["PLOT"]: -# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/histogram_valid_sensed_hours.html", min_valid_hours_per_day=config["HISTOGRAM_VALID_SENSED_HOURS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"])) - # if config["HEATMAP_DAYS_BY_SENSORS"]["PLOT"]: # files_to_compute.extend(expand("reports/interim/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"], min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"])) # files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_days_by_sensors_all_participants.html", 
min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"])) diff --git a/config.yaml b/config.yaml index abbe02f3..8af1d996 100644 --- a/config.yaml +++ b/config.yaml @@ -355,6 +355,9 @@ FITBIT_STEPS_INTRADAY: # PLOTS # ######################################################################################################################## +HISTOGRAM_PHONE_DATA_YIELD: + PLOT: False + HEATMAP_FEATURES_CORRELATIONS: PLOT: False MIN_ROWS_RATIO: 0.5 @@ -365,11 +368,6 @@ HEATMAP_FEATURES_CORRELATIONS: CORR_THRESHOLD: 0.1 CORR_METHOD: "pearson" # choose from {"pearson", "kendall", "spearman"} -HISTOGRAM_VALID_SENSED_HOURS: - PLOT: False - MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day - MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour - HEATMAP_DAYS_BY_SENSORS: PLOT: False MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day diff --git a/example_profile/Snakefile b/example_profile/Snakefile index 970cd1b2..49530a30 100644 --- a/example_profile/Snakefile +++ b/example_profile/Snakefile @@ -210,6 +210,9 @@ for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys(): files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") +# Visualization for Data Exploration +if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]: + files_to_compute.append("reports/data_exploration/histogram_phone_data_yield.html") # Analysis Workflow Example models, scalers = [], [] diff --git a/example_profile/example_config.yaml b/example_profile/example_config.yaml index 5e27104d..d6e49640 100644 --- a/example_profile/example_config.yaml +++ b/example_profile/example_config.yaml @@ -316,6 +316,15 @@ FITBIT_STEPS_INTRADAY: +######################################################################################################################## +# 
PLOTS # +######################################################################################################################## + +HISTOGRAM_PHONE_DATA_YIELD: + PLOT: True + + + ######################################################################################################################## # Analysis Workflow Example # ######################################################################################################################## diff --git a/rules/reports.smk b/rules/reports.smk index ef44c249..e18e9ddc 100644 --- a/rules/reports.smk +++ b/rules/reports.smk @@ -1,3 +1,14 @@ +rule histogram_phone_data_yield: + input: + "data/processed/features/all_participants/all_sensor_features.csv" + output: + "reports/data_exploration/histogram_phone_data_yield.html" + script: + "../src/visualization/histogram_phone_data_yield.py" + + + + rule heatmap_features_correlations: input: features = expand("data/processed/{pid}/{sensor}_{day_segment}.csv", pid=config["PIDS"], sensor=config["HEATMAP_FEATURES_CORRELATIONS"]["PHONE_FEATURES"]+config["HEATMAP_FEATURES_CORRELATIONS"]["FITBIT_FEATURES"], day_segment=config["DAY_SEGMENTS"]), @@ -11,14 +22,6 @@ rule heatmap_features_correlations: script: "../src/visualization/heatmap_features_correlations.py" -rule histogram_valid_sensed_hours: - input: - phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}hours_{{min_valid_bins_per_hour}}bins.csv", pid=config["PIDS"]) - output: - "reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/histogram_valid_sensed_hours.html" - script: - "../src/visualization/histogram_valid_sensed_hours.py" - rule heatmap_days_by_sensors: input: sensors = optional_heatmap_days_by_sensors_input, diff --git a/src/visualization/histogram_phone_data_yield.py b/src/visualization/histogram_phone_data_yield.py new file mode 100644 index 00000000..e4a55aaf --- /dev/null +++ b/src/visualization/histogram_phone_data_yield.py 
@@ -0,0 +1,25 @@
+import pandas as pd
+import plotly.express as px
+
+
+# Build an HTML report with two histograms of phone data yield, read from the features CSV given as the rule's first input
+phone_data_yield = pd.read_csv(snakemake.input[0])
+
+# make sure the input file contains "phone_data_yield_rapids_ratiovalidyieldedminutes" and "phone_data_yield_rapids_ratiovalidyieldedhours" columns
+if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
+    raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")
+
+# plot ratio valid yielded minutes histogram
+fig_ratiovalidyieldedminutes = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedminutes", color="local_segment_label")
+fig_ratiovalidyieldedminutes.update_layout(title="Ratio Valid Yielded Minutes Histogram")
+
+# plot ratio valid yielded hours histogram
+fig_ratiovalidyieldedhours = px.histogram(phone_data_yield, x="phone_data_yield_rapids_ratiovalidyieldedhours", color="local_segment_label")
+fig_ratiovalidyieldedhours.update_layout(title="Ratio Valid Yielded Hours Histogram")
+
+
+# open in write mode (not append) so that re-running the script replaces the report instead of duplicating the plots
+with open(snakemake.output[0], "w", encoding="utf-8") as html_file:
+    html_file.write(fig_ratiovalidyieldedminutes.to_html(full_html=False, include_plotlyjs="cdn"))
+    # plotly.js is already loaded by the first fragment, so the second one does not need its own script tag
+    html_file.write(fig_ratiovalidyieldedhours.to_html(full_html=False, include_plotlyjs=False))
diff --git a/src/visualization/histogram_valid_sensed_hours.py b/src/visualization/histogram_valid_sensed_hours.py
deleted file mode 100644
index cb5c904b..00000000
--- a/src/visualization/histogram_valid_sensed_hours.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import pandas as pd
-import plotly.express as px
-import plotly.io as pio
-
-
-# merge "phone_valid_sensed_days" for all participants
-selected_participants_and_days = pd.DataFrame()
-for path in snakemake.input["phone_valid_sensed_days"]:
-    phone_valid_sensed_days = pd.read_csv(path)
- 
phone_valid_sensed_days = phone_valid_sensed_days[phone_valid_sensed_days["is_valid_sensed_day"] == True] - selected_participants_and_days = pd.concat([selected_participants_and_days, phone_valid_sensed_days], axis=0) - -# plot histogram -fig = px.histogram(selected_participants_and_days, x="valid_sensed_hours") -fig.update_layout(title="Phone Valid Hours Histogram") -pio.write_html(fig, file=snakemake.output[0], auto_open=False, include_plotlyjs="cdn") \ No newline at end of file