diff --git a/rules/reports.smk b/rules/reports.smk
index 7935888d..caed8993 100644
--- a/rules/reports.smk
+++ b/rules/reports.smk
@@ -1,6 +1,8 @@
 rule histogram_phone_data_yield:
     input:
         "data/processed/features/all_participants/all_sensor_features.csv"
+    params:
+        time_segments_type = config["TIME_SEGMENTS"]["TYPE"]
     output:
         "reports/data_exploration/histogram_phone_data_yield.html"
     script:
diff --git a/src/visualization/histogram_phone_data_yield.py b/src/visualization/histogram_phone_data_yield.py
index cd15ec8d..a8b90d77 100644
--- a/src/visualization/histogram_phone_data_yield.py
+++ b/src/visualization/histogram_phone_data_yield.py
@@ -2,8 +2,16 @@ import pandas as pd
 import plotly.express as px
 
 
+time_segments_type = snakemake.params["time_segments_type"]
 phone_data_yield = pd.read_csv(snakemake.input[0])
 
+# Normalize segment labels before plotting: for FREQUENCY segments keep only the text
+# that precedes the first run of digits; for EVENT segments use the single label "event".
+if time_segments_type == "FREQUENCY":
+    phone_data_yield["local_segment_label"] = phone_data_yield["local_segment_label"].str.split(r"\d+", expand=True, n=1)[0]
+elif time_segments_type == "EVENT":
+    phone_data_yield["local_segment_label"] = "event"
+
 # make sure the input file contains "phone_data_yield_rapids_ratiovalidyieldedminutes" and "phone_data_yield_rapids_ratiovalidyieldedhours" columns
 if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
     raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")