From e98a8ff7ca77f060963dfa528f18db0f861ab004 Mon Sep 17 00:00:00 2001
From: Meng Li <34143965+Meng6@users.noreply.github.com>
Date: Thu, 17 Jun 2021 12:27:32 -0400
Subject: [PATCH] Update histogram of phone data yield

---
Review notes (ignored by `git am`):
- The rule now passes config["TIME_SEGMENTS"]["TYPE"] to the script as the
  `time_segments_type` param.
- FREQUENCY: `local_segment_label` is cut at its first run of digits and only
  the leading part is kept.
- EVENT: every row's `local_segment_label` is overwritten with "event".
- The split pattern is written as a raw string (r"\d+") so Python does not
  treat `\d` as an invalid string escape; the regex itself is unchanged.

 rules/reports.smk                               | 2 ++
 src/visualization/histogram_phone_data_yield.py | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/rules/reports.smk b/rules/reports.smk
index 7935888d..caed8993 100644
--- a/rules/reports.smk
+++ b/rules/reports.smk
@@ -1,6 +1,8 @@
 rule histogram_phone_data_yield:
     input:
         "data/processed/features/all_participants/all_sensor_features.csv"
+    params:
+        time_segments_type = config["TIME_SEGMENTS"]["TYPE"]
     output:
         "reports/data_exploration/histogram_phone_data_yield.html"
     script:
diff --git a/src/visualization/histogram_phone_data_yield.py b/src/visualization/histogram_phone_data_yield.py
index cd15ec8d..a8b90d77 100644
--- a/src/visualization/histogram_phone_data_yield.py
+++ b/src/visualization/histogram_phone_data_yield.py
@@ -2,8 +2,14 @@ import pandas as pd
 import plotly.express as px
 
 
+time_segments_type = snakemake.params["time_segments_type"]
 phone_data_yield = pd.read_csv(snakemake.input[0])
 
+if time_segments_type == "FREQUENCY":
+    phone_data_yield["local_segment_label"] = phone_data_yield["local_segment_label"].str.split(r"\d+", expand=True, n=1)[0]
+if time_segments_type == "EVENT":
+    phone_data_yield["local_segment_label"] = "event"
+
 # make sure the input file contains "phone_data_yield_rapids_ratiovalidyieldedminutes" and "phone_data_yield_rapids_ratiovalidyieldedhours" columns
 if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
     raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")