diff --git a/exploration/expl_esm_labels.py b/exploration/expl_esm_labels.py
index 01107c3..e3298d7 100644
--- a/exploration/expl_esm_labels.py
+++ b/exploration/expl_esm_labels.py
@@ -46,7 +46,44 @@ df_esm_PANAS_clean = clean_up_esm(df_esm_PANAS)
 
 # %%
 df_esm_PANAS_grouped = df_esm_PANAS_clean.groupby(
-    ["participant_id", "questionnaire_id"]
+    ["participant_id", "date_lj", "questionnaire_id"]
 )
 
 # %%
+# Sum the numeric answers per participant, day, and subscale (questionnaire).
+df_esm_PANAS_daily_sums = (
+    df_esm_PANAS_grouped.esm_user_answer_numeric.agg("sum")
+    .reset_index()
+    .rename(columns={"esm_user_answer_numeric": "esm_numeric_sum"})
+)
+
+# %% [markdown]
+# Next, calculate the mean, median, and standard deviation of the daily sums
+# across all days for each participant and subscale.
+
+# %%
+# Aggregate only the daily sums so that date_lj is not aggregated as well.
+df_esm_PANAS_summary_participant = (
+    df_esm_PANAS_daily_sums.groupby(["participant_id", "questionnaire_id"])
+    .esm_numeric_sum.agg(["mean", "median", "std"])
+    .reset_index()
+)
+# Derive the subscale labels from the summary's own questionnaire_id; the
+# daily-sums frame has a different index, so assigning from it misaligns rows.
+df_esm_PANAS_summary_participant["PANAS_subscale"] = (
+    df_esm_PANAS_summary_participant.questionnaire_id.astype("category")
+    .cat.rename_categories({8.0: "PA", 9.0: "NA"})
+)
+
+# %% [markdown]
+# Plot the distributions of the participant-level means and standard deviations.
+
+# %%
+sns.displot(
+    data=df_esm_PANAS_summary_participant, x="mean", hue="PANAS_subscale", binwidth=2
+)
+
+# %%
+sns.displot(
+    data=df_esm_PANAS_summary_participant, x="std", hue="PANAS_subscale", binwidth=1
+)