From 3bb66e38388b415fd1216a51054e3f28095f2401 Mon Sep 17 00:00:00 2001 From: junos Date: Sun, 4 Jul 2021 13:41:34 +0200 Subject: [PATCH] Calculate daily means instead of sums. --- exploration/expl_esm_labels.py | 35 ++++++++++++++++++------------- statistical_analysis/adherence.py | 4 ++-- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/exploration/expl_esm_labels.py b/exploration/expl_esm_labels.py index e3298d7..61eb769 100644 --- a/exploration/expl_esm_labels.py +++ b/exploration/expl_esm_labels.py @@ -44,24 +44,24 @@ df_esm_PANAS = df_esm_preprocessed[ ] df_esm_PANAS_clean = clean_up_esm(df_esm_PANAS) -# %% -df_esm_PANAS_grouped = df_esm_PANAS_clean.groupby( - ["participant_id", "date_lj", "questionnaire_id"] -) +# %% [markdown] +# Group by participants, date, and subscale and calculate daily means. # %% -df_esm_PANAS_daily_sums = ( - df_esm_PANAS_grouped.esm_user_answer_numeric.agg("sum") +df_esm_PANAS_daily_means = ( + df_esm_PANAS_clean.groupby( + ["participant_id", "date_lj", "questionnaire_id"]) + .esm_user_answer_numeric.agg("mean") .reset_index() - .rename(columns={"esm_user_answer_numeric": "esm_numeric_sum"}) + .rename(columns={"esm_user_answer_numeric": "esm_numeric_mean"}) ) # %% [markdown] -# Group by participants, date, and subscale and calculate daily sums. +# Next, calculate mean, median, and standard deviation across all days for each participant. # %% df_esm_PANAS_summary_participant = ( - df_esm_PANAS_daily_sums.groupby(["participant_id", "questionnaire_id"]) + df_esm_PANAS_daily_means.groupby(["participant_id", "questionnaire_id"]) .agg(["mean", "median", "std"]) .reset_index(col_level=1) ) @@ -70,19 +70,24 @@ df_esm_PANAS_summary_participant.columns = df_esm_PANAS_summary_participant.colu ) df_esm_PANAS_summary_participant[ "PANAS_subscale" -] = df_esm_PANAS_daily_sums.questionnaire_id.astype("category").cat.rename_categories( +] = df_esm_PANAS_daily_means.questionnaire_id.astype("category").cat.rename_categories( {8.0: "PA", 9.0: "NA"} ) -# %% [markdown] -# Next, calculate mean and standard deviation across all days for each participant. - # %% sns.displot( - data=df_esm_PANAS_summary_participant, x="mean", hue="PANAS_subscale", binwidth=2 + data=df_esm_PANAS_summary_participant, x="mean", hue="PANAS_subscale", binwidth=0.2 ) # %% sns.displot( - data=df_esm_PANAS_summary_participant, x="std", hue="PANAS_subscale", binwidth=1 + data=df_esm_PANAS_summary_participant, x="median", hue="PANAS_subscale", binwidth=0.2 ) + +# %% +sns.displot( + data=df_esm_PANAS_summary_participant, x="std", hue="PANAS_subscale", binwidth=0.05 +) + +# %% +df_esm_PANAS_summary_participant[df_esm_PANAS_summary_participant["std"] < 0.1] diff --git a/statistical_analysis/adherence.py b/statistical_analysis/adherence.py index 9980f7f..6507599 100644 --- a/statistical_analysis/adherence.py +++ b/statistical_analysis/adherence.py @@ -242,8 +242,8 @@ df_session_workday[df_session_workday.time_diff_minutes < 30] # %% df_esm_preprocessed.loc[ - (df_esm_preprocessed.participant_id == 35) & (df_esm_preprocessed.esm_session == 6), - ["esm_trigger", "esm_session", "datetime_lj", "esm_instructions"], + (df_esm_preprocessed.participant_id == 35) & (df_esm_preprocessed.esm_session == 7) & (df_esm_preprocessed.device_id == "62a44038-3ccb-401e-a69c-6f22152c54a6"), + ["esm_trigger", "esm_session", "datetime_lj", "esm_instructions", "device_id", "_id"], ] # %%