From 3bb66e38388b415fd1216a51054e3f28095f2401 Mon Sep 17 00:00:00 2001
From: junos <junos.lukan@ijs.si>
Date: Sun, 4 Jul 2021 13:41:34 +0200
Subject: [PATCH] Calculate daily means instead of sums.

---
 exploration/expl_esm_labels.py    | 35 ++++++++++++++++++-------------
 statistical_analysis/adherence.py |  4 ++--
 2 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/exploration/expl_esm_labels.py b/exploration/expl_esm_labels.py
index e3298d7..61eb769 100644
--- a/exploration/expl_esm_labels.py
+++ b/exploration/expl_esm_labels.py
@@ -44,24 +44,24 @@ df_esm_PANAS = df_esm_preprocessed[
 ]
 df_esm_PANAS_clean = clean_up_esm(df_esm_PANAS)
 
-# %%
-df_esm_PANAS_grouped = df_esm_PANAS_clean.groupby(
-    ["participant_id", "date_lj", "questionnaire_id"]
-)
+# %% [markdown]
+# Group by participants, date, and subscale and calculate daily means.
 
 # %%
-df_esm_PANAS_daily_sums = (
-    df_esm_PANAS_grouped.esm_user_answer_numeric.agg("sum")
+df_esm_PANAS_daily_means = (
+    df_esm_PANAS_clean.groupby(
+        ["participant_id", "date_lj", "questionnaire_id"])
+    .esm_user_answer_numeric.agg("mean")
     .reset_index()
-    .rename(columns={"esm_user_answer_numeric": "esm_numeric_sum"})
+    .rename(columns={"esm_user_answer_numeric": "esm_numeric_mean"})
 )
 
 # %% [markdown]
-# Group by participants, date, and subscale and calculate daily sums.
+# Next, calculate the mean, median, and standard deviation of the daily means across all days for each participant and subscale.
 
 # %%
 df_esm_PANAS_summary_participant = (
-    df_esm_PANAS_daily_sums.groupby(["participant_id", "questionnaire_id"])
+    df_esm_PANAS_daily_means.groupby(["participant_id", "questionnaire_id"])
     .agg(["mean", "median", "std"])
     .reset_index(col_level=1)
 )
@@ -70,19 +70,24 @@ df_esm_PANAS_summary_participant.columns = df_esm_PANAS_summary_participant.colu
 )
 df_esm_PANAS_summary_participant[
     "PANAS_subscale"
-] = df_esm_PANAS_daily_sums.questionnaire_id.astype("category").cat.rename_categories(
+] = df_esm_PANAS_summary_participant["questionnaire_id"].astype("category").cat.rename_categories(  # own ids: labels stay row-aligned
     {8.0: "PA", 9.0: "NA"}
 )
 
-# %% [markdown]
-# Next, calculate mean and standard deviation across all days for each participant.
-
 # %%
 sns.displot(
-    data=df_esm_PANAS_summary_participant, x="mean", hue="PANAS_subscale", binwidth=2
+    data=df_esm_PANAS_summary_participant, x="mean", hue="PANAS_subscale", binwidth=0.2
 )
 
 # %%
 sns.displot(
-    data=df_esm_PANAS_summary_participant, x="std", hue="PANAS_subscale", binwidth=1
+    data=df_esm_PANAS_summary_participant, x="median", hue="PANAS_subscale", binwidth=0.2
 )
+
+# %%
+sns.displot(
+    data=df_esm_PANAS_summary_participant, x="std", hue="PANAS_subscale", binwidth=0.05
+)
+
+# %%
+df_esm_PANAS_summary_participant[df_esm_PANAS_summary_participant["std"] < 0.1]
diff --git a/statistical_analysis/adherence.py b/statistical_analysis/adherence.py
index 9980f7f..6507599 100644
--- a/statistical_analysis/adherence.py
+++ b/statistical_analysis/adherence.py
@@ -242,8 +242,8 @@ df_session_workday[df_session_workday.time_diff_minutes < 30]
 
 # %%
 df_esm_preprocessed.loc[
-    (df_esm_preprocessed.participant_id == 35) & (df_esm_preprocessed.esm_session == 6),
-    ["esm_trigger", "esm_session", "datetime_lj", "esm_instructions"],
+    (df_esm_preprocessed.participant_id == 35) & (df_esm_preprocessed.esm_session == 7) & (df_esm_preprocessed.device_id == "62a44038-3ccb-401e-a69c-6f22152c54a6"),
+    ["esm_trigger", "esm_session", "datetime_lj", "esm_instructions", "device_id", "_id"],
 ]
 
 # %%