Start exploring PANAS data.

Add a function to clean up ESM data.
2021-07-02 16:33:48 +02:00 · 2021-07-02 16:33:48 +02:00 · 74392f229a
parent c29c7b19f2
commit 74392f229a
3 changed files with 107 additions and 8 deletions
--- a/exploration/expl_esm_labels.py
+++ b/exploration/expl_esm_labels.py
@ -0,0 +1,53 @@
+# ---
+# jupyter:
+#   jupytext:
+#     formats: ipynb,py:percent
+#     text_representation:
+#       extension: .py
+#       format_name: percent
+#       format_version: '1.3'
+#       jupytext_version: 1.11.2
+#   kernelspec:
+#     display_name: straw2analysis
+#     language: python
+#     name: straw2analysis
+# ---
+
+# %%
+import os
+import sys
+
+import seaborn as sns
+
+nb_dir = os.path.split(os.getcwd())[0]
+if nb_dir not in sys.path:
+    sys.path.append(nb_dir)
+import participants.query_db
+from features.esm import *
+
+# %% [markdown]
+# # ESM data
+
+# %% [markdown]
+# Only take data from the main part of the study. The pilot data have a different structure; in particular, there were many additions to ESM_JSON.
+
+# %%
+# NOTE(review): `datetime` is not imported explicitly in this file; it is
+# presumably brought in by `from features.esm import *` - confirm.
+participants_inactive_usernames = participants.query_db.get_usernames(
+    collection_start=datetime.date.fromisoformat("2020-08-01")
+)
+df_esm_inactive = get_esm_data(participants_inactive_usernames)
+
+# %%
+# Preprocess the raw ESM data, then drop the rows that clean_up_esm filters out.
+df_esm_preprocessed = preprocess_esm(df_esm_inactive)
+df_esm_clean = clean_up_esm(df_esm_preprocessed)
+
+# %%
+# Keep only rows with questionnaire_id 8 or 9.
+# NOTE(review): presumably these are the PANAS questionnaires, going by the
+# variable name - confirm the IDs.
+df_esm_PANAS = df_esm_clean[
+    (df_esm_clean["questionnaire_id"] == 8) | (df_esm_clean["questionnaire_id"] == 9)
+]
+# NOTE(review): the grouped object is not used in the cells visible here.
+df_esm_PANAS_grouped = df_esm_PANAS.groupby(["participant_id", "questionnaire_id"])
+
+# %%
+df_esm_PANAS.head()
+
+# %%
--- a/features/esm.py
+++ b/features/esm.py
@ -213,3 +213,25 @@ def classify_sessions_by_completion_time(
    df_session_counts_time.loc[morning_transition_to_daytime, "time"] = "daytime"

    return df_session_counts_time
+
+
+def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
+    """
+    Keep only the answered ESM responses.
+    Rows whose "esm_status" differs from ESM_STATUS_ANSWERED are dropped.
+
+    Parameters
+    ----------
+    df_esm_preprocessed: pd.DataFrame
+        A preprocessed dataframe of ESM data with an "esm_status" column.
+
+    Returns
+    -------
+    df_esm_clean: pd.DataFrame
+        The rows of the input with esm_status equal to ESM_STATUS_ANSWERED.
+
+    """
+    df_esm_clean = df_esm_preprocessed[
+        df_esm_preprocessed["esm_status"] == ESM_STATUS_ANSWERED
+    ]
+    return df_esm_clean
--- a/statistical_analysis/adherence.py
+++ b/statistical_analysis/adherence.py
@ -221,9 +221,15 @@ df_session_workday = df_session_workday.assign(
 )

 # %%
-g1 = sns.displot(df_session_workday["time_diff_minutes"], binwidth=5, height=5, aspect=1.5, color="#28827C")
+g1 = sns.displot(
+    df_session_workday["time_diff_minutes"],
+    binwidth=5,
+    height=5,
+    aspect=1.5,
+    color="#28827C",
+)
 g1.set_axis_labels("Time difference [min]", "Session count")
-#g1.savefig("WorkdayEMAtimeDiff.pdf")
+# g1.savefig("WorkdayEMAtimeDiff.pdf")

 # %% [markdown]
 # There are some sessions that are really close together. By design, none should be closer than 30 min. Let's take a look at those.
@ -278,9 +284,15 @@ df_mean_daytime_interval = df_session_workday.groupby("participant_id").median()
 df_mean_daytime_interval.describe()

 # %%
-g2 = sns.displot(df_mean_daytime_interval.time_diff_minutes, binwidth=5, height=5, aspect=1.5, color="#28827C")
+g2 = sns.displot(
+    df_mean_daytime_interval.time_diff_minutes,
+    binwidth=5,
+    height=5,
+    aspect=1.5,
+    color="#28827C",
+)
 g2.set_axis_labels("Median time difference [min]", "Participant count")
-#g2.savefig("WorkdayEMAtimeDiffMedianParticip.pdf")
+# g2.savefig("WorkdayEMAtimeDiffMedianParticip.pdf")

 # %%
 df_adherence = df_adherence.merge(
@ -303,7 +315,13 @@ df_count_daytime_per_participant = df_session_workday.groupby(
 df_count_daytime_per_participant["time"].describe()

 # %%
-sns.displot(df_count_daytime_per_participant.time, binwidth=1, height=5, aspect=1.5, color="#28827C")
+sns.displot(
+    df_count_daytime_per_participant.time,
+    binwidth=1,
+    height=5,
+    aspect=1.5,
+    color="#28827C",
+)

 # %% [markdown]
 # ## Evening EMA
@ -334,10 +352,16 @@ s_evening_completed_ratio = (
 s_evening_completed_ratio.describe()

 # %%
-g3 = sns.displot(s_evening_completed_ratio - 0.001, binwidth=0.05, height=5, aspect=1.5, color="#28827C")
+g3 = sns.displot(
+    s_evening_completed_ratio - 0.001,
+    binwidth=0.05,
+    height=5,
+    aspect=1.5,
+    color="#28827C",
+)
 g3.set_axis_labels("Ratio of days with the evening EMA filled out", "Participant count")
-g3.set(xlim=(1.01,0.59))
-#g3.savefig("EveningEMAratioParticip.pdf")
+g3.set(xlim=(1.01, 0.59))
+# g3.savefig("EveningEMAratioParticip.pdf")

 # %%
 df_adherence = df_adherence.merge(