Start exploring PANAS data.

Add a function to clean up ESM data.
2021-07-02 16:33:48 +02:00 · 2021-07-02 16:33:48 +02:00 · 74392f229a
parent c29c7b19f2
commit 74392f229a
3 changed files with 107 additions and 8 deletions
--- a/exploration/expl_esm_labels.py
+++ b/exploration/expl_esm_labels.py
@ -0,0 +1,53 @@
 # ---
 # jupyter:
 #   jupytext:
 #     formats: ipynb,py:percent
 #     text_representation:
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
 #       jupytext_version: 1.11.2
 #   kernelspec:
 #     display_name: straw2analysis
 #     language: python
 #     name: straw2analysis
 # ---
 # %%
 import os
 import sys
 import seaborn as sns
 nb_dir = os.path.split(os.getcwd())[0]
 if nb_dir not in sys.path:
    sys.path.append(nb_dir)
 import participants.query_db
 from features.esm import *
 # %% [markdown]
 # # ESM data
 # %% [markdown]
 # Only take data from the main part of the study. The pilot data have a different structure; in particular, there were many additions to ESM_JSON.
 # %%
 # Fetch usernames from the participants database, passing 2020-08-01 as collection_start.
 # NOTE(review): presumably this restricts the selection to main-study participants,
 # as described in the markdown cell above; confirm against participants.query_db.
 # NOTE(review): `datetime` is not imported explicitly among the imports above; it
 # presumably arrives through `from features.esm import *`; confirm.
 participants_inactive_usernames = participants.query_db.get_usernames(
    collection_start=datetime.date.fromisoformat("2020-08-01")
 )
 # Load the ESM data for the selected usernames.
 df_esm_inactive = get_esm_data(participants_inactive_usernames)
 # %%
 # Preprocess the raw ESM data, then keep only the rows retained by clean_up_esm.
 df_esm_preprocessed = preprocess_esm(df_esm_inactive)
 df_esm_clean = clean_up_esm(df_esm_preprocessed)
 # %%
 # Keep only the rows whose questionnaire_id is 8 or 9.
 df_esm_PANAS = df_esm_clean[df_esm_clean["questionnaire_id"].isin([8, 9])]
 # Group the selected rows by participant and questionnaire.
 df_esm_PANAS_grouped = df_esm_PANAS.groupby(["participant_id", "questionnaire_id"])
 # %%
 df_esm_PANAS.head()
 # %%
--- a/features/esm.py
+++ b/features/esm.py
@ -213,3 +213,25 @@ def classify_sessions_by_completion_time(
    df_session_counts_time.loc[morning_transition_to_daytime, "time"] = "daytime"
    return df_session_counts_time
 def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
    """
    Drop ESM rows that do not represent an answered ESM.

    Only rows whose "esm_status" equals ESM_STATUS_ANSWERED are kept.

    Parameters
    ----------
    df_esm_preprocessed: pd.DataFrame
        A preprocessed dataframe of esm data. It must contain an "esm_status" column.

    Returns
    -------
    df_esm_clean: pd.DataFrame
        The rows of the input dataframe whose status is ESM_STATUS_ANSWERED.
    """
    is_answered = df_esm_preprocessed["esm_status"] == ESM_STATUS_ANSWERED
    return df_esm_preprocessed[is_answered]
--- a/statistical_analysis/adherence.py
+++ b/statistical_analysis/adherence.py
@ -221,9 +221,15 @@ df_session_workday = df_session_workday.assign(
 )
 # %%
-g1 = sns.displot(df_session_workday["time_diff_minutes"], binwidth=5, height=5, aspect=1.5, color="#28827C")
+g1 = sns.displot(
    df_session_workday["time_diff_minutes"],
    binwidth=5,
    height=5,
    aspect=1.5,
    color="#28827C",
 )
 g1.set_axis_labels("Time difference [min]", "Session count")
-#g1.savefig("WorkdayEMAtimeDiff.pdf")
+# g1.savefig("WorkdayEMAtimeDiff.pdf")
 # %% [markdown]
 # There are some sessions that are really close together. By design, none should be closer than 30 min. Let's take a look at those.
@ -278,9 +284,15 @@ df_mean_daytime_interval = df_session_workday.groupby("participant_id").median()
 df_mean_daytime_interval.describe()
 # %%
-g2 = sns.displot(df_mean_daytime_interval.time_diff_minutes, binwidth=5, height=5, aspect=1.5, color="#28827C")
+g2 = sns.displot(
    df_mean_daytime_interval.time_diff_minutes,
    binwidth=5,
    height=5,
    aspect=1.5,
    color="#28827C",
 )
 g2.set_axis_labels("Median time difference [min]", "Participant count")
-#g2.savefig("WorkdayEMAtimeDiffMedianParticip.pdf")
+# g2.savefig("WorkdayEMAtimeDiffMedianParticip.pdf")
 # %%
 df_adherence = df_adherence.merge(
@ -303,7 +315,13 @@ df_count_daytime_per_participant = df_session_workday.groupby(
 df_count_daytime_per_participant["time"].describe()
 # %%
-sns.displot(df_count_daytime_per_participant.time, binwidth=1, height=5, aspect=1.5, color="#28827C")
+sns.displot(
    df_count_daytime_per_participant.time,
    binwidth=1,
    height=5,
    aspect=1.5,
    color="#28827C",
 )
 # %% [markdown]
 # ## Evening EMA
@ -334,10 +352,16 @@ s_evening_completed_ratio = (
 s_evening_completed_ratio.describe()
 # %%
-g3 = sns.displot(s_evening_completed_ratio - 0.001, binwidth=0.05, height=5, aspect=1.5, color="#28827C")
+g3 = sns.displot(
    s_evening_completed_ratio - 0.001,
    binwidth=0.05,
    height=5,
    aspect=1.5,
    color="#28827C",
 )
 g3.set_axis_labels("Ratio of days with the evening EMA filled out", "Participant count")
-g3.set(xlim=(1.01,0.59))
+g3.set(xlim=(1.01, 0.59))
-#g3.savefig("EveningEMAratioParticip.pdf")
+# g3.savefig("EveningEMAratioParticip.pdf")
 # %%
 df_adherence = df_adherence.merge(