diff --git a/exploration/expl_esm_labels.py b/exploration/expl_esm_labels.py new file mode 100644 index 0000000..beefd0e --- /dev/null +++ b/exploration/expl_esm_labels.py @@ -0,0 +1,53 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.11.2 +# kernelspec: +# display_name: straw2analysis +# language: python +# name: straw2analysis +# --- + +# %% +import os +import sys + +import seaborn as sns + +nb_dir = os.path.split(os.getcwd())[0] +if nb_dir not in sys.path: + sys.path.append(nb_dir) +import participants.query_db +from features.esm import * + +# %% [markdown] +# # ESM data + +# %% [markdown] +# Only take data from the main part of the study. The pilot data have a different structure; in particular, there were many additions to ESM_JSON. + +# %% +participants_inactive_usernames = participants.query_db.get_usernames( + collection_start=datetime.date.fromisoformat("2020-08-01") +) +df_esm_inactive = get_esm_data(participants_inactive_usernames) + +# %% +df_esm_preprocessed = preprocess_esm(df_esm_inactive) +df_esm_clean = clean_up_esm(df_esm_preprocessed) + +# %% +df_esm_PANAS = df_esm_clean[ + (df_esm_clean["questionnaire_id"] == 8) | (df_esm_clean["questionnaire_id"] == 9) +] +df_esm_PANAS_grouped = df_esm_PANAS.groupby(["participant_id", "questionnaire_id"]) + +# %% +df_esm_PANAS.head() + +# %% diff --git a/features/esm.py b/features/esm.py index 6495542..3d9e878 100644 --- a/features/esm.py +++ b/features/esm.py @@ -213,3 +213,25 @@ def classify_sessions_by_completion_time( df_session_counts_time.loc[morning_transition_to_daytime, "time"] = "daytime" return df_session_counts_time + + +def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame: + """ + This function eliminates invalid ESM responses. + It removes unanswered ESMs. + + Parameters + ---------- + df_esm_preprocessed: pd.DataFrame + A preprocessed dataframe of esm data. 
+ + Returns + ------- + df_esm_clean: pd.DataFrame + A subset of the original dataframe. + + """ + df_esm_clean = df_esm_preprocessed[ + df_esm_preprocessed["esm_status"] == ESM_STATUS_ANSWERED + ] + return df_esm_clean diff --git a/statistical_analysis/adherence.py b/statistical_analysis/adherence.py index eab2b16..9980f7f 100644 --- a/statistical_analysis/adherence.py +++ b/statistical_analysis/adherence.py @@ -221,9 +221,15 @@ df_session_workday = df_session_workday.assign( ) # %% -g1 = sns.displot(df_session_workday["time_diff_minutes"], binwidth=5, height=5, aspect=1.5, color="#28827C") +g1 = sns.displot( + df_session_workday["time_diff_minutes"], + binwidth=5, + height=5, + aspect=1.5, + color="#28827C", +) g1.set_axis_labels("Time difference [min]", "Session count") -#g1.savefig("WorkdayEMAtimeDiff.pdf") +# g1.savefig("WorkdayEMAtimeDiff.pdf") # %% [markdown] # There are some sessions that are really close together. By design, none should be closer than 30 min. Let's take a look at those. 
@@ -278,9 +284,15 @@ df_mean_daytime_interval = df_session_workday.groupby("participant_id").median() df_mean_daytime_interval.describe() # %% -g2 = sns.displot(df_mean_daytime_interval.time_diff_minutes, binwidth=5, height=5, aspect=1.5, color="#28827C") +g2 = sns.displot( + df_mean_daytime_interval.time_diff_minutes, + binwidth=5, + height=5, + aspect=1.5, + color="#28827C", +) g2.set_axis_labels("Median time difference [min]", "Participant count") -#g2.savefig("WorkdayEMAtimeDiffMedianParticip.pdf") +# g2.savefig("WorkdayEMAtimeDiffMedianParticip.pdf") # %% df_adherence = df_adherence.merge( @@ -303,7 +315,13 @@ df_count_daytime_per_participant = df_session_workday.groupby( df_count_daytime_per_participant["time"].describe() # %% -sns.displot(df_count_daytime_per_participant.time, binwidth=1, height=5, aspect=1.5, color="#28827C") +sns.displot( + df_count_daytime_per_participant.time, + binwidth=1, + height=5, + aspect=1.5, + color="#28827C", +) # %% [markdown] # ## Evening EMA @@ -334,10 +352,16 @@ s_evening_completed_ratio = ( s_evening_completed_ratio.describe() # %% -g3 = sns.displot(s_evening_completed_ratio - 0.001, binwidth=0.05, height=5, aspect=1.5, color="#28827C") +g3 = sns.displot( + s_evening_completed_ratio - 0.001, + binwidth=0.05, + height=5, + aspect=1.5, + color="#28827C", +) g3.set_axis_labels("Ratio of days with the evening EMA filled out", "Participant count") -g3.set(xlim=(1.01,0.59)) -#g3.savefig("EveningEMAratioParticip.pdf") +g3.set(xlim=(1.01, 0.59)) +# g3.savefig("EveningEMAratioParticip.pdf") # %% df_adherence = df_adherence.merge(