Patch summary (free text before the first "diff --git" header; patch tools skip it).

  * exploration/expl_esm.py: comments out the full-table display of the
    two-item sessions and adds exploratory cells for single-item sessions.
    The unfinished cell `df_session_counts.merge()` is left out because
    DataFrame.merge() requires a `right` argument and the call raises TypeError.
  * features/esm.py: names the ESM status code, the session grouping keys and
    the session labels as module constants, and labels still-unclassified
    single-item sessions with their ESM trigger.
    NOTE(review): two label values change together with the rename
    ("esm_unanswered" -> "ema_unanswered", "esm_finished" -> "ema_completed");
    confirm that no consumer still compares against the old strings.

Line breaks and indentation were reconstructed from a whitespace-collapsed copy
and the hunk headers were recomputed for the lines shown here. The index hashes
are the ones from the original commit. Check the context lines against the
target files before applying.

diff --git a/exploration/expl_esm.py b/exploration/expl_esm.py
index 9cfd90f..9102823 100644
--- a/exploration/expl_esm.py
+++ b/exploration/expl_esm.py
@@ -141,14 +141,14 @@ df_esm_preprocessed.query("participant_id == 31 & esm_session == 77")[
     ["esm_trigger", "esm_instructions", "esm_user_answer"]
 ]
 
-# %%
+# %% tags=[]
 df_esm_2 = df_session_counts[
     df_session_counts["esm_session_count"] == 2
 ].reset_index().merge(df_esm_preprocessed, how="left", on=["participant_id", "device_id", "esm_session"])
-with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
-    display(df_esm_2)
+#with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
+    #display(df_esm_2)
 
-# %% [markdown]
+# %% [markdown] tags=[]
 # ### Long sessions
 
 # %%
@@ -269,4 +269,36 @@ sns.displot(
     height=8,
 )
 
+# %% [markdown]
+# ### Singleton sessions
+
+# %%
+df_session_counts.count()
+
+# %%
+df_session_counts[(df_session_counts.esm_session_count == 1) & df_session_counts.session_response.isna()]
+
+# %%
+df_session_1 = df_session_counts[(df_session_counts["esm_session_count"] == 1) & df_session_counts.session_response.isna()]
+df_esm_unique_session = df_session_1.join(
+    df_esm_preprocessed.set_index(["participant_id", "device_id", "esm_session"])
+)
+df_esm_unique_session = df_esm_unique_session["esm_trigger"].rename("session_response")
+
+# %%
+df_session_counts.loc[df_esm_unique_session.index, "session_response"] = df_esm_unique_session
+
+# %%
+df_session_counts.count()
+
+# %%
+df_esm_78243 = df_esm_preprocessed[df_esm_preprocessed["username"] == "uploader_78243"]
+df_esm_78243 = df_esm_78243.sort_values("_id")[["id","_id","datetime_lj", "esm_status","esm_trigger","esm_instructions","esm_user_answer","esm_session"]]
+
+# %%
+df_esm_78243.columns
+
+# %%
+df_esm_78243.to_csv("example.csv")
+
 # %%
diff --git a/features/esm.py b/features/esm.py
index 3263b31..c293b4b 100644
--- a/features/esm.py
+++ b/features/esm.py
@@ -9,6 +9,13 @@ from config.models import ESM, Participant
 from setup import db_engine, session
 
 TZ_LJ = timezone("Europe/Ljubljana")
+ESM_STATUS_ANSWERED = 2
+
+GROUP_SESSIONS_BY = ["participant_id", "device_id", "esm_session"]
+
+SESSION_STATUS_UNANSWERED = "ema_unanswered"
+SESSION_STATUS_DAY_FINISHED = "day_finished"
+SESSION_STATUS_COMPLETE = "ema_completed"
 
 
 def get_esm_data(usernames: Collection) -> pd.DataFrame:
@@ -75,17 +82,15 @@ def classify_sessions_adherence(df_esm_preprocessed: pd.DataFrame) -> pd.DataFra
     -------
     some dataframe
     """
-    sessions_grouped = df_esm_preprocessed.groupby(
-        ["participant_id", "device_id", "esm_session"]
-    )
+    sessions_grouped = df_esm_preprocessed.groupby(GROUP_SESSIONS_BY)
 
     df_session_counts = pd.DataFrame(sessions_grouped.count()["id"]).rename(
         columns={"id": "esm_session_count"}
     )
     df_session_counts["session_response"] = np.NaN
 
-    esm_not_answered = sessions_grouped.apply(lambda x: (x.esm_status != 2).any())
-    df_session_counts.loc[esm_not_answered, "session_response"] = "esm_unanswered"
+    esm_not_answered = sessions_grouped.apply(lambda x: (x.esm_status != ESM_STATUS_ANSWERED).any())
+    df_session_counts.loc[esm_not_answered, "session_response"] = SESSION_STATUS_UNANSWERED
 
     non_session = sessions_grouped.apply(
         lambda x: (
@@ -94,11 +99,20 @@ def classify_sessions_adherence(df_esm_preprocessed: pd.DataFrame) -> pd.DataFra
             | (x.esm_user_answer == "DayFinishedSetEvening")  # When would you like to answer the evening EMA?
         ).any()
     )
-    df_session_counts.loc[non_session, "session_response"] = "day_finished"
+    df_session_counts.loc[non_session, "session_response"] = SESSION_STATUS_DAY_FINISHED
+
+    # A one-item session that is still unlabelled takes its label from that item's esm_trigger.
+    singleton_sessions = (df_session_counts.esm_session_count == 1) & (df_session_counts.session_response.isna())
+    df_session_1 = df_session_counts[singleton_sessions]
+    df_esm_unique_session = df_session_1.join(
+        df_esm_preprocessed.set_index(GROUP_SESSIONS_BY), how="left"
+    )
+    df_esm_unique_session = df_esm_unique_session.assign(session_response=lambda x: x.esm_trigger)["session_response"]
+    df_session_counts.loc[df_esm_unique_session.index, "session_response"] = df_esm_unique_session
 
     finished_sessions = sessions_grouped.apply(
         lambda x: (x.esm_trigger.str.endswith("_last")).any()
     )
-    df_session_counts.loc[finished_sessions, "session_response"] = "esm_finished"
+    df_session_counts.loc[finished_sessions, "session_response"] = SESSION_STATUS_COMPLETE
 
     # TODO Look at evening-evening_last sequence, if everything is caught with finished sessions