diff --git a/exploration/expl_esm.py b/exploration/expl_esm.py index 6934b58..9cfd90f 100644 --- a/exploration/expl_esm.py +++ b/exploration/expl_esm.py @@ -61,7 +61,7 @@ df_esm_preprocessed.columns # One approach would be to count distinct session IDs which are incremented for each group of EMAs. However, since not every question answered counts as a fulfilled EMA, some unique session IDs should be eliminated first. # %% -session_counts = df_esm_preprocessed.groupby(["participant_id", "esm_session"]).count()[ +session_counts = df_esm_preprocessed.groupby(["participant_id", "device_id", "esm_session"]).count()[ "id" ] @@ -82,7 +82,7 @@ df_session_counts = pd.DataFrame(session_counts).rename( ) df_session_1 = df_session_counts[(df_session_counts["esm_session_count"] == 1)] df_esm_unique_session = df_session_1.join( - df_esm_preprocessed.set_index(["participant_id", "esm_session"]) + df_esm_preprocessed.set_index(["participant_id", "device_id", "esm_session"]) ) # %% @@ -141,6 +141,13 @@ df_esm_preprocessed.query("participant_id == 31 & esm_session == 77")[ ["esm_trigger", "esm_instructions", "esm_user_answer"] ] +# %% +df_esm_2 = df_session_counts[ + df_session_counts["esm_session_count"] == 2 +].reset_index().merge(df_esm_preprocessed, how="left", on=["participant_id", "device_id", "esm_session"]) +with pd.option_context('display.max_rows', None, 'display.max_columns', None): # more options can be specified also + display(df_esm_2) + # %% [markdown] # ### Long sessions @@ -208,7 +215,7 @@ df_session_counts.count() # %% non_session = session_group_by.apply( lambda x: ( - (x.esm_user_answer == "DayFinished3421") | (x.esm_user_answer == "DayOff3421") + (x.esm_user_answer == "DayFinished3421") | (x.esm_user_answer == "DayOff3421") | (x.esm_user_answer == "DayFinishedSetEvening") ).any() ) df_session_counts.loc[non_session, "session_response"] = "day_finished" diff --git a/features/esm.py b/features/esm.py index fc43581..3263b31 100644 --- a/features/esm.py +++ b/features/esm.py @@ -89,8 +89,9 @@ def classify_sessions_adherence(df_esm_preprocessed: pd.DataFrame) -> pd.DataFra non_session = sessions_grouped.apply( lambda x: ( - (x.esm_user_answer == "DayFinished3421") - | (x.esm_user_answer == "DayOff3421") + (x.esm_user_answer == "DayFinished3421") # I finished working for today. + | (x.esm_user_answer == "DayOff3421") # I am not going to work today. + | (x.esm_user_answer == "DayFinishedSetEvening") # When would you like to answer the evening EMA? ).any() ) df_session_counts.loc[non_session, "session_response"] = "day_finished"