Identify unique sessions and assign status.

Use named constants for these statuses.
communication
junos 2021-06-11 13:50:24 +02:00
parent 04c069af47
commit 371e755159
2 changed files with 59 additions and 11 deletions

View File

@ -141,14 +141,14 @@ df_esm_preprocessed.query("participant_id == 31 & esm_session == 77")[
["esm_trigger", "esm_instructions", "esm_user_answer"]
]
# %%
# %% tags=[]
df_esm_2 = df_session_counts[
df_session_counts["esm_session_count"] == 2
].reset_index().merge(df_esm_preprocessed, how="left", on=["participant_id", "device_id", "esm_session"])
with pd.option_context('display.max_rows', None, 'display.max_columns', None): # more options can be specified also
display(df_esm_2)
#with pd.option_context('display.max_rows', None, 'display.max_columns', None): # more options can be specified also
#display(df_esm_2)
# %% [markdown]
# %% [markdown] tags=[]
# ### Long sessions
# %%
@ -269,4 +269,39 @@ sns.displot(
height=8,
)
# %% [markdown]
# ### Singleton sessions
# %%
df_session_counts.count()
# %%
df_session_counts[(df_session_counts.esm_session_count == 1) & df_session_counts.session_response.isna()]
# %%
df_session_1 = df_session_counts[(df_session_counts["esm_session_count"] == 1) & df_session_counts.session_response.isna()]
df_esm_unique_session = df_session_1.join(
df_esm_preprocessed.set_index(["participant_id", "device_id", "esm_session"])
)
df_esm_unique_session = df_esm_unique_session["esm_trigger"].rename("session_response")
# %%
df_session_counts.loc[df_esm_unique_session.index, "session_response"] = df_esm_unique_session
# %%
df_session_counts.count()
# %%
df_session_counts.merge()
# %%
df_esm_78243 = df_esm_preprocessed[df_esm_preprocessed["username"] == "uploader_78243"]
df_esm_78243 = df_esm_78243.sort_values("_id")[["id","_id","datetime_lj", "esm_status","esm_trigger","esm_instructions","esm_user_answer","esm_session"]]
# %%
df_esm_78243.columns
# %%
df_esm_78243.to_csv("example.csv")
# %%

View File

@ -9,6 +9,13 @@ from config.models import ESM, Participant
from setup import db_engine, session
# Time zone for Europe/Ljubljana.
TZ_LJ = timezone("Europe/Ljubljana")
# esm_status value that classify_sessions_adherence compares against: a session
# with any row whose esm_status differs is labelled SESSION_STATUS_UNANSWERED.
ESM_STATUS_ANSWERED = 2
# Columns that together identify one ESM session; used as the groupby key and
# as the join index in classify_sessions_adherence.
GROUP_SESSIONS_BY = ["participant_id", "device_id", "esm_session"]
# Labels written to the "session_response" column by classify_sessions_adherence.
# At least one row in the session has esm_status != ESM_STATUS_ANSWERED.
SESSION_STATUS_UNANSWERED = "ema_unanswered"
# The session matches the "day finished" conditions (e.g. esm_user_answer ==
# "DayFinishedSetEvening").
SESSION_STATUS_DAY_FINISHED = "day_finished"
# At least one esm_trigger in the session ends with "_last".
SESSION_STATUS_COMPLETE = "ema_completed"
def get_esm_data(usernames: Collection) -> pd.DataFrame:
@ -75,17 +82,15 @@ def classify_sessions_adherence(df_esm_preprocessed: pd.DataFrame) -> pd.DataFra
-------
some dataframe
"""
sessions_grouped = df_esm_preprocessed.groupby(
["participant_id", "device_id", "esm_session"]
)
sessions_grouped = df_esm_preprocessed.groupby(GROUP_SESSIONS_BY)
df_session_counts = pd.DataFrame(sessions_grouped.count()["id"]).rename(
columns={"id": "esm_session_count"}
)
df_session_counts["session_response"] = np.NaN
esm_not_answered = sessions_grouped.apply(lambda x: (x.esm_status != 2).any())
df_session_counts.loc[esm_not_answered, "session_response"] = "esm_unanswered"
esm_not_answered = sessions_grouped.apply(lambda x: (x.esm_status != ESM_STATUS_ANSWERED).any())
df_session_counts.loc[esm_not_answered, "session_response"] = SESSION_STATUS_UNANSWERED
non_session = sessions_grouped.apply(
lambda x: (
@ -94,12 +99,20 @@ def classify_sessions_adherence(df_esm_preprocessed: pd.DataFrame) -> pd.DataFra
| (x.esm_user_answer == "DayFinishedSetEvening") # When would you like to answer the evening EMA?
).any()
)
df_session_counts.loc[non_session, "session_response"] = "day_finished"
df_session_counts.loc[non_session, "session_response"] = SESSION_STATUS_DAY_FINISHED
singleton_sessions = (df_session_counts.esm_session_count == 1) & (df_session_counts.session_response.isna())
df_session_1 = df_session_counts[singleton_sessions]
df_esm_unique_session = df_session_1.join(
df_esm_preprocessed.set_index(GROUP_SESSIONS_BY), how="left"
)
df_esm_unique_session = df_esm_unique_session.assign(session_response=lambda x: x.esm_trigger)["session_response"]
df_session_counts.loc[df_esm_unique_session.index, "session_response"] = df_esm_unique_session
finished_sessions = sessions_grouped.apply(
lambda x: (x.esm_trigger.str.endswith("_last")).any()
)
df_session_counts.loc[finished_sessions, "session_response"] = "esm_finished"
df_session_counts.loc[finished_sessions, "session_response"] = SESSION_STATUS_COMPLETE
# TODO: Check the evening-evening_last sequence to see whether everything is caught by the finished-sessions rule.