Identify unique sessions and assign status.

Use named constants for these statuses.
communication
junos 2021-06-11 13:50:24 +02:00
parent 04c069af47
commit 371e755159
2 changed files with 59 additions and 11 deletions

View File

@ -141,14 +141,14 @@ df_esm_preprocessed.query("participant_id == 31 & esm_session == 77")[
["esm_trigger", "esm_instructions", "esm_user_answer"] ["esm_trigger", "esm_instructions", "esm_user_answer"]
] ]
# %% # %% tags=[]
df_esm_2 = df_session_counts[ df_esm_2 = df_session_counts[
df_session_counts["esm_session_count"] == 2 df_session_counts["esm_session_count"] == 2
].reset_index().merge(df_esm_preprocessed, how="left", on=["participant_id", "device_id", "esm_session"]) ].reset_index().merge(df_esm_preprocessed, how="left", on=["participant_id", "device_id", "esm_session"])
with pd.option_context('display.max_rows', None, 'display.max_columns', None): # more options can be specified also #with pd.option_context('display.max_rows', None, 'display.max_columns', None): # more options can be specified also
display(df_esm_2) #display(df_esm_2)
# %% [markdown] # %% [markdown] tags=[]
# ### Long sessions # ### Long sessions
# %% # %%
@ -269,4 +269,39 @@ sns.displot(
height=8, height=8,
) )
# %% [markdown]
# ### Singleton sessions
# %%
# Non-null counts per column before singleton sessions are labelled.
df_session_counts.count()
# %%
# Singleton sessions: exactly one ESM row and no session_response assigned yet.
singleton_sessions = (df_session_counts["esm_session_count"] == 1) & (
    df_session_counts["session_response"].isna()
)
df_session_counts[singleton_sessions]
# %%
df_session_1 = df_session_counts[singleton_sessions]
# Attach the ESM rows of each singleton session via the shared
# (participant_id, device_id, esm_session) index.
df_esm_unique_session = df_session_1.join(
    df_esm_preprocessed.set_index(["participant_id", "device_id", "esm_session"]),
    how="left",
)
# The trigger of the session's only row becomes the session's response label.
df_esm_unique_session = df_esm_unique_session["esm_trigger"].rename("session_response")
# %%
df_session_counts.loc[df_esm_unique_session.index, "session_response"] = df_esm_unique_session
# %%
# Same counts again, to see how many sessions received a label.
df_session_counts.count()
# %%
# NOTE(review): this cell used to call `df_session_counts.merge()` with no
# arguments, which raises TypeError because `DataFrame.merge` requires a
# `right` frame. The call is removed so the notebook runs top to bottom.
# TODO: presumably the statuses should be merged back onto another frame;
# decide which one and restore the merge with explicit `right`, `how`, `on`.
# %%
# Inspect a single participant's ESM rows in `_id` order.
df_esm_78243 = df_esm_preprocessed[df_esm_preprocessed["username"] == "uploader_78243"]
df_esm_78243 = df_esm_78243.sort_values("_id")[
    [
        "id",
        "_id",
        "datetime_lj",
        "esm_status",
        "esm_trigger",
        "esm_instructions",
        "esm_user_answer",
        "esm_session",
    ]
]
# %%
df_esm_78243.columns
# %%
# Export the selection for inspection outside the notebook.
df_esm_78243.to_csv("example.csv")
# %% # %%

View File

@ -9,6 +9,13 @@ from config.models import ESM, Participant
from setup import db_engine, session from setup import db_engine, session
TZ_LJ = timezone("Europe/Ljubljana") TZ_LJ = timezone("Europe/Ljubljana")
# Value that `esm_status` is compared against when flagging sessions that
# contain unanswered prompts.
ESM_STATUS_ANSWERED = 2

# Columns used together to group ESM rows into sessions.
GROUP_SESSIONS_BY = [
    "participant_id",
    "device_id",
    "esm_session",
]

# Labels written to the `session_response` column.
SESSION_STATUS_UNANSWERED = "ema_unanswered"
SESSION_STATUS_DAY_FINISHED = "day_finished"
SESSION_STATUS_COMPLETE = "ema_completed"
def get_esm_data(usernames: Collection) -> pd.DataFrame: def get_esm_data(usernames: Collection) -> pd.DataFrame:
@ -75,17 +82,15 @@ def classify_sessions_adherence(df_esm_preprocessed: pd.DataFrame) -> pd.DataFra
------- -------
some dataframe some dataframe
""" """
sessions_grouped = df_esm_preprocessed.groupby( sessions_grouped = df_esm_preprocessed.groupby(GROUP_SESSIONS_BY)
["participant_id", "device_id", "esm_session"]
)
df_session_counts = pd.DataFrame(sessions_grouped.count()["id"]).rename( df_session_counts = pd.DataFrame(sessions_grouped.count()["id"]).rename(
columns={"id": "esm_session_count"} columns={"id": "esm_session_count"}
) )
df_session_counts["session_response"] = np.NaN df_session_counts["session_response"] = np.NaN
esm_not_answered = sessions_grouped.apply(lambda x: (x.esm_status != 2).any()) esm_not_answered = sessions_grouped.apply(lambda x: (x.esm_status != ESM_STATUS_ANSWERED).any())
df_session_counts.loc[esm_not_answered, "session_response"] = "esm_unanswered" df_session_counts.loc[esm_not_answered, "session_response"] = SESSION_STATUS_UNANSWERED
non_session = sessions_grouped.apply( non_session = sessions_grouped.apply(
lambda x: ( lambda x: (
@ -94,12 +99,20 @@ def classify_sessions_adherence(df_esm_preprocessed: pd.DataFrame) -> pd.DataFra
| (x.esm_user_answer == "DayFinishedSetEvening") # When would you like to answer the evening EMA? | (x.esm_user_answer == "DayFinishedSetEvening") # When would you like to answer the evening EMA?
).any() ).any()
) )
df_session_counts.loc[non_session, "session_response"] = "day_finished" df_session_counts.loc[non_session, "session_response"] = SESSION_STATUS_DAY_FINISHED
singleton_sessions = (df_session_counts.esm_session_count == 1) & (df_session_counts.session_response.isna())
df_session_1 = df_session_counts[singleton_sessions]
df_esm_unique_session = df_session_1.join(
df_esm_preprocessed.set_index(GROUP_SESSIONS_BY), how="left"
)
df_esm_unique_session = df_esm_unique_session.assign(session_response=lambda x: x.esm_trigger)["session_response"]
df_session_counts.loc[df_esm_unique_session.index, "session_response"] = df_esm_unique_session
finished_sessions = sessions_grouped.apply( finished_sessions = sessions_grouped.apply(
lambda x: (x.esm_trigger.str.endswith("_last")).any() lambda x: (x.esm_trigger.str.endswith("_last")).any()
) )
df_session_counts.loc[finished_sessions, "session_response"] = "esm_finished" df_session_counts.loc[finished_sessions, "session_response"] = SESSION_STATUS_COMPLETE
# TODO Look at evening-evening_last sequence, if everything is caught with finished sessions # TODO Look at evening-evening_last sequence, if everything is caught with finished sessions