Analyze adherence:

Look at time differences between subsequent daytime EMA.
Look at the daily evening EMA proportion.
communication
junos 2021-06-11 20:28:24 +02:00
parent 7a12f68dfe
commit 23c3613c60
2 changed files with 202 additions and 9 deletions

View File

@ -328,7 +328,7 @@ df_esm_session_7 = df_session_7.join(
how="left",
)
# %% jupyter={"outputs_hidden": true} tags=[]
# %% tags=[]
with pd.option_context(
"display.max_rows", None, "display.max_columns", None
): # more options can be specified also
@ -347,7 +347,7 @@ df_esm_session_27 = df_session_27.join(
how="left",
)
# %% jupyter={"outputs_hidden": true} tags=[]
# %% tags=[]
with pd.option_context(
"display.max_rows", None, "display.max_columns", None
): # more options can be specified also
@ -357,3 +357,28 @@ with pd.option_context(
# These are all morning questionnaires with morning *and* workday items, with the feedback added and also branched in the longest possible way.
# %%
# Sessions that consist of exactly six records and carry no session-level response label.
df_session_6 = df_session_counts.loc[
    lambda df: df["esm_session_count"].eq(6) & df["session_response"].isna()
]
# Pull in the individual ESM records of those sessions. No `on=` is given, so the
# join is index-based; df_session_6 is presumably indexed by the same keys — TODO confirm.
df_esm_session_6 = df_session_6.join(
    df_esm_preprocessed.set_index(["participant_id", "device_id", "esm_session"]),
    how="left",
)
# %%
display(
    df_esm_session_6.loc[:, ["esm_trigger", "esm_instructions", "esm_user_answer"]]
)
# %%
# Run both session classifiers on the preprocessed ESM records.
# NOTE(review): this rebinds `df_session_counts`, which the cells above already
# read — confirm that the two versions are interchangeable when cells are re-run.
df_session_counts = classify_sessions_by_completion(df_esm_preprocessed)
df_session_time = classify_sessions_by_time(df_esm_preprocessed)
# %% [markdown]
# The sessions were classified by time by taking the **first** record in a session.
# However, a morning questionnaire could seamlessly transition into a daytime questionnaire, if the participant was already at work.
# In this case, the "time" label changed mid-session.
#
# Because of the way `classify_sessions_by_time` works, such a questionnaire was classified as "morning".
# But for all intents and purposes, it can be treated as a "daytime" EMA.
#
# This is corrected in `classify_sessions_by_completion_time`.

View File

@ -55,19 +55,19 @@ df_esm_inactive = get_esm_data(participants_inactive_usernames)
# %%
df_esm_preprocessed = preprocess_esm(df_esm_inactive)
df_session_counts = classify_sessions_adherence(df_esm_preprocessed)
df_session_counts_time = classify_sessions_by_completion_time(df_esm_preprocessed)
# %%
tbl_session_outcomes = df_session_counts.reset_index()[
tbl_session_outcomes = df_session_counts_time.reset_index()[
"session_response"
].value_counts()
# %%
print("All sessions:", len(df_session_counts))
print("All sessions:", len(df_session_counts_time))
print("-------------------------------------")
print(tbl_session_outcomes)
print("-------------------------------------")
print(tbl_session_outcomes / len(df_session_counts))
print(tbl_session_outcomes / len(df_session_counts_time))
# %%
VARIABLES_TO_TRANSLATE = {
@ -83,11 +83,11 @@ baseline_inactive = baseline_inactive.assign(
)
# %%
df_session_counts
df_session_counts_time
# %%
df_session_finished = df_session_counts[
df_session_counts["session_response"] == "esm_finished"
df_session_finished = df_session_counts_time[
df_session_counts_time["session_response"] == SESSION_STATUS_COMPLETE
].reset_index()
# %%
@ -132,3 +132,171 @@ lr_ols = smf.ols(
)
ls_result = lr_ols.fit()
ls_result.summary()
# %% [markdown]
# # Concordance by type
# %% [markdown]
# ## Workday EMA
# %% [markdown]
# ### Filter the EMA of interest.
# %% [markdown]
# Work with only completed EMA.
# %% tags=[]
# Keep only the sessions labelled as completed. The shared status constant is used
# (as in the `df_session_finished` filter earlier in this notebook) instead of a
# hard-coded label, so both filters stay in sync if the label ever changes.
df_session_counts_time_completed = df_session_counts_time[
    df_session_counts_time["session_response"] == SESSION_STATUS_COMPLETE
]
# %% [markdown]
# To be able to compare EMA sessions *within* one day, add a date-part column.
#
# **NOTE**: Since daytime EMAs could *theoretically* last beyond midnight, but never after 4 AM, the datetime is first shifted 4 h earlier before its date part is taken.
# %%
# A "day" is counted from 04:00 to 04:00: move every timestamp back by four hours
# and only then extract the calendar date.
df_session_counts_time_completed = df_session_counts_time_completed.assign(
    date_lj=lambda sessions: sessions["datetime_lj"]
    .sub(datetime.timedelta(hours=4))
    .dt.date
)
# %%
df_session_counts_time_completed
# %% [markdown]
# Next, calculate the differences between subsequent records. But first, group them by participant and device ID (as usual) and by *time* (the EMA type). This way, the differences are calculated between EMA sessions of the same type.
# %% tags=[]
# First differences of the session timestamp and of its day label, taken within
# each (participant_id, device_id, time) group, i.e. against the preceding row of
# the same EMA type. The first row of every group gets a missing value.
# NOTE(review): `.diff()` follows row order — this assumes the sessions are already
# sorted chronologically within each group; confirm upstream.
df_session_time_diff = (
df_session_counts_time_completed[["datetime_lj", "date_lj", "time"]]
.groupby(["participant_id", "device_id", "time"])
.diff()
.rename(
columns={
"datetime_lj": "previous_same_type_time_diff",
"date_lj": "time_diff_days",
}
)
)
# %%
df_session_time_diff
# %% tags=[]
# Attach the two difference columns back to the completed sessions (index-based join).
df_session_counts_time_diff = df_session_counts_time_completed.join(
df_session_time_diff, how="left"
)
# %% [markdown]
# Now, select only the daytime EMAs of interest. Discard the differences between *different day* EMAs.
# %%
time_workday_completed_less_than_1_day = (
    # Only take daytime EMAs.
    df_session_counts_time_diff["time"].eq("daytime")
    # Only where a difference to a previous same-type session was actually calculated.
    & df_session_counts_time_diff["previous_same_type_time_diff"].notna()
    # Only take differences *within* a day (day label unchanged).
    & (df_session_counts_time_diff["time_diff_days"] == datetime.timedelta(0))
)
# %% tags=[]
df_session_workday = df_session_counts_time_diff.loc[
    time_workday_completed_less_than_1_day
]
# %%
# Express the gap to the previous daytime session in minutes.
# `.dt.total_seconds()` is used instead of `.dt.seconds`: the latter returns only
# the seconds *component* (0-86399) of each timedelta and drops the days part, so
# a negative or multi-day difference would show up as a plausible positive gap.
df_session_workday = df_session_workday.assign(
    time_diff_minutes=lambda x: x.previous_same_type_time_diff.dt.total_seconds() / 60
)
# %%
sns.displot(df_session_workday["time_diff_minutes"], binwidth=5, height=5)
# %% [markdown]
# There are some sessions that are really close together. By design, none should be closer than 30 min. Let's take a look at those.
# %%
df_session_workday[df_session_workday.time_diff_minutes < 30]
# %% [markdown]
# There are only 2 instances, look at them individually.
# %%
# Raw ESM records of the first suspicious case: participant 35, session 6.
df_esm_preprocessed.loc[
(df_esm_preprocessed.participant_id == 35) & (df_esm_preprocessed.esm_session == 6),
["esm_trigger", "esm_session", "datetime_lj", "esm_instructions"],
]
# %%
# Second case: participant 45, sessions numbered below 3, on one specific device.
# NOTE(review): the participant and device identifiers are hard-coded, presumably
# read off the table of gaps under 30 min — they must be revisited if the data change.
df_esm_preprocessed.loc[
(df_esm_preprocessed.participant_id == 45)
& (df_esm_preprocessed.esm_session < 3)
& (df_esm_preprocessed.device_id == "d848b1c4-33cc-4e22-82ae-96d6b6458a33"),
["esm_trigger", "esm_session", "datetime_lj", "esm_instructions"],
]
# %% [markdown]
# As these signify bugs, we can safely discard them in the following analysis.
# %%
# Keep only gaps of at least 30 minutes, i.e. exactly the complement of the
# `time_diff_minutes < 30` rows inspected above. The previous `> 29` threshold
# would have retained gaps between 29 and 30 minutes that were flagged as bugs.
df_session_workday = df_session_workday[df_session_workday.time_diff_minutes >= 30]
# %% [markdown]
# ### All participants
# %%
df_session_workday.describe()
# %% [markdown]
# These statistics look reasonable.
# %% [markdown]
# ### Differences between participants
# %%
# Per-participant means of the numeric columns (notably `time_diff_minutes`).
# `numeric_only=True` is explicit because the frame also holds string columns
# (`time`, `session_response`), for which pandas >= 2.0 raises a TypeError
# instead of silently dropping them.
df_mean_daytime_interval = df_session_workday.groupby("participant_id").mean(
    numeric_only=True
)
# %%
df_mean_daytime_interval.describe()
# %%
sns.displot(df_mean_daytime_interval.time_diff_minutes, binwidth=5, height=5)
# %%
# Number of non-null entries per column for every participant and day.
# NOTE(review): `df_session_workday` holds one row per same-day gap between two
# consecutive daytime EMAs (rows without a within-day difference were filtered
# out), so these counts are numbers of *intervals*, not of sessions — confirm
# that this is the intended quantity.
df_count_daytime_per_participant = df_session_workday.groupby(
["participant_id", "date_lj"]
).count()
# %%
df_count_daytime_per_participant["time"].describe()
# %%
sns.displot(df_count_daytime_per_participant.time, binwidth=1, height=5)
# %% [markdown]
# ## Evening EMA
# %% [markdown]
# For evening EMA, determine, for each day on which any EMA session was completed, whether an evening EMA was also completed.
#
# Note, we are only dealing with true EMA sessions, non-sessions etc. have already been filtered out.
# %%
def _any_evening_session(day_sessions):
    """Return True if at least one session in the group is labelled "evening"."""
    return (day_sessions["time"] == "evening").any()


# One boolean per participant, device and day: does the day contain an evening EMA?
s_evening_completed = df_session_counts_time_completed.groupby(
    ["participant_id", "device_id", "date_lj"]
).apply(_any_evening_session)
# %%
df_session_counts_time_completed
# %%
# Per participant: days with an evening EMA divided by all days with any completed EMA.
evening_by_participant = s_evening_completed.groupby("participant_id")
s_evening_completed_ratio = (
    evening_by_participant.sum() / evening_by_participant.count()
)
# %%
s_evening_completed_ratio.describe()
# %%
# The small negative offset presumably keeps ratios of exactly 1.0 inside the
# last 0.1-wide bin — TODO confirm.
sns.displot(s_evening_completed_ratio.sub(0.001), binwidth=0.1, height=5)
# %%