Analyze adherence:
Look at time differences between subsequent daytime EMA. Look at the daily evening EMA proportion.
communication
parent
7a12f68dfe
commit
23c3613c60
|
@ -328,7 +328,7 @@ df_esm_session_7 = df_session_7.join(
|
|||
how="left",
|
||||
)
|
||||
|
||||
# %% jupyter={"outputs_hidden": true} tags=[]
|
||||
# %% tags=[]
|
||||
with pd.option_context(
|
||||
"display.max_rows", None, "display.max_columns", None
|
||||
): # more options can be specified also
|
||||
|
@ -347,7 +347,7 @@ df_esm_session_27 = df_session_27.join(
|
|||
how="left",
|
||||
)
|
||||
|
||||
# %% jupyter={"outputs_hidden": true} tags=[]
|
||||
# %% tags=[]
|
||||
with pd.option_context(
|
||||
"display.max_rows", None, "display.max_columns", None
|
||||
): # more options can be specified also
|
||||
|
@ -357,3 +357,28 @@ with pd.option_context(
|
|||
# These are all morning questionnaires with morning *and* workday items, with the feedback added and also branched in the longest possible way.
|
||||
|
||||
# %%
|
||||
df_session_6 = df_session_counts[
|
||||
(df_session_counts["esm_session_count"] == 6)
|
||||
& df_session_counts.session_response.isna()
|
||||
]
|
||||
df_esm_session_6 = df_session_6.join(
|
||||
df_esm_preprocessed.set_index(["participant_id", "device_id", "esm_session"]),
|
||||
how="left",
|
||||
)
|
||||
|
||||
# %%
|
||||
display(df_esm_session_6[["esm_trigger", "esm_instructions", "esm_user_answer"]])
|
||||
|
||||
# %%
|
||||
df_session_counts = classify_sessions_by_completion(df_esm_preprocessed)
|
||||
df_session_time = classify_sessions_by_time(df_esm_preprocessed)
|
||||
|
||||
# %% [markdown]
|
||||
# The sessions were classified by time by taking the **first** record in a session.
|
||||
# However, a morning questionnaire could seamlessly transition into a daytime questionnaire, if the participant was already at work.
|
||||
# In this case, the "time" label changed mid-session.
|
||||
#
|
||||
# Because of the way classify_sessions_by_time works, this questionnaire was classified as "morning".
|
||||
# But for all intents and purposes, it can be treated as a "daytime" EMA.
|
||||
#
|
||||
# This is corrected in `classify_sessions_by_completion_time`
|
||||
|
|
|
@ -55,19 +55,19 @@ df_esm_inactive = get_esm_data(participants_inactive_usernames)
|
|||
|
||||
# %%
|
||||
df_esm_preprocessed = preprocess_esm(df_esm_inactive)
|
||||
df_session_counts = classify_sessions_adherence(df_esm_preprocessed)
|
||||
df_session_counts_time = classify_sessions_by_completion_time(df_esm_preprocessed)
|
||||
|
||||
# %%
|
||||
tbl_session_outcomes = df_session_counts.reset_index()[
|
||||
tbl_session_outcomes = df_session_counts_time.reset_index()[
|
||||
"session_response"
|
||||
].value_counts()
|
||||
|
||||
# %%
|
||||
print("All sessions:", len(df_session_counts))
|
||||
print("All sessions:", len(df_session_counts_time))
|
||||
print("-------------------------------------")
|
||||
print(tbl_session_outcomes)
|
||||
print("-------------------------------------")
|
||||
print(tbl_session_outcomes / len(df_session_counts))
|
||||
print(tbl_session_outcomes / len(df_session_counts_time))
|
||||
|
||||
# %%
|
||||
VARIABLES_TO_TRANSLATE = {
|
||||
|
@ -83,11 +83,11 @@ baseline_inactive = baseline_inactive.assign(
|
|||
)
|
||||
|
||||
# %%
|
||||
df_session_counts
|
||||
df_session_counts_time
|
||||
|
||||
# %%
|
||||
df_session_finished = df_session_counts[
|
||||
df_session_counts["session_response"] == "esm_finished"
|
||||
df_session_finished = df_session_counts_time[
|
||||
df_session_counts_time["session_response"] == SESSION_STATUS_COMPLETE
|
||||
].reset_index()
|
||||
|
||||
# %%
|
||||
|
@ -132,3 +132,171 @@ lr_ols = smf.ols(
|
|||
)
|
||||
ls_result = lr_ols.fit()
|
||||
ls_result.summary()
|
||||
|
||||
# %% [markdown]
|
||||
# # Concordance by type
|
||||
|
||||
# %% [markdown]
|
||||
# ## Workday EMA
|
||||
|
||||
# %% [markdown]
|
||||
# ### Filter the EMA of interest.
|
||||
|
||||
# %% [markdown]
|
||||
# Work with only completed EMA.
|
||||
|
||||
# %% tags=[]
|
||||
# Keep only the sessions whose response status marks a completed EMA.
# The shared status constant is used (as in the `df_session_finished` cell)
# instead of a hard-coded string, so both filters cannot drift apart.
# NOTE(review): assumes SESSION_STATUS_COMPLETE == "ema_completed" - confirm.
df_session_counts_time_completed = df_session_counts_time[
    df_session_counts_time.session_response == SESSION_STATUS_COMPLETE
]
|
||||
|
||||
# %% [markdown]
|
||||
# To be able to compare EMA sessions *within* one day, add a date-part column.
|
||||
#
|
||||
# **NOTE**: Since daytime EMAs could *theoretically* last beyond midnight, but never past 4 AM, the datetime is first shifted 4 h earlier before the date is extracted.
|
||||
|
||||
# %%
|
||||
# Add a date-part column: shift the timestamp back by 4 h first, then take
# the calendar date of the shifted value.
df_session_counts_time_completed = df_session_counts_time_completed.assign(
    date_lj=(
        df_session_counts_time_completed.datetime_lj - datetime.timedelta(hours=4)
    ).dt.date
)
|
||||
|
||||
# %%
|
||||
df_session_counts_time_completed
|
||||
|
||||
# %% [markdown]
|
||||
# Next, calculate differences between subsequent records. But first group them by participant and device ID (as usual) and *time*. This way, the differences between the same type of EMA sessions are calculated.
|
||||
|
||||
# %% tags=[]
|
||||
# Row-to-row differences of the timestamp and the date column, computed
# separately within each (participant_id, device_id, time) group, then
# renamed to describe what the differences represent.
df_session_time_diff = (
    df_session_counts_time_completed.loc[:, ["datetime_lj", "date_lj", "time"]]
    .groupby(by=["participant_id", "device_id", "time"])
    .diff()
    .rename(
        columns=dict(
            datetime_lj="previous_same_type_time_diff",
            date_lj="time_diff_days",
        )
    )
)
|
||||
|
||||
# %%
|
||||
df_session_time_diff
|
||||
|
||||
# %% tags=[]
|
||||
df_session_counts_time_diff = df_session_counts_time_completed.join(
|
||||
df_session_time_diff, how="left"
|
||||
)
|
||||
|
||||
# %% [markdown]
|
||||
# Now, select only the daytime EMAs of interest. Discard the differences between EMAs that fall on *different days*.
|
||||
|
||||
# %%
|
||||
# Boolean mask over df_session_counts_time_diff combining three conditions:
#   1. the session is a daytime EMA,
#   2. a difference to the previous same-type session was actually calculated,
#   3. that previous session falls on the same day (date difference of zero).
time_workday_completed_less_than_1_day = (
    df_session_counts_time_diff.time.eq("daytime")
    & df_session_counts_time_diff.previous_same_type_time_diff.notna()
    & df_session_counts_time_diff.time_diff_days.eq(datetime.timedelta(0))
)
|
||||
|
||||
# %% tags=[]
|
||||
df_session_workday = df_session_counts_time_diff[time_workday_completed_less_than_1_day]
|
||||
|
||||
# %%
|
||||
# Express the interval to the previous daytime EMA in minutes.
# `.dt.total_seconds()` is used rather than `.dt.seconds`: the latter returns
# only the seconds *component* of the timedelta (ignoring whole days, wrapping
# negative values and truncating sub-second precision), which would silently
# misreport any interval that is not a plain 0-24 h span.
df_session_workday = df_session_workday.assign(
    time_diff_minutes=lambda x: x.previous_same_type_time_diff.dt.total_seconds() / 60
)
|
||||
|
||||
# %%
|
||||
sns.displot(df_session_workday["time_diff_minutes"], binwidth=5, height=5)
|
||||
|
||||
# %% [markdown]
|
||||
# There are some sessions that are really close together. By design, none should be closer than 30 min. Let's take a look at those.
|
||||
|
||||
# %%
|
||||
df_session_workday[df_session_workday.time_diff_minutes < 30]
|
||||
|
||||
# %% [markdown]
|
||||
# There are only 2 instances; look at them individually.
|
||||
|
||||
# %%
|
||||
df_esm_preprocessed.loc[
|
||||
(df_esm_preprocessed.participant_id == 35) & (df_esm_preprocessed.esm_session == 6),
|
||||
["esm_trigger", "esm_session", "datetime_lj", "esm_instructions"],
|
||||
]
|
||||
|
||||
# %%
|
||||
df_esm_preprocessed.loc[
|
||||
(df_esm_preprocessed.participant_id == 45)
|
||||
& (df_esm_preprocessed.esm_session < 3)
|
||||
& (df_esm_preprocessed.device_id == "d848b1c4-33cc-4e22-82ae-96d6b6458a33"),
|
||||
["esm_trigger", "esm_session", "datetime_lj", "esm_instructions"],
|
||||
]
|
||||
|
||||
# %% [markdown]
|
||||
# As these signify bugs, we can safely discard them in the following analysis.
|
||||
|
||||
# %%
|
||||
# Drop the too-close sessions inspected above. The threshold is the exact
# complement of the `< 30` filter used for the inspection, so every interval
# shorter than the 30 min design minimum is discarded (a `> 29` cut-off would
# keep intervals between 29 and 30 minutes).
df_session_workday = df_session_workday[df_session_workday.time_diff_minutes >= 30]
|
||||
|
||||
# %% [markdown]
|
||||
# ### All participants
|
||||
|
||||
# %%
|
||||
df_session_workday.describe()
|
||||
|
||||
# %% [markdown]
|
||||
# These statistics look reasonable.
|
||||
|
||||
# %% [markdown]
|
||||
# ### Differences between participants
|
||||
|
||||
# %%
|
||||
# Per-participant mean of the numeric columns (notably time_diff_minutes).
# numeric_only=True is passed explicitly: the frame also holds string and date
# columns, and pandas >= 2.0 raises a TypeError when asked to average those,
# whereas older versions dropped them silently.
df_mean_daytime_interval = df_session_workday.groupby("participant_id").mean(
    numeric_only=True
)
|
||||
|
||||
# %%
|
||||
df_mean_daytime_interval.describe()
|
||||
|
||||
# %%
|
||||
sns.displot(df_mean_daytime_interval.time_diff_minutes, binwidth=5, height=5)
|
||||
|
||||
# %%
|
||||
df_count_daytime_per_participant = df_session_workday.groupby(
|
||||
["participant_id", "date_lj"]
|
||||
).count()
|
||||
|
||||
# %%
|
||||
df_count_daytime_per_participant["time"].describe()
|
||||
|
||||
# %%
|
||||
sns.displot(df_count_daytime_per_participant.time, binwidth=1, height=5)
|
||||
|
||||
# %% [markdown]
|
||||
# ## Evening EMA
|
||||
|
||||
# %% [markdown]
|
||||
# For evening EMA, determine whether, on each day on which any EMA session was completed, an evening EMA is also present.
|
||||
#
|
||||
# Note, we are only dealing with true EMA sessions, non-sessions etc. have already been filtered out.
|
||||
|
||||
# %%
|
||||
# For every (participant, device, day) group of completed sessions, flag
# whether at least one session in the group is labelled "evening".
s_evening_completed = df_session_counts_time_completed.groupby(
    by=["participant_id", "device_id", "date_lj"]
).apply(lambda day_sessions: day_sessions["time"].eq("evening").any())
|
||||
|
||||
# %%
|
||||
df_session_counts_time_completed
|
||||
|
||||
# %%
|
||||
# Per participant: number of days with an evening EMA divided by the number
# of days with any completed EMA.
s_evening_completed_ratio = s_evening_completed.groupby("participant_id").sum().div(
    s_evening_completed.groupby("participant_id").count()
)
|
||||
|
||||
# %%
|
||||
s_evening_completed_ratio.describe()
|
||||
|
||||
# %%
|
||||
sns.displot(s_evening_completed_ratio - 0.001, binwidth=0.1, height=5)
|
||||
|
||||
# %%
|
||||
|
|
Loading…
Reference in New Issue