Export figures and add additional linear regression analyses.
parent
b1dd27d516
commit
767548b562
|
@ -89,6 +89,6 @@ baseline_inactive = baseline_inactive.assign(
|
||||||
baseline_inactive["age"].describe()
|
baseline_inactive["age"].describe()
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
3710/365.25
|
3618 / 365.25
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
|
|
|
@ -51,25 +51,6 @@ baseline_inactive = baseline[
|
||||||
baseline["Gebruikersnaam"].isin(participants_inactive_usernames)
|
baseline["Gebruikersnaam"].isin(participants_inactive_usernames)
|
||||||
]
|
]
|
||||||
|
|
||||||
# %%
|
|
||||||
df_esm_inactive = get_esm_data(participants_inactive_usernames)
|
|
||||||
|
|
||||||
# %%
|
|
||||||
df_esm_preprocessed = preprocess_esm(df_esm_inactive)
|
|
||||||
df_session_counts_time = classify_sessions_by_completion_time(df_esm_preprocessed)
|
|
||||||
|
|
||||||
# %%
|
|
||||||
tbl_session_outcomes = df_session_counts_time.reset_index()[
|
|
||||||
"session_response"
|
|
||||||
].value_counts()
|
|
||||||
|
|
||||||
# %%
|
|
||||||
print("All sessions:", len(df_session_counts_time))
|
|
||||||
print("-------------------------------------")
|
|
||||||
print(tbl_session_outcomes)
|
|
||||||
print("-------------------------------------")
|
|
||||||
print(tbl_session_outcomes / len(df_session_counts_time))
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
VARIABLES_TO_TRANSLATE = {
|
VARIABLES_TO_TRANSLATE = {
|
||||||
"Gebruikersnaam": "username",
|
"Gebruikersnaam": "username",
|
||||||
|
@ -83,9 +64,37 @@ baseline_inactive = baseline_inactive.assign(
|
||||||
age=lambda x: (now - x.date_of_birth).dt.days / 365.25245,
|
age=lambda x: (now - x.date_of_birth).dt.days / 365.25245,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_esm_inactive = get_esm_data(participants_inactive_usernames)
|
||||||
|
|
||||||
|
# %% [markdown]
|
||||||
|
# # Classify EMA sessions
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_esm_preprocessed = preprocess_esm(df_esm_inactive)
|
||||||
|
df_session_counts_time = classify_sessions_by_completion_time(df_esm_preprocessed)
|
||||||
|
|
||||||
|
# %% [markdown]
|
||||||
|
# Sessions are now classified according to the type of session (a true questionnaire or simple single questions) and the user's response.
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_session_counts_time
|
df_session_counts_time
|
||||||
|
|
||||||
|
# %%
|
||||||
|
tbl_session_outcomes = df_session_counts_time.reset_index()[
|
||||||
|
"session_response"
|
||||||
|
].value_counts()
|
||||||
|
|
||||||
|
# %%
|
||||||
|
print("All sessions:", len(df_session_counts_time))
|
||||||
|
print("-------------------------------------")
|
||||||
|
print(tbl_session_outcomes)
|
||||||
|
print("-------------------------------------")
|
||||||
|
print(tbl_session_outcomes / len(df_session_counts_time))
|
||||||
|
|
||||||
|
# %% [markdown]
|
||||||
|
# ## Consider only true EMA sessions
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_session_finished = df_session_counts_time[
|
df_session_finished = df_session_counts_time[
|
||||||
df_session_counts_time["session_response"] == SESSION_STATUS_COMPLETE
|
df_session_counts_time["session_response"] == SESSION_STATUS_COMPLETE
|
||||||
|
@ -117,6 +126,9 @@ df_adherence
|
||||||
# %%
|
# %%
|
||||||
df_adherence.describe()
|
df_adherence.describe()
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_adherence[["gender", "startlanguage"]].value_counts()
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
sns.displot(df_adherence["finished_sessions"], binwidth=5, height=5)
|
sns.displot(df_adherence["finished_sessions"], binwidth=5, height=5)
|
||||||
|
|
||||||
|
@ -209,7 +221,9 @@ df_session_workday = df_session_workday.assign(
|
||||||
)
|
)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
sns.displot(df_session_workday["time_diff_minutes"], binwidth=5, height=5)
|
g1 = sns.displot(df_session_workday["time_diff_minutes"], binwidth=5, height=5)
|
||||||
|
g1.set_axis_labels("Time difference [min]", "Session count")
|
||||||
|
# g1.savefig("WorkdayEMAtimeDiff.pdf")
|
||||||
|
|
||||||
# %% [markdown]
|
# %% [markdown]
|
||||||
# There are some sessions that are really close together. By design, none should be closer than 30 min. Let's take a look at those.
|
# There are some sessions that are really close together. By design, none should be closer than 30 min. Let's take a look at those.
|
||||||
|
@ -246,6 +260,11 @@ df_session_workday = df_session_workday[df_session_workday.time_diff_minutes > 2
|
||||||
# %%
|
# %%
|
||||||
df_session_workday.describe()
|
df_session_workday.describe()
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_session_workday[df_session_workday["time_diff_minutes"] < 120].shape[
|
||||||
|
0
|
||||||
|
] / df_session_workday.shape[0]
|
||||||
|
|
||||||
# %% [markdown]
|
# %% [markdown]
|
||||||
# These statistics look reasonable.
|
# These statistics look reasonable.
|
||||||
|
|
||||||
|
@ -253,13 +272,27 @@ df_session_workday.describe()
|
||||||
# ### Differences between participants
|
# ### Differences between participants
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_mean_daytime_interval = df_session_workday.groupby("participant_id").mean()
|
df_mean_daytime_interval = df_session_workday.groupby("participant_id").median()
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_mean_daytime_interval.describe()
|
df_mean_daytime_interval.describe()
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
sns.displot(df_mean_daytime_interval.time_diff_minutes, binwidth=5, height=5)
|
g2 = sns.displot(df_mean_daytime_interval.time_diff_minutes, binwidth=5, height=5)
|
||||||
|
g2.set_axis_labels("Median time difference [min]", "Participant count")
|
||||||
|
# g2.savefig("WorkdayEMAtimeDiffMedianParticip.pdf")
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_adherence = df_adherence.merge(
|
||||||
|
df_mean_daytime_interval, how="left", left_on="participant_id", right_index=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
lr_ols_time_diff_median = smf.ols(
|
||||||
|
"time_diff_minutes ~ C(gender) + C(startlanguage) + age", data=df_adherence
|
||||||
|
)
|
||||||
|
ls_result_time_diff_median = lr_ols_time_diff_median.fit()
|
||||||
|
ls_result_time_diff_median.summary()
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_count_daytime_per_participant = df_session_workday.groupby(
|
df_count_daytime_per_participant = df_session_workday.groupby(
|
||||||
|
@ -288,6 +321,9 @@ s_evening_completed = df_session_counts_time_completed.groupby(
|
||||||
# %%
|
# %%
|
||||||
df_session_counts_time_completed
|
df_session_counts_time_completed
|
||||||
|
|
||||||
|
# %%
|
||||||
|
s_evening_completed.sum()
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
s_evening_completed_ratio = (
|
s_evening_completed_ratio = (
|
||||||
s_evening_completed.groupby("participant_id").sum()
|
s_evening_completed.groupby("participant_id").sum()
|
||||||
|
@ -298,6 +334,23 @@ s_evening_completed_ratio = (
|
||||||
s_evening_completed_ratio.describe()
|
s_evening_completed_ratio.describe()
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
sns.displot(s_evening_completed_ratio - 0.001, binwidth=0.1, height=5)
|
g3 = sns.displot(s_evening_completed_ratio - 0.001, binwidth=0.05, height=5)
|
||||||
|
g3.set_axis_labels("Ratio of days with the evening EMA filled out", "Participant count")
|
||||||
|
# g3.savefig("EveningEMAratioParticip.pdf")
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_adherence = df_adherence.merge(
|
||||||
|
s_evening_completed_ratio.rename("evening_EMA_ratio"),
|
||||||
|
how="left",
|
||||||
|
left_on="participant_id",
|
||||||
|
right_index=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
lr_ols_evening_ratio = smf.ols(
|
||||||
|
"evening_EMA_ratio ~ C(gender) + C(startlanguage) + age", data=df_adherence
|
||||||
|
)
|
||||||
|
ls_result_evening_ratio = lr_ols_evening_ratio.fit()
|
||||||
|
ls_result_evening_ratio.summary()
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
|
|
Loading…
Reference in New Issue