Correct adherence data to only count sessions once.

Add age as a float predictor.
Obtain the same result with linear regression.
communication
junos 2021-06-08 22:32:14 +02:00
parent 1294ee40a0
commit 9cba88a6e3
1 changed file with 18 additions and 10 deletions

View File

@@ -20,7 +20,7 @@ import datetime
import seaborn as sns import seaborn as sns
import pandas as pd import pandas as pd
import statsmodels.api as sm import statsmodels.api as sm
from statsmodels.formula.api import ols import statsmodels.formula.api as smf
nb_dir = os.path.split(os.getcwd())[0] nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path: if nb_dir not in sys.path:
@@ -66,22 +66,30 @@ VARIABLES_TO_TRANSLATE = {
baseline_inactive.rename(columns=VARIABLES_TO_TRANSLATE, copy=False, inplace=True) baseline_inactive.rename(columns=VARIABLES_TO_TRANSLATE, copy=False, inplace=True)
now = pd.Timestamp('now') now = pd.Timestamp('now')
baseline_inactive = baseline_inactive.assign(date_of_birth = lambda x: pd.to_datetime(x.date_of_birth), baseline_inactive = baseline_inactive.assign(date_of_birth = lambda x: pd.to_datetime(x.date_of_birth),
age = lambda x: now - x.date_of_birth) age = lambda x: (now - x.date_of_birth).dt.days/365.25245)
# %% # %%
df_session_counts df_session_counts
# %%
df_session_finished = df_session_counts[df_session_counts["session_response"] == "esm_finished"].reset_index()
# %%
df_participant_finished_sessions = df_session_finished.groupby("participant_id").count()["esm_session"].rename("finished_sessions")
# %% # %%
df_adherence = baseline_inactive[["username", "gender", "age", "startlanguage"]].merge(df_esm_preprocessed[["username", "participant_id"]].drop_duplicates(), how="left", on="username") df_adherence = baseline_inactive[["username", "gender", "age", "startlanguage"]].merge(df_esm_preprocessed[["username", "participant_id"]].drop_duplicates(), how="left", on="username")
df_adherence = df_adherence.merge(df_participant_finished_sessions, how="left", left_on="participant_id", right_index=True)
# %% tags=[]
df_adherence
# %% # %%
df_esm_preprocessed_adherence = df_esm_preprocessed.merge(df_session_counts.reset_index(), how="left", on=["participant_id", "device_id", "esm_session"]) lm_adherence = smf.ols('finished_sessions ~ C(gender) + C(startlanguage) + age', data=df_adherence).fit()
#df_esm_finished = df_esm_preprocessed_adherence[df_esm_preprocessed_adherence["session_response"]=="esm_finished"]
# %%
df_adherence = df_adherence.merge(df_esm_preprocessed_adherence[df_esm_preprocessed_adherence["session_response"] == "esm_finished"].groupby("participant_id").count()["session_response"], how="left", on="participant_id")
# %%
lm_adherence = ols('session_response ~ C(gender, Sum) + C(startlanguage, Sum)', data=df_adherence).fit()
table = sm.stats.anova_lm(lm_adherence, typ=2) # Type 2 ANOVA DataFrame table = sm.stats.anova_lm(lm_adherence, typ=2) # Type 2 ANOVA DataFrame
print(table) print(table)
# %%
lr_ols = smf.ols('finished_sessions ~ C(gender) + C(startlanguage) + age', data=df_adherence)
ls_result = lr_ols.fit()
ls_result.summary()