Correct adherence data to only count sessions once.
Add age as a float predictor. Obtain the same result with linear regression.
communication
parent
1294ee40a0
commit
9cba88a6e3
|
@ -20,7 +20,7 @@ import datetime
|
||||||
import seaborn as sns
|
import seaborn as sns
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import statsmodels.api as sm
|
import statsmodels.api as sm
|
||||||
from statsmodels.formula.api import ols
|
import statsmodels.formula.api as smf
|
||||||
|
|
||||||
nb_dir = os.path.split(os.getcwd())[0]
|
nb_dir = os.path.split(os.getcwd())[0]
|
||||||
if nb_dir not in sys.path:
|
if nb_dir not in sys.path:
|
||||||
|
@ -66,22 +66,30 @@ VARIABLES_TO_TRANSLATE = {
|
||||||
baseline_inactive.rename(columns=VARIABLES_TO_TRANSLATE, copy=False, inplace=True)
|
baseline_inactive.rename(columns=VARIABLES_TO_TRANSLATE, copy=False, inplace=True)
|
||||||
now = pd.Timestamp('now')
|
now = pd.Timestamp('now')
|
||||||
baseline_inactive = baseline_inactive.assign(date_of_birth = lambda x: pd.to_datetime(x.date_of_birth),
|
baseline_inactive = baseline_inactive.assign(date_of_birth = lambda x: pd.to_datetime(x.date_of_birth),
|
||||||
age = lambda x: now - x.date_of_birth)
|
age = lambda x: (now - x.date_of_birth).dt.days/365.25245)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_session_counts
|
df_session_counts
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_session_finished = df_session_counts[df_session_counts["session_response"] == "esm_finished"].reset_index()
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_participant_finished_sessions = df_session_finished.groupby("participant_id").count()["esm_session"].rename("finished_sessions")
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_adherence = baseline_inactive[["username", "gender", "age", "startlanguage"]].merge(df_esm_preprocessed[["username", "participant_id"]].drop_duplicates(), how="left", on="username")
|
df_adherence = baseline_inactive[["username", "gender", "age", "startlanguage"]].merge(df_esm_preprocessed[["username", "participant_id"]].drop_duplicates(), how="left", on="username")
|
||||||
|
df_adherence = df_adherence.merge(df_participant_finished_sessions, how="left", left_on="participant_id", right_index=True)
|
||||||
|
|
||||||
|
# %% tags=[]
|
||||||
|
df_adherence
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_esm_preprocessed_adherence = df_esm_preprocessed.merge(df_session_counts.reset_index(), how="left", on=["participant_id", "device_id", "esm_session"])
|
lm_adherence = smf.ols('finished_sessions ~ C(gender) + C(startlanguage) + age', data=df_adherence).fit()
|
||||||
#df_esm_finished = df_esm_preprocessed_adherence[df_esm_preprocessed_adherence["session_response"]=="esm_finished"]
|
|
||||||
|
|
||||||
# %%
|
|
||||||
df_adherence = df_adherence.merge(df_esm_preprocessed_adherence[df_esm_preprocessed_adherence["session_response"] == "esm_finished"].groupby("participant_id").count()["session_response"], how="left", on="participant_id")
|
|
||||||
|
|
||||||
# %%
|
|
||||||
lm_adherence = ols('session_response ~ C(gender, Sum) + C(startlanguage, Sum)', data=df_adherence).fit()
|
|
||||||
table = sm.stats.anova_lm(lm_adherence, typ=2) # Type 2 ANOVA DataFrame
|
table = sm.stats.anova_lm(lm_adherence, typ=2) # Type 2 ANOVA DataFrame
|
||||||
print(table)
|
print(table)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
lr_ols = smf.ols('finished_sessions ~ C(gender) + C(startlanguage) + age', data=df_adherence)
|
||||||
|
ls_result = lr_ols.fit()
|
||||||
|
ls_result.summary()
|
||||||
|
|
Loading…
Reference in New Issue