From 9cba88a6e3e7cbc940f0a25793048abfff11a20e Mon Sep 17 00:00:00 2001
From: junos
Date: Tue, 8 Jun 2021 22:32:14 +0200
Subject: [PATCH] Correct adherence data to only count sessions once. Add age
 as a float predictor. Obtain the same result with linear regression.

---
 statistical_analysis/concordance.py | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/statistical_analysis/concordance.py b/statistical_analysis/concordance.py
index 45d2b9c..12501b7 100644
--- a/statistical_analysis/concordance.py
+++ b/statistical_analysis/concordance.py
@@ -20,7 +20,7 @@ import datetime
 import seaborn as sns
 import pandas as pd
 import statsmodels.api as sm
-from statsmodels.formula.api import ols
+import statsmodels.formula.api as smf
 
 nb_dir = os.path.split(os.getcwd())[0]
 if nb_dir not in sys.path:
@@ -66,22 +66,30 @@ VARIABLES_TO_TRANSLATE = {
 baseline_inactive.rename(columns=VARIABLES_TO_TRANSLATE, copy=False, inplace=True)
 now = pd.Timestamp('now')
 baseline_inactive = baseline_inactive.assign(date_of_birth = lambda x: pd.to_datetime(x.date_of_birth),
-                                             age = lambda x: now - x.date_of_birth)
+                                             age = lambda x: (now - x.date_of_birth).dt.days/365.25245)
 
 # %%
 df_session_counts
 
+# %%
+df_session_finished = df_session_counts[df_session_counts["session_response"] == "esm_finished"].reset_index()
+
+# %%
+df_participant_finished_sessions = df_session_finished.groupby("participant_id").count()["esm_session"].rename("finished_sessions")
+
 # %%
 df_adherence = baseline_inactive[["username", "gender", "age", "startlanguage"]].merge(df_esm_preprocessed[["username", "participant_id"]].drop_duplicates(), how="left", on="username")
+df_adherence = df_adherence.merge(df_participant_finished_sessions, how="left", left_on="participant_id", right_index=True)
+
+# %% tags=[]
+df_adherence
 
 # %%
-df_esm_preprocessed_adherence = df_esm_preprocessed.merge(df_session_counts.reset_index(), how="left", on=["participant_id", "device_id", "esm_session"])
-#df_esm_finished = df_esm_preprocessed_adherence[df_esm_preprocessed_adherence["session_response"]=="esm_finished"]
-
-# %%
-df_adherence = df_adherence.merge(df_esm_preprocessed_adherence[df_esm_preprocessed_adherence["session_response"] == "esm_finished"].groupby("participant_id").count()["session_response"], how="left", on="participant_id")
-
-# %%
-lm_adherence = ols('session_response ~ C(gender, Sum) + C(startlanguage, Sum)', data=df_adherence).fit()
+lm_adherence = smf.ols('finished_sessions ~ C(gender) + C(startlanguage) + age', data=df_adherence).fit()
 table = sm.stats.anova_lm(lm_adherence, typ=2) # Type 2 ANOVA DataFrame
 print(table)
+
+# %%
+lr_ols = smf.ols('finished_sessions ~ C(gender) + C(startlanguage) + age', data=df_adherence)
+ls_result = lr_ols.fit()
+ls_result.summary()