Finish labelling EMA sessions and document classify_sessions_adherence function.
parent 371e755159
commit f48e5469e0
@@ -13,35 +13,50 @@
 # name: straw2analysis
 # ---

+import datetime
+
 # %%
 import os
 import sys
-import datetime
-import seaborn as sns

 import pandas as pd
+import seaborn as sns

 nb_dir = os.path.split(os.getcwd())[0]
-if nb_dir not in sys.path: sys.path.append(nb_dir)
+if nb_dir not in sys.path:
+    sys.path.append(nb_dir)

 import participants.query_db

 # %%
-baseline_si = pd.read_csv('E:/STRAWbaseline/results-survey637813.csv')
-baseline_be_1 = pd.read_csv('E:/STRAWbaseline/results-survey358134.csv')
-baseline_be_2 = pd.read_csv('E:/STRAWbaseline/results-survey413767.csv')
+baseline_si = pd.read_csv("E:/STRAWbaseline/results-survey637813.csv")
+baseline_be_1 = pd.read_csv("E:/STRAWbaseline/results-survey358134.csv")
+baseline_be_2 = pd.read_csv("E:/STRAWbaseline/results-survey413767.csv")

 # %%
-participants_inactive_usernames = participants.query_db.get_usernames(collection_start=datetime.date.fromisoformat("2020-08-01"))
+participants_inactive_usernames = participants.query_db.get_usernames(
+    collection_start=datetime.date.fromisoformat("2020-08-01")
+)

 # %%
-baseline = pd.concat([baseline_si, baseline_be_1, baseline_be_2], join="inner").reset_index().drop(columns="index")
-baseline_inactive = baseline[baseline["Gebruikersnaam"].isin(participants_inactive_usernames)]
+baseline = (
+    pd.concat([baseline_si, baseline_be_1, baseline_be_2], join="inner")
+    .reset_index()
+    .drop(columns="index")
+)
+baseline_inactive = baseline[
+    baseline["Gebruikersnaam"].isin(participants_inactive_usernames)
+]

 # %%
 baseline

 # %%
-participants_inactive_usernames = pd.Series(participants.query_db.get_usernames(collection_start=datetime.date.fromisoformat("2020-08-01")))
+participants_inactive_usernames = pd.Series(
+    participants.query_db.get_usernames(
+        collection_start=datetime.date.fromisoformat("2020-08-01")
+    )
+)

 # %% [markdown]
 # # Demographic information

@@ -54,7 +69,9 @@ print(baseline_inactive.shape[0])
 print(participants_inactive_usernames.shape[0])

 # %%
-participants_inactive_usernames[~participants_inactive_usernames.isin(baseline["Gebruikersnaam"])].sort_values()
+participants_inactive_usernames[
+    ~participants_inactive_usernames.isin(baseline["Gebruikersnaam"])
+].sort_values()

 # %%
 baseline_inactive["startlanguage"].value_counts()

@@ -63,9 +80,10 @@ baseline_inactive["startlanguage"].value_counts()
 baseline_inactive["Geslacht"].value_counts()

 # %%
-now = pd.Timestamp('now')
-baseline_inactive = baseline_inactive.assign(dob = lambda x: pd.to_datetime(x.Geboortedatum),
-                                             age = lambda x: now - x.dob)
+now = pd.Timestamp("now")
+baseline_inactive = baseline_inactive.assign(
+    dob=lambda x: pd.to_datetime(x.Geboortedatum), age=lambda x: now - x.dob
+)

 # %%
 baseline_inactive["age"].describe()

@@ -61,9 +61,9 @@ df_esm_preprocessed.columns
 # One approach would be to count distinct session IDs which are incremented for each group of EMAs. However, since not every question answered counts as a fulfilled EMA, some unique session IDs should be eliminated first.

 # %%
-session_counts = df_esm_preprocessed.groupby(["participant_id", "device_id", "esm_session"]).count()[
-    "id"
-]
+session_counts = df_esm_preprocessed.groupby(
+    ["participant_id", "device_id", "esm_session"]
+).count()["id"]

 # %% [markdown]
 # Group data by participant_id and esm_session and count the number of instances (by id). Session counts are therefore counts of how many times a specific session ID appears *within* a specific participant.

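For reference, the counting step in this hunk can be reproduced on toy data. This sketch is not part of the commit; the column names follow the notebook, the values are made up:

import pandas as pd

# Toy ESM data: two sessions for one participant, one session for another.
df_esm_toy = pd.DataFrame(
    {
        "id": [1, 2, 3, 4, 5],
        "participant_id": [31, 31, 31, 32, 32],
        "device_id": ["a", "a", "a", "b", "b"],
        "esm_session": [1, 1, 2, 1, 1],
    }
)

# Count how many ESM items belong to each (participant, device, session) triple.
session_counts = df_esm_toy.groupby(
    ["participant_id", "device_id", "esm_session"]
).count()["id"]
print(session_counts)
# (31, a, 1) -> 2 items; (31, a, 2) -> 1 item; (32, b, 1) -> 2 items
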
@@ -142,11 +142,17 @@ df_esm_preprocessed.query("participant_id == 31 & esm_session == 77")[
 ]

 # %% tags=[]
-df_esm_2 = df_session_counts[
-    df_session_counts["esm_session_count"] == 2
-].reset_index().merge(df_esm_preprocessed, how="left", on=["participant_id", "device_id", "esm_session"])
-#with pd.option_context('display.max_rows', None, 'display.max_columns', None): # more options can be specified also
-#display(df_esm_2)
+df_esm_2 = (
+    df_session_counts[df_session_counts["esm_session_count"] == 2]
+    .reset_index()
+    .merge(
+        df_esm_preprocessed,
+        how="left",
+        on=["participant_id", "device_id", "esm_session"],
+    )
+)
+# with pd.option_context('display.max_rows', None, 'display.max_columns', None): # more options can be specified also
+# display(df_esm_2)

 # %% [markdown] tags=[]
 # ### Long sessions

@@ -215,7 +221,9 @@ df_session_counts.count()
 # %%
 non_session = session_group_by.apply(
     lambda x: (
-        (x.esm_user_answer == "DayFinished3421") | (x.esm_user_answer == "DayOff3421") | (x.esm_user_answer == "DayFinishedSetEvening")
+        (x.esm_user_answer == "DayFinished3421")
+        | (x.esm_user_answer == "DayOff3421")
+        | (x.esm_user_answer == "DayFinishedSetEvening")
     ).any()
 )
 df_session_counts.loc[non_session, "session_response"] = "day_finished"

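The pattern in this hunk (a per-group boolean from groupby().apply() used to label rows of the counts frame via .loc) can be illustrated on toy data. A minimal sketch, not part of the commit:

import pandas as pd

df = pd.DataFrame(
    {
        "participant_id": [1, 1, 2, 2],
        "esm_session": [1, 1, 1, 1],
        "esm_user_answer": ["DayFinished3421", "Good", "Fine", "Great"],
    }
)
grouped = df.groupby(["participant_id", "esm_session"])

# True for every session that contains at least one "day finished" answer.
non_session = grouped.apply(
    lambda x: (x.esm_user_answer == "DayFinished3421").any()
)

# The boolean Series shares the groups' MultiIndex, so it can label rows directly.
counts = grouped.size().to_frame("esm_session_count")
counts.loc[non_session, "session_response"] = "day_finished"
print(counts)
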
@@ -243,6 +251,36 @@ df_session_counts.loc[esm_removed, "session_response"].value_counts()
 # %% [markdown]
 # It turns out that these had been accounted for with ESM statuses.

+# %% [markdown]
+# ### Singleton sessions
+
+# %%
+df_session_counts.count()
+
+# %%
+df_session_counts[
+    (df_session_counts.esm_session_count == 1)
+    & df_session_counts.session_response.isna()
+]
+
+# %%
+df_session_1 = df_session_counts[
+    (df_session_counts["esm_session_count"] == 1)
+    & df_session_counts.session_response.isna()
+]
+df_esm_unique_session = df_session_1.join(
+    df_esm_preprocessed.set_index(["participant_id", "device_id", "esm_session"])
+)
+df_esm_unique_session = df_esm_unique_session["esm_trigger"].rename("session_response")
+
+# %%
+df_session_counts.loc[
+    df_esm_unique_session.index, "session_response"
+] = df_esm_unique_session
+
+# %%
+df_session_counts.count()
+
 # %% [markdown]
 # ### Evening_last

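The lookup added above (joining the filtered counts frame against the ESM data set to a matching MultiIndex, to recover each singleton session's trigger) works like this on toy data. A sketch only, not part of the commit:

import pandas as pd

idx = ["participant_id", "device_id", "esm_session"]
counts = pd.DataFrame(
    {"participant_id": [1, 1], "device_id": ["a", "a"],
     "esm_session": [1, 2], "esm_session_count": [1, 4]}
).set_index(idx)
esm = pd.DataFrame(
    {"participant_id": [1, 1, 1], "device_id": ["a", "a", "a"],
     "esm_session": [1, 2, 2], "esm_trigger": ["morning", "evening", "evening"]}
)

# Keep only singleton sessions and look up their trigger via an index join.
singletons = counts[counts["esm_session_count"] == 1]
trigger = singletons.join(esm.set_index(idx))["esm_trigger"].rename("session_response")
counts.loc[trigger.index, "session_response"] = trigger
print(counts)  # session (1, a, 1) is labelled "morning", (1, a, 2) stays NaN
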
@@ -270,38 +308,52 @@ sns.displot(
 )

 # %% [markdown]
-# ### Singleton sessions
+# ### Repeated sessions

 # %% [markdown]
 # The sessions lengths that repeat often can probably be used as filled in EMAs. Let's only review the session lengths that are rare.

 # %%
-df_session_counts.count()
+df_session_counts.loc[
+    df_session_counts.session_response.isna(), "esm_session_count"
+].value_counts().sort_index()

 # %%
-df_session_counts[(df_session_counts.esm_session_count == 1) & df_session_counts.session_response.isna()]
-
-# %%
-df_session_1 = df_session_counts[(df_session_counts["esm_session_count"] == 1) & df_session_counts.session_response.isna()]
-df_esm_unique_session = df_session_1.join(
-    df_esm_preprocessed.set_index(["participant_id", "device_id", "esm_session"])
+df_session_7 = df_session_counts[
+    (df_session_counts["esm_session_count"] == 7)
+    & df_session_counts.session_response.isna()
+]
+df_esm_session_7 = df_session_7.join(
+    df_esm_preprocessed.set_index(["participant_id", "device_id", "esm_session"]),
+    how="left",
 )
-df_esm_unique_session = df_esm_unique_session["esm_trigger"].rename("session_response")

-# %%
-df_session_counts.loc[df_esm_unique_session.index, "session_response"] = df_esm_unique_session
+# %% jupyter={"outputs_hidden": true} tags=[]
+with pd.option_context(
+    "display.max_rows", None, "display.max_columns", None
+):  # more options can be specified also
+    display(df_esm_session_7[["esm_trigger", "esm_instructions", "esm_user_answer"]])

-# %%
-df_session_counts.count()
+# %% [markdown]
+# These are all morning questionnaires with "commute" selected or rarely "long break" in the morning.

 # %%
-df_session_counts.merge()
-
-# %%
-df_esm_78243 = df_esm_preprocessed[df_esm_preprocessed["username"] == "uploader_78243"]
-df_esm_78243 = df_esm_78243.sort_values("_id")[["id","_id","datetime_lj", "esm_status","esm_trigger","esm_instructions","esm_user_answer","esm_session"]]
-
-# %%
-df_esm_78243.columns
-
-# %%
-df_esm_78243.to_csv("example.csv")
+df_session_27 = df_session_counts[
+    (df_session_counts["esm_session_count"] == 27)
+    & df_session_counts.session_response.isna()
+]
+df_esm_session_27 = df_session_27.join(
+    df_esm_preprocessed.set_index(["participant_id", "device_id", "esm_session"]),
+    how="left",
+)
+
+# %% jupyter={"outputs_hidden": true} tags=[]
+with pd.option_context(
+    "display.max_rows", None, "display.max_columns", None
+):  # more options can be specified also
+    display(df_esm_session_27[["esm_trigger", "esm_instructions", "esm_user_answer"]])
+
+# %% [markdown]
+# These are all morning questionnaires with morning *and* workday items, with the feedback added and also branched in the longest possible way.

 # %%

@@ -2,11 +2,12 @@ from collections.abc import Collection

 import pandas as pd


 def read_baseline(paths: Collection) -> pd.DataFrame:
-    #TODO Read CSV files and concat them.
+    # TODO Read CSV files and concat them.
     pass


 def preprocess_baseline(df_baseline_from_csv: pd.DataFrame) -> pd.DataFrame:
-    #TODO Translate columns, calculate age.
+    # TODO Translate columns, calculate age.
     pass

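The two TODO stubs describe steps that already exist as cells in the notebooks in this commit. A possible implementation is sketched below; it is not part of the commit, VARIABLES_TO_TRANSLATE and the age formula are copied from the adherence notebook further down, and 365.25 is an assumed average year length (the notebook itself divides by 365.25245):

from collections.abc import Collection

import pandas as pd

VARIABLES_TO_TRANSLATE = {
    "Gebruikersnaam": "username",
    "Geslacht": "gender",
    "Geboortedatum": "date_of_birth",
}


def read_baseline(paths: Collection) -> pd.DataFrame:
    # Read CSV files and concatenate them on their common columns.
    return (
        pd.concat([pd.read_csv(path) for path in paths], join="inner")
        .reset_index()
        .drop(columns="index")
    )


def preprocess_baseline(df_baseline_from_csv: pd.DataFrame) -> pd.DataFrame:
    # Translate columns and calculate age, mirroring the notebook cells.
    df_baseline = df_baseline_from_csv.rename(columns=VARIABLES_TO_TRANSLATE)
    now = pd.Timestamp("now")
    return df_baseline.assign(
        date_of_birth=lambda x: pd.to_datetime(x.date_of_birth),
        age=lambda x: (now - x.date_of_birth).dt.days / 365.25,
    )
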
@@ -68,10 +68,21 @@ def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
 def classify_sessions_adherence(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
     """
     For each distinct EMA session, determine how the participant responded to it.
-    Possible outcomes are: esm_unanswered
+    Possible outcomes are: SESSION_STATUS_UNANSWERED, SESSION_STATUS_DAY_FINISHED, and SESSION_STATUS_COMPLETE

-    This is done in several steps.
-    #TODO Finish the documentation.
+    This is done in three steps.
+
+    First, the esm_status is considered.
+    If any of the ESMs in a session has a status *other than* "answered", then this session is taken as unfinished.
+
+    Second, the sessions which do not represent full questionnaires are identified.
+    These are sessions where participants only marked they are finished with the day or have not yet started working.
+
+    Third, the sessions with only one item are marked with their trigger.
+    We never offered questionnaires with single items, so we can be sure these are unfinished.
+
+    Finally, all sessions that remain are marked as completed.
+    By going through different possibilities in expl_esm.ipynb, this turned out to be a reasonable option.

     Parameters
     ----------

@@ -80,47 +91,55 @@ def classify_sessions_adherence(df_esm_preprocessed: pd.DataFrame) -> pd.DataFra

     Returns
     -------
-    some dataframe
+    df_session_counts: pd.Dataframe
+        A dataframe of all sessions (grouped by GROUP_SESSIONS_BY) with their statuses and the number of items.
     """
     sessions_grouped = df_esm_preprocessed.groupby(GROUP_SESSIONS_BY)

+    # 0. First, assign all session statuses as NaN.
     df_session_counts = pd.DataFrame(sessions_grouped.count()["id"]).rename(
         columns={"id": "esm_session_count"}
     )
     df_session_counts["session_response"] = np.NaN

-    esm_not_answered = sessions_grouped.apply(lambda x: (x.esm_status != ESM_STATUS_ANSWERED).any())
-    df_session_counts.loc[esm_not_answered, "session_response"] = SESSION_STATUS_UNANSWERED
+    # 1. Identify all ESMs with status other than answered.
+    esm_not_answered = sessions_grouped.apply(
+        lambda x: (x.esm_status != ESM_STATUS_ANSWERED).any()
+    )
+    df_session_counts.loc[
+        esm_not_answered, "session_response"
+    ] = SESSION_STATUS_UNANSWERED

+    # 2. Identify non-sessions, i.e. answers about the end of the day.
     non_session = sessions_grouped.apply(
         lambda x: (
             (x.esm_user_answer == "DayFinished3421")  # I finished working for today.
             | (x.esm_user_answer == "DayOff3421")  # I am not going to work today.
-            | (x.esm_user_answer == "DayFinishedSetEvening")  # When would you like to answer the evening EMA?
+            | (
+                x.esm_user_answer == "DayFinishedSetEvening"
+            )  # When would you like to answer the evening EMA?
         ).any()
     )
     df_session_counts.loc[non_session, "session_response"] = SESSION_STATUS_DAY_FINISHED

-    singleton_sessions = (df_session_counts.esm_session_count == 1) & (df_session_counts.session_response.isna())
+    # 3. Identify sessions appearing only once, as those were not true EMAs for sure.
+    singleton_sessions = (df_session_counts.esm_session_count == 1) & (
+        df_session_counts.session_response.isna()
+    )
     df_session_1 = df_session_counts[singleton_sessions]
     df_esm_unique_session = df_session_1.join(
         df_esm_preprocessed.set_index(GROUP_SESSIONS_BY), how="left"
     )
-    df_esm_unique_session = df_esm_unique_session.assign(session_response=lambda x: x.esm_trigger)["session_response"]
-    df_session_counts.loc[df_esm_unique_session.index, "session_response"] = df_esm_unique_session
+    df_esm_unique_session = df_esm_unique_session.assign(
+        session_response=lambda x: x.esm_trigger
+    )["session_response"]
+    df_session_counts.loc[
+        df_esm_unique_session.index, "session_response"
+    ] = df_esm_unique_session

     finished_sessions = sessions_grouped.apply(
         lambda x: (x.esm_trigger.str.endswith("_last")).any()
     )
     df_session_counts.loc[finished_sessions, "session_response"] = SESSION_STATUS_COMPLETE

     # TODO Look at evening-evening_last sequence, if everything is caught with finished sessions

     # TODO What can be done about morning EMA, perhaps morning-morning_first (sic!) sequence?

     # TODO What can be done about workday EMA.

-    df_session_counts.loc[df_session_counts.session_response.isna(), "session_response"] = "esm_finished"
-    # TODO But for now, simply take all other ESMs as answered.
+    # 4. Mark the remaining sessions as completed.
+    df_session_counts.loc[
+        df_session_counts.session_response.isna(), "session_response"
+    ] = SESSION_STATUS_COMPLETE

     return df_session_counts

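Taken together, a minimal usage sketch of the documented function. This is not part of the commit and assumes that features.esm exports these names (the adherence notebook below uses a star import) and that participants.query_db is available as in the notebooks:

import datetime

import participants.query_db
from features.esm import classify_sessions_adherence, get_esm_data, preprocess_esm

usernames = participants.query_db.get_usernames(
    collection_start=datetime.date.fromisoformat("2020-08-01")
)
df_esm = preprocess_esm(get_esm_data(usernames))
df_session_counts = classify_sessions_adherence(df_esm)

# One row per (participant_id, device_id, esm_session) with its item count
# and a session_response label such as SESSION_STATUS_COMPLETE.
print(df_session_counts["session_response"].value_counts())
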
@@ -13,12 +13,14 @@
 # name: straw2analysis
 # ---

+import datetime
+
 # %%
 import os
 import sys
-import datetime
-import seaborn as sns

 import pandas as pd
+import seaborn as sns
 import statsmodels.api as sm
 import statsmodels.formula.api as smf

@@ -29,16 +31,24 @@ import participants.query_db
 from features.esm import *

 # %%
-baseline_si = pd.read_csv('E:/STRAWbaseline/results-survey637813.csv')
-baseline_be_1 = pd.read_csv('E:/STRAWbaseline/results-survey358134.csv')
-baseline_be_2 = pd.read_csv('E:/STRAWbaseline/results-survey413767.csv')
-baseline = pd.concat([baseline_si, baseline_be_1, baseline_be_2], join="inner").reset_index().drop(columns="index")
+baseline_si = pd.read_csv("E:/STRAWbaseline/results-survey637813.csv")
+baseline_be_1 = pd.read_csv("E:/STRAWbaseline/results-survey358134.csv")
+baseline_be_2 = pd.read_csv("E:/STRAWbaseline/results-survey413767.csv")
+baseline = (
+    pd.concat([baseline_si, baseline_be_1, baseline_be_2], join="inner")
+    .reset_index()
+    .drop(columns="index")
+)

 # %%
-participants_inactive_usernames = participants.query_db.get_usernames(collection_start=datetime.date.fromisoformat("2020-08-01"))
+participants_inactive_usernames = participants.query_db.get_usernames(
+    collection_start=datetime.date.fromisoformat("2020-08-01")
+)

 # %%
-baseline_inactive = baseline[baseline["Gebruikersnaam"].isin(participants_inactive_usernames)]
+baseline_inactive = baseline[
+    baseline["Gebruikersnaam"].isin(participants_inactive_usernames)
+]

 # %%
 df_esm_inactive = get_esm_data(participants_inactive_usernames)

@@ -48,38 +58,57 @@ df_esm_preprocessed = preprocess_esm(df_esm_inactive)
 df_session_counts = classify_sessions_adherence(df_esm_preprocessed)

 # %%
-tbl_session_outcomes = df_session_counts.reset_index()["session_response"].value_counts()
+tbl_session_outcomes = df_session_counts.reset_index()[
+    "session_response"
+].value_counts()

 # %%
 print("All sessions:", len(df_session_counts))
 print("-------------------------------------")
 print(tbl_session_outcomes)
 print("-------------------------------------")
-print(tbl_session_outcomes/len(df_session_counts))
+print(tbl_session_outcomes / len(df_session_counts))

 # %%
 VARIABLES_TO_TRANSLATE = {
     "Gebruikersnaam": "username",
     "Geslacht": "gender",
-    "Geboortedatum": "date_of_birth"
+    "Geboortedatum": "date_of_birth",
 }
 baseline_inactive.rename(columns=VARIABLES_TO_TRANSLATE, copy=False, inplace=True)
-now = pd.Timestamp('now')
-baseline_inactive = baseline_inactive.assign(date_of_birth = lambda x: pd.to_datetime(x.date_of_birth),
-                                             age = lambda x: (now - x.date_of_birth).dt.days/365.25245)
+now = pd.Timestamp("now")
+baseline_inactive = baseline_inactive.assign(
+    date_of_birth=lambda x: pd.to_datetime(x.date_of_birth),
+    age=lambda x: (now - x.date_of_birth).dt.days / 365.25245,
+)

 # %%
 df_session_counts

 # %%
-df_session_finished = df_session_counts[df_session_counts["session_response"] == "esm_finished"].reset_index()
+df_session_finished = df_session_counts[
+    df_session_counts["session_response"] == "esm_finished"
+].reset_index()

 # %%
-df_participant_finished_sessions = df_session_finished.groupby("participant_id").count()["esm_session"].rename("finished_sessions")
+df_participant_finished_sessions = (
+    df_session_finished.groupby("participant_id")
+    .count()["esm_session"]
+    .rename("finished_sessions")
+)

 # %%
-df_adherence = baseline_inactive[["username", "gender", "age", "startlanguage"]].merge(df_esm_preprocessed[["username", "participant_id"]].drop_duplicates(), how="left", on="username")
-df_adherence = df_adherence.merge(df_participant_finished_sessions, how="left", left_on="participant_id", right_index=True)
+df_adherence = baseline_inactive[["username", "gender", "age", "startlanguage"]].merge(
+    df_esm_preprocessed[["username", "participant_id"]].drop_duplicates(),
+    how="left",
+    on="username",
+)
+df_adherence = df_adherence.merge(
+    df_participant_finished_sessions,
+    how="left",
+    left_on="participant_id",
+    right_index=True,
+)

 # %% tags=[]
 df_adherence

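The age calculation in this hunk works because subtracting datetimes yields Timedeltas, whose .dt.days can be divided by an average year length. A small sketch, not part of the commit, with made-up dates and 365.25 as an assumed year length (the notebook divides by 365.25245):

import pandas as pd

df = pd.DataFrame({"date_of_birth": ["1985-03-14", "1992-11-02"]})
now = pd.Timestamp("now")

# Parse the birth dates, then convert the Timedelta since birth into years.
df = df.assign(
    date_of_birth=lambda x: pd.to_datetime(x.date_of_birth),
    age=lambda x: (now - x.date_of_birth).dt.days / 365.25,
)
print(df)
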
@@ -91,11 +120,15 @@ df_adherence.describe()
 sns.displot(df_adherence["finished_sessions"], binwidth=5, height=5)

 # %%
-lm_adherence = smf.ols('finished_sessions ~ C(gender) + C(startlanguage) + age', data=df_adherence).fit()
-table = sm.stats.anova_lm(lm_adherence, typ=2) # Type 2 ANOVA DataFrame
+lm_adherence = smf.ols(
+    "finished_sessions ~ C(gender) + C(startlanguage) + age", data=df_adherence
+).fit()
+table = sm.stats.anova_lm(lm_adherence, typ=2)  # Type 2 ANOVA DataFrame
 print(table)

 # %%
-lr_ols = smf.ols('finished_sessions ~ C(gender) + C(startlanguage) + age', data=df_adherence)
+lr_ols = smf.ols(
+    "finished_sessions ~ C(gender) + C(startlanguage) + age", data=df_adherence
+)
 ls_result = lr_ols.fit()
 ls_result.summary()

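The regression added in this hunk uses the statsmodels formula API, where C(...) marks a categorical predictor and anova_lm(typ=2) produces Type II sums of squares. A self-contained sketch on synthetic data (not the study's data; variable names mirror df_adherence):

import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Synthetic stand-in for df_adherence, only to demonstrate the formula interface.
rng = np.random.default_rng(0)
n = 120
df = pd.DataFrame(
    {
        "gender": rng.choice(["f", "m"], n),
        "startlanguage": rng.choice(["nl", "sl"], n),
        "age": rng.uniform(20, 60, n),
    }
)
df["finished_sessions"] = 30 + 0.2 * df["age"] + rng.normal(0, 5, n)

# C(...) marks categorical predictors; anova_lm(typ=2) gives Type II ANOVA.
model = smf.ols("finished_sessions ~ C(gender) + C(startlanguage) + age", data=df).fit()
print(sm.stats.anova_lm(model, typ=2))
print(model.summary())
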