Explain the histogram better and explore long sessions.
parent
06c179f4dd
commit
8306e99392
|
@ -34,9 +34,7 @@ from features.esm import *
|
||||||
# Only take data from the main part of the study. The pilot data have a different structure; in particular, there were many additions to ESM_JSON.
|
# Only take data from the main part of the study. The pilot data have a different structure; in particular, there were many additions to ESM_JSON.
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
participants_inactive_usernames = participants.query_db.get_usernames(
|
participants_inactive_usernames = participants.query_db.get_usernames(collection_start=datetime.date.fromisoformat("2020-08-01"))
|
||||||
collection_start=datetime.date.fromisoformat("2020-08-01")
|
|
||||||
)
|
|
||||||
df_esm_inactive = get_esm_data(participants_inactive_usernames)
|
df_esm_inactive = get_esm_data(participants_inactive_usernames)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
|
@ -63,9 +61,12 @@ df_esm_preprocessed.columns
|
||||||
# One approach would be to count distinct session IDs which are incremented for each group of EMAs. However, since not every question answered counts as a fulfilled EMA, some unique session IDs should be eliminated first.
|
# One approach would be to count distinct session IDs which are incremented for each group of EMAs. However, since not every question answered counts as a fulfilled EMA, some unique session IDs should be eliminated first.
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
session_counts = df_esm_preprocessed.groupby(["participant_id", "esm_session"])[
|
session_counts = df_esm_preprocessed.groupby(["participant_id","esm_session"]).count()["id"]
|
||||||
"esm_session"
|
|
||||||
].count()
|
# %% [markdown]
|
||||||
|
# Group data by participant_id and esm_session and count the number of instances (by id). Session counts are therefore counts of how many times a specific session ID appears *within* a specific participant.
|
||||||
|
#
|
||||||
|
# In the plot below, it is impossible to distinguish whether a specific count appears many times within the same participant or across different participants.
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
sns.displot(session_counts.to_numpy(), binwidth=1, height=8)
|
sns.displot(session_counts.to_numpy(), binwidth=1, height=8)
|
||||||
|
@ -74,11 +75,9 @@ sns.displot(session_counts.to_numpy(), binwidth=1, height=8)
|
||||||
# ### Unique session IDs
|
# ### Unique session IDs
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_session_counts = pd.DataFrame(session_counts)
|
df_session_counts = pd.DataFrame(session_counts).rename(columns={"id": "esm_session_count"})
|
||||||
df_session_1 = df_session_counts[(df_session_counts["esm_session"] == 1)]
|
df_session_1 = df_session_counts[(df_session_counts["esm_session_count"] == 1)]
|
||||||
df_esm_unique_session = df_session_1.join(
|
df_esm_unique_session = df_session_1.join(df_esm_preprocessed.set_index(["participant_id","esm_session"]))
|
||||||
df_esm_preprocessed.set_index(["participant_id", "esm_session"])
|
|
||||||
)
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_esm_unique_session["esm_user_answer"].value_counts()
|
df_esm_unique_session["esm_user_answer"].value_counts()
|
||||||
|
@ -88,14 +87,10 @@ df_esm_unique_session["esm_user_answer"].value_counts()
|
||||||
# What do the answers "Ne" represent?
|
# What do the answers "Ne" represent?
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_esm_unique_session.query("esm_user_answer == 'Ne'")[
|
df_esm_unique_session.query("esm_user_answer == 'Ne'")[[ "esm_trigger", "esm_instructions", "esm_user_answer"]].head()
|
||||||
["esm_trigger", "esm_instructions", "esm_user_answer"]
|
|
||||||
].head()
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_esm_unique_session.loc[
|
df_esm_unique_session.loc[df_esm_unique_session["esm_user_answer"].str.contains("Ne"), "esm_trigger"].value_counts()
|
||||||
df_esm_unique_session["esm_user_answer"].str.contains("Ne"), "esm_trigger"
|
|
||||||
].value_counts()
|
|
||||||
|
|
||||||
# %% [markdown]
|
# %% [markdown]
|
||||||
# These are all "first" questions of EMAs which serve as a way to postpone the daytime or evening EMAs.
|
# These are all "first" questions of EMAs which serve as a way to postpone the daytime or evening EMAs.
|
||||||
|
@ -110,30 +105,36 @@ df_esm_unique_session.loc[
|
||||||
# There are some session IDs that only appear twice or three times.
|
# There are some session IDs that only appear twice or three times.
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_session_counts[
|
df_session_counts[(df_session_counts["esm_session_count"] < 4) & (df_session_counts["esm_session_count"] > 1)]
|
||||||
(df_session_counts["esm_session"] < 4) & (df_session_counts["esm_session"] > 1)
|
|
||||||
]
|
|
||||||
|
|
||||||
# %% [markdown]
|
# %% [markdown]
|
||||||
# Some represent the morning EMAs that only contained three questions.
|
# Some represent the morning EMAs that only contained three questions.
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_esm_preprocessed.query("participant_id == 89 & esm_session == 158")[
|
df_esm_preprocessed.query("participant_id == 89 & esm_session == 158")[[ "esm_trigger", "esm_instructions", "esm_user_answer"]]
|
||||||
["esm_trigger", "esm_instructions", "esm_user_answer"]
|
|
||||||
]
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_esm_preprocessed.query("participant_id == 89 & esm_session == 157")[
|
df_esm_preprocessed.query("participant_id == 89 & esm_session == 157")[[ "esm_trigger", "esm_instructions", "esm_user_answer"]]
|
||||||
["esm_trigger", "esm_instructions", "esm_user_answer"]
|
|
||||||
]
|
|
||||||
|
|
||||||
# %% [markdown]
|
# %% [markdown]
|
||||||
# Others represent interrupted EMA sessions.
|
# Others represent interrupted EMA sessions.
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_esm_preprocessed.query("participant_id == 31 & esm_session == 77")[
|
df_esm_preprocessed.query("participant_id == 31 & esm_session == 77")[[ "esm_trigger", "esm_instructions", "esm_user_answer"]]
|
||||||
["esm_trigger", "esm_instructions", "esm_user_answer"]
|
|
||||||
]
|
# %% [markdown]
|
||||||
|
# ### Long sessions
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_session_counts[(df_session_counts["esm_session_count"] > 40)]
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_esm_preprocessed.query("participant_id == 83").sort_values("_id")[[ "esm_trigger","datetime_lj", "_id", "username"]]
|
||||||
|
|
||||||
|
# %% [markdown]
|
||||||
|
# Both the session ID and \_ID (and others) are reset when the application is reinstalled. Here, it can be seen that the application was reinstalled on 2 April (in fact, the phone was replaced, as reported by the participant).
|
||||||
|
#
|
||||||
|
# Session IDs should therefore be grouped while taking the timestamp into account (e.g. by sorting first).
|
||||||
|
|
||||||
# %% [markdown]
|
# %% [markdown]
|
||||||
# ## Other possibilities
|
# ## Other possibilities
|
||||||
|
|
Loading…
Reference in New Issue