Clean up ESM by eliminating non-answers.
Convert radio string answer to numeric.
communication
parent
74392f229a
commit
e0da6757ec
|
@ -25,12 +25,6 @@ if nb_dir not in sys.path:
|
|||
import participants.query_db
|
||||
from features.esm import *
|
||||
|
||||
# %% [markdown]
|
||||
# # ESM data
|
||||
|
||||
# %% [markdown]
|
||||
# Only take data from the main part of the study. The pilot data have a different structure; in particular, there were many additions to ESM_JSON.
|
||||
|
||||
# %%
|
||||
participants_inactive_usernames = participants.query_db.get_usernames(
|
||||
collection_start=datetime.date.fromisoformat("2020-08-01")
|
||||
|
@ -39,15 +33,20 @@ df_esm_inactive = get_esm_data(participants_inactive_usernames)
|
|||
|
||||
# %%
|
||||
df_esm_preprocessed = preprocess_esm(df_esm_inactive)
|
||||
df_esm_clean = clean_up_esm(df_esm_preprocessed)
|
||||
|
||||
# %% [markdown]
|
||||
# # PANAS
|
||||
|
||||
# %%
|
||||
df_esm_PANAS = df_esm_clean[
|
||||
(df_esm_clean["questionnaire_id"] == 8) | (df_esm_clean["questionnaire_id"] == 9)
|
||||
df_esm_PANAS = df_esm_preprocessed[
|
||||
(df_esm_preprocessed["questionnaire_id"] == 8)
|
||||
| (df_esm_preprocessed["questionnaire_id"] == 9)
|
||||
]
|
||||
df_esm_PANAS_grouped = df_esm_PANAS.groupby(["participant_id", "questionnaire_id"])
|
||||
df_esm_PANAS_clean = clean_up_esm(df_esm_PANAS)
|
||||
|
||||
# %%
|
||||
df_esm_PANAS.head()
|
||||
df_esm_PANAS_grouped = df_esm_PANAS_clean.groupby(
|
||||
["participant_id", "questionnaire_id"]
|
||||
)
|
||||
|
||||
# %%
|
||||
|
|
|
@ -17,6 +17,10 @@ SESSION_STATUS_UNANSWERED = "ema_unanswered"
|
|||
SESSION_STATUS_DAY_FINISHED = "day_finished"
|
||||
SESSION_STATUS_COMPLETE = "ema_completed"
|
||||
|
||||
ANSWER_DAY_FINISHED = "DayFinished3421"
|
||||
ANSWER_DAY_OFF = "DayOff3421"
|
||||
ANSWER_SET_EVENING = "DayFinishedSetEvening"
|
||||
|
||||
MAX_MORNING_LENGTH = 3
|
||||
# When the participant was not yet at work at the time of the first (morning) EMA,
|
||||
# only three items were answered.
|
||||
|
@ -119,10 +123,10 @@ def classify_sessions_by_completion(df_esm_preprocessed: pd.DataFrame) -> pd.Dat
|
|||
# 2. Identify non-sessions, i.e. answers about the end of the day.
|
||||
non_session = sessions_grouped.apply(
|
||||
lambda x: (
|
||||
(x.esm_user_answer == "DayFinished3421") # I finished working for today.
|
||||
| (x.esm_user_answer == "DayOff3421") # I am not going to work today.
|
||||
(x.esm_user_answer == ANSWER_DAY_FINISHED) # I finished working for today.
|
||||
| (x.esm_user_answer == ANSWER_DAY_OFF) # I am not going to work today.
|
||||
| (
|
||||
x.esm_user_answer == "DayFinishedSetEvening"
|
||||
x.esm_user_answer == ANSWER_SET_EVENING
|
||||
) # When would you like to answer the evening EMA?
|
||||
).any()
|
||||
)
|
||||
|
@ -234,4 +238,14 @@ def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
|
|||
df_esm_clean = df_esm_preprocessed[
|
||||
df_esm_preprocessed["esm_status"] == ESM_STATUS_ANSWERED
|
||||
]
|
||||
df_esm_clean = df_esm_clean[
|
||||
~df_esm_clean["esm_user_answer"].isin(
|
||||
[ANSWER_DAY_FINISHED, ANSWER_DAY_OFF, ANSWER_SET_EVENING]
|
||||
)
|
||||
]
|
||||
df_esm_clean = df_esm_clean.assign(
|
||||
esm_user_answer_numeric=lambda x: x.esm_user_answer.str.slice(stop=1).astype(
|
||||
int
|
||||
)
|
||||
)
|
||||
return df_esm_clean
|
||||
|
|
Loading…
Reference in New Issue