Clean up ESM by eliminating non-answers.

Convert radio string answer to numeric.
communication
junos 2021-07-03 16:34:11 +02:00
parent 74392f229a
commit e0da6757ec
2 changed files with 27 additions and 14 deletions

View File

@ -25,12 +25,6 @@ if nb_dir not in sys.path:
import participants.query_db import participants.query_db
from features.esm import * from features.esm import *
# %% [markdown]
# # ESM data
# %% [markdown]
# Only take data from the main part of the study. The pilot data have a different structure; in particular, there were many additions to ESM_JSON.
# %% # %%
participants_inactive_usernames = participants.query_db.get_usernames( participants_inactive_usernames = participants.query_db.get_usernames(
collection_start=datetime.date.fromisoformat("2020-08-01") collection_start=datetime.date.fromisoformat("2020-08-01")
@ -39,15 +33,20 @@ df_esm_inactive = get_esm_data(participants_inactive_usernames)
# %% # %%
df_esm_preprocessed = preprocess_esm(df_esm_inactive) df_esm_preprocessed = preprocess_esm(df_esm_inactive)
df_esm_clean = clean_up_esm(df_esm_preprocessed)
# %% [markdown]
# # PANAS
# %% # %%
df_esm_PANAS = df_esm_clean[ df_esm_PANAS = df_esm_preprocessed[
(df_esm_clean["questionnaire_id"] == 8) | (df_esm_clean["questionnaire_id"] == 9) (df_esm_preprocessed["questionnaire_id"] == 8)
| (df_esm_preprocessed["questionnaire_id"] == 9)
] ]
df_esm_PANAS_grouped = df_esm_PANAS.groupby(["participant_id", "questionnaire_id"]) df_esm_PANAS_clean = clean_up_esm(df_esm_PANAS)
# %% # %%
df_esm_PANAS.head() df_esm_PANAS_grouped = df_esm_PANAS_clean.groupby(
["participant_id", "questionnaire_id"]
)
# %% # %%

View File

@ -17,6 +17,10 @@ SESSION_STATUS_UNANSWERED = "ema_unanswered"
SESSION_STATUS_DAY_FINISHED = "day_finished" SESSION_STATUS_DAY_FINISHED = "day_finished"
SESSION_STATUS_COMPLETE = "ema_completed" SESSION_STATUS_COMPLETE = "ema_completed"
ANSWER_DAY_FINISHED = "DayFinished3421"
ANSWER_DAY_OFF = "DayOff3421"
ANSWER_SET_EVENING = "DayFinishedSetEvening"
MAX_MORNING_LENGTH = 3 MAX_MORNING_LENGTH = 3
# When the participant was not yet at work at the time of the first (morning) EMA, # When the participant was not yet at work at the time of the first (morning) EMA,
# only three items were answered. # only three items were answered.
@ -119,10 +123,10 @@ def classify_sessions_by_completion(df_esm_preprocessed: pd.DataFrame) -> pd.Dat
# 2. Identify non-sessions, i.e. answers about the end of the day. # 2. Identify non-sessions, i.e. answers about the end of the day.
non_session = sessions_grouped.apply( non_session = sessions_grouped.apply(
lambda x: ( lambda x: (
(x.esm_user_answer == "DayFinished3421") # I finished working for today. (x.esm_user_answer == ANSWER_DAY_FINISHED) # I finished working for today.
| (x.esm_user_answer == "DayOff3421") # I am not going to work today. | (x.esm_user_answer == ANSWER_DAY_OFF) # I am not going to work today.
| ( | (
x.esm_user_answer == "DayFinishedSetEvening" x.esm_user_answer == ANSWER_SET_EVENING
) # When would you like to answer the evening EMA? ) # When would you like to answer the evening EMA?
).any() ).any()
) )
@ -234,4 +238,14 @@ def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
df_esm_clean = df_esm_preprocessed[ df_esm_clean = df_esm_preprocessed[
df_esm_preprocessed["esm_status"] == ESM_STATUS_ANSWERED df_esm_preprocessed["esm_status"] == ESM_STATUS_ANSWERED
] ]
df_esm_clean = df_esm_clean[
~df_esm_clean["esm_user_answer"].isin(
[ANSWER_DAY_FINISHED, ANSWER_DAY_OFF, ANSWER_SET_EVENING]
)
]
df_esm_clean = df_esm_clean.assign(
esm_user_answer_numeric=lambda x: x.esm_user_answer.str.slice(stop=1).astype(
int
)
)
return df_esm_clean return df_esm_clean