Add a function to fix SAM question IDs.
parent
ef26772038
commit
825380a47e
|
@ -3,6 +3,9 @@ import pandas as pd
|
|||
|
||||
import features.esm
|
||||
|
||||
SAM_ORIGINAL_MAX = 5
|
||||
SAM_ORIGINAL_MIN = 1
|
||||
|
||||
QUESTIONNAIRE_ID_SAM = {
|
||||
"event_stress": 87,
|
||||
"event_threat": 88,
|
||||
|
@ -441,3 +444,58 @@ def extract_event_duration(df_esm_sam_clean: pd.DataFrame) -> pd.DataFrame:
|
|||
|
||||
# TODO: How many questions about the stressfulness of the period were asked
|
||||
# and how does this relate to events?
|
||||
|
||||
|
||||
def reassign_question_ids(df_sam_cleaned: pd.DataFrame) -> pd.DataFrame:
    """Reassign SAM question IDs based on the question text and rescale answers.

    The function works in three steps:

    1. For every question ID in ``DICT_SAM_QUESTION_IDS``, check that all
       ``esm_instructions`` recorded under that ID start with the expected
       text stored in the dictionary.
    2. Overwrite ``question_id`` with the dictionary key whose value is
       a prefix of the row's ``esm_instructions``.
    3. Add an ``esm_user_score`` column equal to
       ``esm_user_answer_numeric + 1``.

    NOTE(review): ``DICT_SAM_QUESTION_IDS`` is defined elsewhere in this
    module; from its use here it is assumed to map a question ID to the
    expected instruction prefix (a ``str`` or a tuple of ``str``) -- confirm.

    Parameters
    ----------
    df_sam_cleaned : pd.DataFrame
        Must contain the columns ``question_id`` and ``esm_instructions``.
        ``esm_user_answer_numeric`` is needed for the score; the message
        printed when it is missing points to ``features.esm.clean_up_esm``.

    Returns
    -------
    pd.DataFrame
        A copy of the input with ``question_id`` replaced and, if
        ``esm_user_answer_numeric`` is available, ``esm_user_score`` added.
        Rows whose instructions match no dictionary entry get ``None`` as
        their ``question_id``.

    Raises
    ------
    KeyError
        If any instruction recorded under a known question ID does not start
        with the expected text. The exception carries the offending
        instructions.
    """
    # Tabulate all distinct instructions per question ID.
    # The bare ``rename()`` clears the name of the counts series so that
    # ``reset_index()`` does not clash with the ``esm_instructions`` level.
    df_esm_sam_unique_questions = (
        df_sam_cleaned.groupby("question_id")
        .esm_instructions.value_counts()
        .rename()
        .reset_index()
    )

    # First, check that we anticipated all esm instructions.
    for q_id in DICT_SAM_QUESTION_IDS.keys():
        # Look for all questions ("instructions") occurring in the dataframe
        # under this question ID.
        actual_questions = df_esm_sam_unique_questions.loc[
            df_esm_sam_unique_questions["question_id"] == q_id,
            "esm_instructions",
        ]
        # See if they are expected, i.e. included in the dictionary.
        questions_matches = actual_questions.str.startswith(
            DICT_SAM_QUESTION_IDS.get(q_id)
        )
        # In case there is an unexpected question, raise an exception.
        # The check must look at the match mask, not at the raw strings:
        # testing ``~actual_questions.all()`` never reflects the comparison.
        if not questions_matches.all():
            print("One of the questions that occur in the data was undefined.")
            print("This were the questions found in the data: ")
            raise KeyError(actual_questions[~questions_matches])

    # Next, replace question IDs with the key of the first dictionary entry
    # whose text is a prefix of the instruction (None if nothing matches).
    df_sam_fixed = df_sam_cleaned.copy()
    df_sam_fixed["question_id"] = df_sam_cleaned["esm_instructions"].apply(
        lambda x: next(
            (
                key
                for key, values in DICT_SAM_QUESTION_IDS.items()
                if x.startswith(values)
            ),
            None,
        )
    )

    # Finally, increment numeric answers.
    try:
        # Increment the original answer by 1
        # to keep in line with traditional scoring
        # (from SAM_ORIGINAL_MIN - SAM_ORIGINAL_MAX).
        df_sam_fixed = df_sam_fixed.assign(
            esm_user_score=lambda x: x.esm_user_answer_numeric + 1
        )
    except AttributeError as e:
        # Best effort: a missing ``esm_user_answer_numeric`` column is
        # reported, and the frame is returned without ``esm_user_score``.
        print("Please, clean the dataframe first using features.esm.clean_up_esm.")
        print(e)

    return df_sam_fixed
|
||||
|
|
Loading…
Reference in New Issue