[WIP] Add a function to recode question IDs.
parent
2aca64aa09
commit
ec51d7d406
|
@ -1,3 +1,8 @@
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# NOTE(review): presumably the highest answer value on the traditional COPE
# scoring scale mentioned in reassign_question_ids — confirm against the
# questionnaire definition.
COPE_ORIGINAL_MAX = 4
|
||||||
|
# NOTE(review): presumably the lowest answer value on the traditional COPE
# scoring scale mentioned in reassign_question_ids — confirm against the
# questionnaire definition.
COPE_ORIGINAL_MIN = 1
|
||||||
|
|
||||||
dict_COPE_question_ids = {
|
dict_COPE_question_ids = {
|
||||||
164: (
|
164: (
|
||||||
"I took additional action to try to get rid of the problem",
|
"I took additional action to try to get rid of the problem",
|
||||||
|
@ -120,3 +125,44 @@ dict_COPE_question_ids = {
|
||||||
"Razburil sem se in razmišljal samo o tem",
|
"Razburil sem se in razmišljal samo o tem",
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def reassign_question_ids(df_cope_cleaned: pd.DataFrame) -> pd.DataFrame:
    """Validate COPE question texts and add a one-based answer score.

    NOTE: work in progress. Despite its name, this function does not recode
    any question IDs yet; it only checks the question wording per ID and
    adds a score column.

    For every question ID listed in ``dict_COPE_question_ids``, all distinct
    question texts (``esm_instructions``) recorded under that ID must start
    with one of the wordings registered for the ID in the dictionary.

    Parameters
    ----------
    df_cope_cleaned:
        Dataframe with the columns ``question_id``, ``esm_instructions`` and
        ``esm_user_answer_numeric``.

    Returns
    -------
    pd.DataFrame
        A copy of ``df_cope_cleaned`` with an additional ``esm_user_score``
        column equal to ``esm_user_answer_numeric + 1``. If the numeric
        answer column is missing, a hint is printed and the copy is returned
        without the score column.

    Raises
    ------
    KeyError
        If a question text found in the data for a known question ID does
        not start with any of the expected wordings. The exception argument
        is the Series of offending question texts.
    """
    # Tabulate all distinct question texts for each question ID.
    # `.rename()` without arguments clears the Series name; presumably this
    # avoids a clash with the identically named index level in
    # `reset_index()` on older pandas versions.
    df_esm_cope_unique_questions = (
        df_cope_cleaned.groupby("question_id")
        .esm_instructions.value_counts()
        .rename()
        .reset_index()
    )

    for q_id, expected_questions in dict_COPE_question_ids.items():
        # All question texts ("instructions") occurring in the data for
        # this question ID.
        actual_questions = df_esm_cope_unique_questions.loc[
            df_esm_cope_unique_questions["question_id"] == q_id,
            "esm_instructions",
        ]
        # See if they are expected, i.e. start with one of the wordings
        # included in the dictionary.
        questions_matches = actual_questions.str.startswith(expected_questions)
        # In case there is an unexpected question text, raise an exception.
        if not questions_matches.all():
            print("One of the questions that occur in the data was undefined.")
            print("This were the questions found in the data: ")
            raise KeyError(actual_questions[~questions_matches])

    df_cope_fixed = df_cope_cleaned.copy()
    try:
        # Increment the original answer by 1
        # to keep in line with traditional scoring
        # (from COPE_ORIGINAL_MIN - COPE_ORIGINAL_MAX).
        df_cope_fixed = df_cope_fixed.assign(
            esm_user_score=lambda x: x.esm_user_answer_numeric + 1
        )
    except AttributeError as e:
        # The numeric answer column is missing; keep the best-effort
        # behaviour of reporting the problem and returning the plain copy.
        print("Please, clean the dataframe first using features.esm.clean_up_esm.")
        print(e)

    return df_cope_fixed
|
||||||
|
|
Loading…
Reference in New Issue