import pandas as pd

# Bounds of the traditional COPE answer scale.
COPE_ORIGINAL_MAX = 4
COPE_ORIGINAL_MIN = 1


def reassign_question_ids(
    df_cope_cleaned: pd.DataFrame, *, question_ids=None
) -> pd.DataFrame:
    """Validate COPE question texts and add an ``esm_user_score`` column.

    Despite its name, this function does not modify ``question_id``. It
    (1) checks that every question text recorded under a known question ID
    starts with one of the texts expected for that ID, and (2) returns a copy
    of the input with ``esm_user_score = esm_user_answer_numeric + 1``.

    Args:
        df_cope_cleaned: Dataframe with ``question_id`` and
            ``esm_instructions`` columns and, ideally, an
            ``esm_user_answer_numeric`` column.
        question_ids: Optional mapping ``question_id -> tuple of expected
            question texts``. Defaults to the module-level
            ``dict_COPE_question_ids``.

    Returns:
        A copy of ``df_cope_cleaned``. The ``esm_user_score`` column is added
        when ``esm_user_answer_numeric`` is available; otherwise a hint is
        printed and the copy is returned without it.

    Raises:
        KeyError: If a question text found in the data does not start with
            any of the expected texts for its question ID, or if the
            ``question_id`` / ``esm_instructions`` columns are missing.
    """
    if question_ids is None:
        # Module-level lookup table defined elsewhere in this file.
        question_ids = dict_COPE_question_ids

    # Unique (question ID, question text) pairs that occur in the data.
    # Rows with a missing ID or text are ignored.
    observed = (
        df_cope_cleaned[["question_id", "esm_instructions"]]
        .dropna()
        .drop_duplicates()
    )

    for q_id, expected in question_ids.items():
        if isinstance(expected, str):
            # A bare string would otherwise be split into single characters.
            expected = (expected,)
        asked = observed.loc[observed["question_id"] == q_id, "esm_instructions"]
        if asked.empty:
            # This question ID does not occur in the data; nothing to check.
            continue
        # na=False: non-string values count as mismatches instead of
        # producing NaN in the boolean mask.
        matches = asked.str.startswith(tuple(expected), na=False)
        if not matches.all():
            unexpected = asked[~matches].tolist()
            raise KeyError(
                "One of the questions that occur in the data was undefined. "
                f"Unexpected questions for question_id {q_id}: {unexpected}"
            )

    df_cope_fixed = df_cope_cleaned.copy()
    try:
        numeric_answers = df_cope_fixed.esm_user_answer_numeric
    except AttributeError as e:
        # Best effort: the column only exists after the ESM clean-up step.
        print("Please, clean the dataframe first using features.esm.clean_up_esm.")
        print(e)
    else:
        # Shift the answers so that they start at COPE_ORIGINAL_MIN, in line
        # with traditional scoring (COPE_ORIGINAL_MIN - COPE_ORIGINAL_MAX).
        # NOTE(review): assumes the numeric answers are 0-based — confirm.
        df_cope_fixed = df_cope_fixed.assign(
            esm_user_score=numeric_answers + COPE_ORIGINAL_MIN
        )

    return df_cope_fixed