[WIP] Add a function to recode question IDs.

2023-07-03 19:57:28 +02:00 · 2023-07-03 19:57:28 +02:00 · ec51d7d406
parent 2aca64aa09
commit ec51d7d406
1 changed files with 46 additions and 0 deletions
--- a/features/esm_COPE.py
+++ b/features/esm_COPE.py
@ -1,3 +1,8 @@
 import pandas as pd
 # Bounds of the traditional COPE item scoring range; reassign_question_ids
 # shifts the raw numeric answers by +1 to line up with this range.
 # NOTE(review): presumably inclusive bounds (1..4) -- confirm against the
 # questionnaire definition.
 COPE_ORIGINAL_MAX = 4
 COPE_ORIGINAL_MIN = 1
 dict_COPE_question_ids = {
    164: (
        "I took additional action to try to get rid of the problem",
@ -120,3 +125,44 @@ dict_COPE_question_ids = {
        "Razburil sem se in razmišljal samo o tem",
    ),
 }
 def reassign_question_ids(df_cope_cleaned: pd.DataFrame) -> pd.DataFrame:
    """Validate COPE question wordings per question ID and add a 1-based score.

    For every question ID listed in ``dict_COPE_question_ids``, all question
    wordings (``esm_instructions``) found in the data under that ID must start
    with one of the wordings defined in the dictionary.  Afterwards an
    ``esm_user_score`` column is added, equal to
    ``esm_user_answer_numeric + 1``.

    NOTE: work in progress -- despite the name, no question IDs are actually
    reassigned yet; the ``question_id`` column is returned unchanged.

    Parameters
    ----------
    df_cope_cleaned : pd.DataFrame
        Dataframe with at least the columns ``question_id`` and
        ``esm_instructions``; ``esm_user_answer_numeric`` is needed for the
        score column.

    Returns
    -------
    pd.DataFrame
        A copy of the input with ``esm_user_score`` added.  If
        ``esm_user_answer_numeric`` is missing, a hint is printed and the
        copy is returned without the score column.

    Raises
    ------
    KeyError
        If a question wording found in the data for a known question ID
        matches none of the wordings defined for that ID.  The exception
        argument holds the offending wordings.
    """
    # Tabulate all question wordings occurring for each question ID.
    # rename() without an argument resets the Series name; presumably this
    # avoids a name clash with the "esm_instructions" index level when
    # reset_index() turns the index levels into columns.
    df_esm_cope_unique_questions = (
        df_cope_cleaned.groupby("question_id")
        .esm_instructions.value_counts()
        .rename()
        .reset_index()
    )
    df_cope_fixed = df_cope_cleaned.copy()
    for q_id, expected_questions in dict_COPE_question_ids.items():
        # All wordings ("instructions") found in the data for this ID.
        actual_questions = df_esm_cope_unique_questions.loc[
            df_esm_cope_unique_questions["question_id"] == q_id,
            "esm_instructions",
        ]
        # True where a wording is expected, i.e. starts with one of the
        # wordings defined in the dictionary.  Non-string entries count as
        # a mismatch (na=False) instead of propagating NaN into the mask.
        questions_matches = actual_questions.str.startswith(
            expected_questions, na=False
        )
        # The check has to look at the boolean mask, not at the question
        # strings themselves; an empty selection passes trivially.
        if not questions_matches.all():
            print("One of the questions that occur in the data was undefined.")
            print("This were the questions found in the data: ")
            raise KeyError(actual_questions[~questions_matches])
    try:
        # Increment the original answer by 1 to keep in line with the
        # traditional scoring (from COPE_ORIGINAL_MIN to COPE_ORIGINAL_MAX).
        df_cope_fixed = df_cope_fixed.assign(
            esm_user_score=lambda x: x.esm_user_answer_numeric + 1
        )
    except AttributeError as e:
        # Deliberately non-fatal: report the problem and return the copy
        # without the score column.
        print("Please, clean the dataframe first using features.esm.clean_up_esm.")
        print(e)
    return df_cope_fixed