def reverse_jcq_demand_control_scoring(
    df_esm_jcq_demand_control: pd.DataFrame,
) -> pd.DataFrame:
    """
    Recode answers in the Job Content Questionnaire (demand/control items).

    Answers are first incremented by 1, to be in line with the original
    (JCQ_ORIGINAL_MIN - JCQ_ORIGINAL_MAX) scoring.
    Then, some answers are reversed (i.e. JCQ_ORIGINAL_MIN becomes JCQ_ORIGINAL_MAX etc.),
    because the questions are negatively phrased.
    These questions are listed in dict_JCQ_demand_control_reverse and identified
    by their question ID.
    Before any recoding, the existing data is checked against the literal phrasing
    of these questions to protect against wrong numbering of questions
    (differing question IDs).

    Parameters
    ----------
    df_esm_jcq_demand_control: pd.DataFrame
        A cleaned up dataframe, which must include the columns question_id,
        esm_instructions and esm_user_answer_numeric.

    Returns
    -------
    df_esm_jcq_demand_control: pd.DataFrame
        A copy of the dataframe with an added column esm_user_score
        containing answers recoded and reversed. The input is not modified.

    Raises
    ------
    KeyError
        If a required column is missing (the dataframe was probably not cleaned),
        or if a question ID marked for reversal occurs in the data with a phrasing
        that does not start with any of the expected texts.
    """
    required_columns = ("question_id", "esm_instructions", "esm_user_answer_numeric")
    missing_columns = [
        column
        for column in required_columns
        if column not in df_esm_jcq_demand_control.columns
    ]
    if missing_columns:
        # Fail loudly instead of returning a dataframe without the score column.
        raise KeyError(
            f"Missing column(s) {missing_columns}. "
            "Please, clean the dataframe first using features.esm.clean_up_esm."
        )

    # Tabulate all distinct phrasings recorded for each question ID.
    df_unique_instructions = (
        df_esm_jcq_demand_control[["question_id", "esm_instructions"]]
        .dropna()
        .drop_duplicates()
    )

    for q_id, expected_prefixes in dict_JCQ_demand_control_reverse.items():
        # These are all phrasings recorded in the data under this question ID.
        possible_answers = df_unique_instructions.loc[
            df_unique_instructions["question_id"] == q_id, "esm_instructions"
        ]
        if possible_answers.empty:
            # This question does not occur in the data, so there is nothing to verify.
            continue
        # See if they are expected, i.e. start with one of the texts in the dictionary.
        answers_matches = possible_answers.str.startswith(
            tuple(expected_prefixes), na=False
        )
        unexpected_answers = possible_answers[~answers_matches]
        if not unexpected_answers.empty:
            # The question ID is marked for reversal, but its text is not the expected one.
            raise KeyError(
                "One of the answers that occur in the data should not be reversed. "
                f"This was the answer found in the data for question ID {q_id}: "
                f"{unexpected_answers.tolist()}"
            )

    # Increment the original answer by 1
    # to keep in line with traditional scoring (JCQ_ORIGINAL_MIN - JCQ_ORIGINAL_MAX).
    incremented_score = df_esm_jcq_demand_control["esm_user_answer_numeric"] + 1

    # Reverse the items that require it; all other items keep the incremented score.
    needs_reversing = df_esm_jcq_demand_control["question_id"].isin(
        list(dict_JCQ_demand_control_reverse)
    )
    esm_user_score = incremented_score.mask(
        needs_reversing,
        JCQ_ORIGINAL_MAX + JCQ_ORIGINAL_MIN - incremented_score,
    )

    return df_esm_jcq_demand_control.assign(esm_user_score=esm_user_score)