import pandas as pd

# Bounds of the original JCQ answer scale. A reversed score is computed as
# JCQ_ORIGINAL_MAX + JCQ_ORIGINAL_MIN - score, so 1 <-> 4 and 2 <-> 3.
JCQ_ORIGINAL_MAX = 4
JCQ_ORIGINAL_MIN = 1

# Question IDs whose answers must be reversed, mapped to the accepted opening
# phrases of the question text (several languages / wording variants).
# The phrases are used to verify that a question ID in the data really refers
# to the expected question before its score is reversed.
dict_JCQ_demand_control_reverse = {
    75: (
        "I was NOT asked",
        "Men legde mij geen overdreven",
        "Men legde mij GEEN overdreven",  # Capitalized in some versions
        "Od mene se NI zahtevalo",
    ),
    76: (
        "I had enough time to do my work",
        "Ik had voldoende tijd om mijn werk",
        "Imela sem dovolj časa, da končam",
        "Imel sem dovolj časa, da končam",
    ),
    77: (
        "I was free of conflicting demands",
        "Er werden mij op het werk geen tegenstrijdige",
        "Er werden mij op het werk GEEN tegenstrijdige",  # Capitalized in some versions
        "Pri svojem delu se NISEM srečeval",
    ),
    79: (
        "My job involved a lot of repetitive work",
        "Mijn taak omvatte veel repetitief werk",
        "Moje delo je vključevalo veliko ponavljajočega",
    ),
    85: (
        "On my job, I had very little freedom",
        "In mijn taak had ik zeer weinig vrijheid",
        "Pri svojem delu sem imel zelo malo svobode",
        "Pri svojem delu sem imela zelo malo svobode",
    ),
}


def reverse_jcq_demand_control_scoring(
    df_esm_jcq_demand_control: pd.DataFrame,
) -> pd.DataFrame:
    """
    Recode Job Content Questionnaire answers to the original (1-4) scoring.

    Every answer is first incremented by 1 to be in line with the original
    scoring (JCQ_ORIGINAL_MIN - JCQ_ORIGINAL_MAX). Answers to the questions
    listed in dict_JCQ_demand_control_reverse are then reversed (i.e. 1 becomes
    4 etc.), because these questions are negatively phrased.

    The questions to reverse are identified by their question ID. To protect
    against wrong numbering of questions (differing question IDs), the question
    texts found in the data under these IDs are first checked against the
    literal phrasings stored in dict_JCQ_demand_control_reverse.

    Parameters
    ----------
    df_esm_jcq_demand_control: pd.DataFrame
        A cleaned up dataframe with the columns question_id and
        esm_instructions, which must also include esm_user_answer_numeric.

    Returns
    -------
    df_esm_jcq_demand_control: pd.DataFrame
        A new dataframe with a column esm_user_score containing answers
        recoded and reversed. If esm_user_answer_numeric is not available,
        a hint is printed and the dataframe is returned without esm_user_score.

    Raises
    ------
    KeyError
        If a question text found under one of the IDs to be reversed does not
        start with any of the expected phrasings. The exception carries the
        offending question texts.
    """
    # Tabulate the distinct question texts that occur under each question ID.
    unique_instructions = (
        df_esm_jcq_demand_control.groupby("question_id")
        .esm_instructions.value_counts()
        .reset_index(name="n_occurrences")
    )

    for q_id, expected_prefixes in dict_JCQ_demand_control_reverse.items():
        # All question texts recorded in the data under this question ID.
        possible_answers = unique_instructions.loc[
            unique_instructions["question_id"] == q_id,
            "esm_instructions",
        ]
        if possible_answers.empty:
            # This question does not occur in the data: nothing to verify.
            continue

        # str.startswith accepts a tuple: a text is fine if it starts with
        # any one of the known phrasings.
        answers_matches = possible_answers.str.startswith(expected_prefixes)
        # Use `not` on the scalar result; bitwise `~` is only correct for
        # numpy booleans and is always truthy for a plain Python bool.
        if not answers_matches.all():
            print("One of the answers that occur in the data should not be reversed.")
            print("This was the answer found in the data: ")
            raise KeyError(possible_answers[~answers_matches])

    try:
        raw_answers = df_esm_jcq_demand_control.esm_user_answer_numeric
    except AttributeError as e:
        # The numeric answers are only available after the clean-up step.
        print("Please, clean the dataframe first using features.esm.clean_up_esm.")
        print(e)
        return df_esm_jcq_demand_control

    # Increment the original answer by 1 to keep in line with the traditional
    # scoring (JCQ_ORIGINAL_MIN - JCQ_ORIGINAL_MAX).
    incremented = raw_answers + 1

    # Reverse only the items that require it; all other scores stay as they are.
    needs_reversal = df_esm_jcq_demand_control["question_id"].isin(
        list(dict_JCQ_demand_control_reverse)
    )
    esm_user_score = incremented.mask(
        needs_reversal, JCQ_ORIGINAL_MAX + JCQ_ORIGINAL_MIN - incremented
    )

    # assign() returns a new frame, so the caller's dataframe is not modified.
    return df_esm_jcq_demand_control.assign(esm_user_score=esm_user_score)