import pandas as pd

from features.esm import increment_answers

# Bounds of the original JCQ answer scale (1-4); reversing an item maps a
# score s to JCQ_ORIGINAL_MAX + JCQ_ORIGINAL_MIN - s.
JCQ_ORIGINAL_MAX = 4
JCQ_ORIGINAL_MIN = 1

# Question IDs whose answers must be reversed, each mapped to the accepted
# opening phrasings of that question. The literals appear to be English,
# Dutch and Slovenian wording variants; they are matched as prefixes of the
# esm_instructions values found in the data.
DICT_JCQ_DEMAND_CONTROL_REVERSE = {
    75: (
        "I was NOT asked",
        "Men legde mij geen overdreven",
        "Men legde mij GEEN overdreven",  # Capitalized in some versions
        "Od mene se NI zahtevalo",
    ),
    76: (
        "I had enough time to do my work",
        "Ik had voldoende tijd om mijn werk",
        "Imela sem dovolj časa, da končam",
        "Imel sem dovolj časa, da končam",
    ),
    77: (
        "I was free of conflicting demands",
        "Er werden mij op het werk geen tegenstrijdige",
        "Er werden mij op het werk GEEN tegenstrijdige",  # Capitalized in some versions
        "Pri svojem delu se NISEM srečeval",
    ),
    79: (
        "My job involved a lot of repetitive work",
        "Mijn taak omvatte veel repetitief werk",
        "Moje delo je vključevalo veliko ponavljajočega",
    ),
    85: (
        "On my job, I had very little freedom",
        "In mijn taak had ik zeer weinig vrijheid",
        "Pri svojem delu sem imel zelo malo svobode",
        "Pri svojem delu sem imela zelo malo svobode",
    ),
}


def reverse_jcq_demand_control_scoring(
    df_esm_jcq_demand_control: pd.DataFrame,
) -> pd.DataFrame:
    """
    Recode JCQ demand and control answers and reverse the negatively phrased ones.

    The answers are first passed through features.esm.increment_answers, which
    is expected to shift them by 1 so that they follow the original (1-4)
    scoring. The scores of the questions listed in
    DICT_JCQ_DEMAND_CONTROL_REVERSE are then reversed (1 becomes 4 etc.),
    because those questions are negatively phrased.

    Questions are selected by their question ID. To protect against wrong
    numbering of questions (differing question IDs), the question texts found
    in the data (esm_instructions) are first checked against the literal
    phrasings stored in DICT_JCQ_DEMAND_CONTROL_REVERSE.

    Parameters
    ----------
    df_esm_jcq_demand_control: pd.DataFrame
        A cleaned up dataframe, which must also include esm_user_answer_numeric.
        The columns question_id and esm_instructions are read here.

    Returns
    -------
    df_esm_jcq_demand_control: pd.DataFrame
        The same dataframe with a column esm_user_score containing answers
        recoded and reversed. If an AttributeError occurs during recoding, a
        hint is printed and the dataframe is returned as it was at that point.

    Raises
    ------
    KeyError
        If, for a question ID that should be reversed, the data contains a
        question text that does not start with any of the expected phrasings.
        The offending texts are carried in the exception.
    """
    # Tabulate every distinct question text per question ID.
    # rename() without arguments clears the Series name; presumably this avoids
    # a clash with the esm_instructions index level in reset_index() -- TODO confirm.
    df_esm_jcq_demand_control_unique_answers = (
        df_esm_jcq_demand_control.groupby("question_id")
        .esm_instructions.value_counts()
        .rename()
        .reset_index()
    )

    # Check each question that needs reversing against its expected phrasings.
    for q_id, expected_phrasings in DICT_JCQ_DEMAND_CONTROL_REVERSE.items():
        # All question texts that occur in the data under this question ID.
        observed_texts = df_esm_jcq_demand_control_unique_answers.loc[
            df_esm_jcq_demand_control_unique_answers["question_id"] == q_id,
            "esm_instructions",
        ]
        # str.startswith accepts a tuple: True where any phrasing is a prefix.
        texts_match = observed_texts.str.startswith(expected_phrasings)
        # Use `not` on the scalar result: bitwise `~` is only a logical
        # negation for numpy booleans and is always truthy for a Python bool.
        if not texts_match.all():
            print("One of the answers that occur in the data should not be reversed.")
            print("This was the answer found in the data: ")
            # In case there is an unexpected text, raise an exception.
            raise KeyError(observed_texts[~texts_match])

    try:
        # Increment the original answer by 1 to keep in line
        # with traditional scoring (from JCQ_ORIGINAL_MIN to JCQ_ORIGINAL_MAX).
        df_esm_jcq_demand_control = increment_answers(df_esm_jcq_demand_control)

        # Rows belonging to the questions that require reversing.
        needs_reversal = df_esm_jcq_demand_control["question_id"].isin(
            list(DICT_JCQ_DEMAND_CONTROL_REVERSE)
        )
        # Attribute access is deliberate: a missing esm_user_score column
        # raises AttributeError, which is reported by the handler below.
        current_scores = df_esm_jcq_demand_control.esm_user_score
        # Reverse the items that require it; only the score column is written.
        df_esm_jcq_demand_control.loc[needs_reversal, "esm_user_score"] = (
            JCQ_ORIGINAL_MAX + JCQ_ORIGINAL_MIN - current_scores[needs_reversal]
        )
    except AttributeError as e:
        print("Please, clean the dataframe first using features.esm.clean_up_esm.")
        print(e)
    return df_esm_jcq_demand_control