diff --git a/config.yaml b/config.yaml
index d43dbebe..dd841530 100644
--- a/config.yaml
+++ b/config.yaml
@@ -239,7 +239,7 @@ PHONE_ESM:
   PROVIDERS:
     STRAW:
       COMPUTE: True
-      FEATURES: ["PANAS_positive_affect", "PANAS_negative_affect"]
+      FEATURES: ["PANAS_positive_affect", "PANAS_negative_affect", "JCQ_job_demand", "JCQ_job_control", "JCQ_supervisor_support", "JCQ_coworker_support"]
       SRC_SCRIPT: src/features/phone_esm/rapids/main.py
 
 # See https://www.rapids.science/latest/features/phone-keyboard/
diff --git a/src/features/phone_esm/straw/esm_JCQ.py b/src/features/phone_esm/straw/esm_JCQ.py
new file mode 100644
index 00000000..88073ced
--- /dev/null
+++ b/src/features/phone_esm/straw/esm_JCQ.py
@@ -0,0 +1,125 @@
+"""Scoring helpers for the Job Content Questionnaire (JCQ) ESM items."""
+
+import pandas as pd
+
+# Bounds of the original JCQ answer scale, used to mirror reverse-scored items.
+JCQ_ORIGINAL_MAX = 4
+JCQ_ORIGINAL_MIN = 1
+
+# Question IDs of the negatively phrased items, each mapped to the known
+# openings of its question text. Each tuple is passed to str.startswith as-is.
+dict_JCQ_demand_control_reverse = {
+    75: (
+        "I was NOT asked",
+        "Men legde mij geen overdreven",
+        "Men legde mij GEEN overdreven",  # Capitalized in some versions
+        "Od mene se NI zahtevalo",
+    ),
+    76: (
+        "I had enough time to do my work",
+        "Ik had voldoende tijd om mijn werk",
+        "Imela sem dovolj časa, da končam",
+        "Imel sem dovolj časa, da končam",
+    ),
+    77: (
+        "I was free of conflicting demands",
+        "Er werden mij op het werk geen tegenstrijdige",
+        "Er werden mij op het werk GEEN tegenstrijdige",  # Capitalized in some versions
+        "Pri svojem delu se NISEM srečeval",
+    ),
+    79: (
+        "My job involved a lot of repetitive work",
+        "Mijn taak omvatte veel repetitief werk",
+        "Moje delo je vključevalo veliko ponavljajočega",
+    ),
+    85: (
+        "On my job, I had very little freedom",
+        "In mijn taak had ik zeer weinig vrijheid",
+        "Pri svojem delu sem imel zelo malo svobode",
+        "Pri svojem delu sem imela zelo malo svobode",
+    ),
+}
+
+
+def _check_reversed_question_wording(
+    df_esm_jcq_demand_control: pd.DataFrame,
+) -> None:
+    """
+    Check that each reverse-scored question ID carries one of its known wordings.
+
+    This protects against wrong numbering of questions (differing question IDs).
+
+    Raises
+    ------
+    KeyError
+        If a question ID listed in dict_JCQ_demand_control_reverse occurs in the data
+        with a text that starts with none of the expected openings.
+    """
+    for q_id, known_openings in dict_JCQ_demand_control_reverse.items():
+        is_question = df_esm_jcq_demand_control["question_id"] == q_id
+        wordings = (
+            df_esm_jcq_demand_control.loc[is_question, "esm_instructions"]
+            .dropna()
+            .drop_duplicates()
+        )
+        if wordings.empty:
+            # This question does not occur in the data, so there is nothing to check.
+            continue
+        # na=False: an entry that cannot be matched as a string is reported as well.
+        is_known = wordings.str.startswith(known_openings, na=False)
+        if not is_known.all():
+            raise KeyError(
+                f"Question {q_id} is listed as reverse-scored, but the data "
+                f"contains unexpected wording: {wordings[~is_known].tolist()}"
+            )
+
+
+def reverse_jcq_demand_control_scoring(
+    df_esm_jcq_demand_control: pd.DataFrame,
+) -> pd.DataFrame:
+    """
+    Recode Job Content Questionnaire answers to the original (1-4) scoring.
+
+    Answers are first incremented by 1, to be in line with the original scoring.
+    Then, some answers are reversed (i.e. 1 becomes 4 etc.), because the questions
+    are negatively phrased. These are listed in dict_JCQ_demand_control_reverse
+    and identified by their question ID. Before recoding, the data is checked
+    against the literal phrasing of these questions to protect against wrong
+    numbering of questions (differing question IDs).
+
+    Parameters
+    ----------
+    df_esm_jcq_demand_control: pd.DataFrame
+        A cleaned up dataframe, which must also include esm_user_answer_numeric.
+
+    Returns
+    -------
+    df_esm_jcq_demand_control: pd.DataFrame
+        A new dataframe with a column esm_user_score containing answers recoded
+        and reversed. If esm_user_answer_numeric is missing, a hint is printed
+        and the dataframe is returned unchanged.
+
+    Raises
+    ------
+    KeyError
+        If a reverse-scored question ID occurs with an unexpected question text.
+    """
+    _check_reversed_question_wording(df_esm_jcq_demand_control)
+
+    try:
+        # Increment the original answer by 1 to keep in line with
+        # traditional scoring (JCQ_ORIGINAL_MIN - JCQ_ORIGINAL_MAX).
+        scores = df_esm_jcq_demand_control.esm_user_answer_numeric + 1
+    except AttributeError as e:
+        print("Please, clean the dataframe first using features.esm.clean_up_esm.")
+        print(e)
+        return df_esm_jcq_demand_control
+
+    needs_reversal = df_esm_jcq_demand_control["question_id"].isin(
+        list(dict_JCQ_demand_control_reverse)
+    )
+    # Reverse the items that require it by mirroring them within the scale.
+    scores = scores.where(
+        ~needs_reversal, JCQ_ORIGINAL_MAX + JCQ_ORIGINAL_MIN - scores
+    )
+    return df_esm_jcq_demand_control.assign(esm_user_score=scores)
diff --git a/src/features/phone_esm/straw/preprocess.py b/src/features/phone_esm/straw/preprocess.py
index e3805a77..ce2a9bf3 100644
--- a/src/features/phone_esm/straw/preprocess.py
+++ b/src/features/phone_esm/straw/preprocess.py
@@ -1,4 +1,5 @@
 from esm_preprocess import *
+from esm_JCQ import reverse_jcq_demand_control_scoring
 
 questionnaire_name = snakemake.params["questionnaire_name"]
 
@@ -13,4 +14,6 @@ except ValueError:
 else:
     df_esm_selected = df_esm_preprocessed[df_esm_preprocessed["questionnaire_id"] == questionnaire_id]
     df_esm_clean = clean_up_esm(df_esm_selected)
+    if questionnaire_name.startswith("JCQ"):
+        df_esm_clean = reverse_jcq_demand_control_scoring(df_esm_clean)
     df_esm_clean.to_csv(snakemake.output[0])