From c6144f8403fa9cf04b32143be9aadeed65818af3 Mon Sep 17 00:00:00 2001
From: junos <junos.lukan@ijs.si>
Date: Wed, 16 Mar 2022 18:55:30 +0100
Subject: [PATCH] Reverse JCQ items.

---
 config.yaml                                |   2 +-
 src/features/phone_esm/straw/esm_JCQ.py    | 108 +++++++++++++++++++++
 src/features/phone_esm/straw/preprocess.py |   3 +
 3 files changed, 112 insertions(+), 1 deletion(-)
 create mode 100644 src/features/phone_esm/straw/esm_JCQ.py

diff --git a/config.yaml b/config.yaml
index e632516d..caa19cb0 100644
--- a/config.yaml
+++ b/config.yaml
@@ -239,7 +239,7 @@ PHONE_ESM:
   PROVIDERS:
     STRAW:
       COMPUTE: True
-      FEATURES: ["PANAS_positive_affect", "PANAS_negative_affect"]
+      FEATURES: ["PANAS_positive_affect", "PANAS_negative_affect", "JCQ_job_demand", "JCQ_job_control", "JCQ_supervisor_support", "JCQ_coworker_support"]
       SRC_SCRIPT: src/features/phone_esm/rapids/main.py
 
 # See https://www.rapids.science/latest/features/phone-keyboard/
diff --git a/src/features/phone_esm/straw/esm_JCQ.py b/src/features/phone_esm/straw/esm_JCQ.py
new file mode 100644
index 00000000..88073ced
--- /dev/null
+++ b/src/features/phone_esm/straw/esm_JCQ.py
@@ -0,0 +1,108 @@
+import pandas as pd
+
+JCQ_ORIGINAL_MAX = 4  # Highest answer value in the original JCQ scoring.
+JCQ_ORIGINAL_MIN = 1  # Lowest answer value in the original JCQ scoring.
+
+dict_JCQ_demand_control_reverse = {  # question ID -> expected text beginnings
+    75: (
+        "I was NOT asked",  # English
+        "Men legde mij geen overdreven",  # Dutch
+        "Men legde mij GEEN overdreven",  # Capitalized in some versions
+        "Od mene se NI zahtevalo",  # Slovenian
+    ),
+    76: (
+        "I had enough time to do my work",  # English
+        "Ik had voldoende tijd om mijn werk",  # Dutch
+        "Imela sem dovolj časa, da končam",  # Slovenian, feminine form
+        "Imel sem dovolj časa, da končam",  # Slovenian, masculine form
+    ),
+    77: (
+        "I was free of conflicting demands",  # English
+        "Er werden mij op het werk geen tegenstrijdige",  # Dutch
+        "Er werden mij op het werk GEEN tegenstrijdige",  # Capitalized in some versions
+        "Pri svojem delu se NISEM srečeval",  # Slovenian
+    ),
+    79: (
+        "My job involved a lot of repetitive work",  # English
+        "Mijn taak omvatte veel repetitief werk",  # Dutch
+        "Moje delo je vključevalo veliko ponavljajočega",  # Slovenian
+    ),
+    85: (
+        "On my job, I had very little freedom",  # English
+        "In mijn taak had ik zeer weinig vrijheid",  # Dutch
+        "Pri svojem delu sem imel zelo malo svobode",  # Slovenian, masculine form
+        "Pri svojem delu sem imela zelo malo svobode",  # Slovenian, feminine form
+    ),
+}  # Each tuple is passed to str.startswith, so any listed beginning is accepted.
+
+
+def reverse_jcq_demand_control_scoring(
+    df_esm_jcq_demand_control: pd.DataFrame,
+) -> pd.DataFrame:
+    """
+    Recode answers in Job content questionnaire (JCQ) to the original scoring.
+
+    Answers are first incremented by 1, to be in line with original (1-4) scoring.
+    Then, answers to negatively phrased questions are reversed (1 becomes 4 etc.).
+    These questions are listed in dict_JCQ_demand_control_reverse by question ID.
+    Beforehand, the question text in the data is checked against the listed
+    phrasings to protect against wrong numbering of questions (differing IDs).
+
+    Parameters
+    ----------
+    df_esm_jcq_demand_control: pd.DataFrame
+        A cleaned up dataframe with the columns question_id and esm_instructions,
+        which must also include esm_user_answer_numeric.
+
+    Returns
+    -------
+    df_esm_jcq_demand_control: pd.DataFrame
+        A copy of the dataframe with a column esm_user_score containing answers
+        recoded and reversed. If esm_user_answer_numeric is missing, a hint is
+        printed and the dataframe is returned as is, without esm_user_score.
+
+    Raises
+    ------
+    KeyError
+        If the text of a question to be reversed, as found in the data,
+        does not start with any of the expected phrasings.
+    """
+    # Tabulate the distinct question texts for each question ID.
+    df_question_texts = (
+        df_esm_jcq_demand_control[["question_id", "esm_instructions"]]
+        .dropna()
+        .drop_duplicates()
+    )
+    for q_id, expected_beginnings in dict_JCQ_demand_control_reverse.items():
+        # These are all texts that occur in the data for a question to be reversed.
+        texts_found = df_question_texts.loc[
+            df_question_texts["question_id"] == q_id, "esm_instructions"
+        ]
+        # See if they are expected, i.e. start with a phrasing from the dictionary.
+        texts_match = texts_found.str.startswith(expected_beginnings, na=False)
+        # Use `not` here: `~` is a bitwise operator and ~True == -2 is still truthy.
+        if not texts_match.all():
+            print("One of the answers that occur in the data should not be reversed.")
+            print("This was the answer found in the data: ")
+            # In case there is an unexpected text, raise an exception.
+            raise KeyError(texts_found[~texts_match])
+
+    try:
+        # Increment the original answer by 1
+        # to keep in line with traditional scoring (JCQ_ORIGINAL_MIN - JCQ_ORIGINAL_MAX).
+        esm_user_score = df_esm_jcq_demand_control.esm_user_answer_numeric + 1
+    except AttributeError as e:
+        # Only the column lookup is guarded; the frame is handed back untouched.
+        print("Please, clean the dataframe first using features.esm.clean_up_esm.")
+        print(e)
+        return df_esm_jcq_demand_control
+
+    needs_reversing = df_esm_jcq_demand_control["question_id"].isin(
+        list(dict_JCQ_demand_control_reverse)
+    )
+    # Reverse the items that require it. Series.mask replaces values element-wise,
+    # so no row-masked assignment of the whole dataframe is needed.
+    esm_user_score = esm_user_score.mask(
+        needs_reversing, JCQ_ORIGINAL_MAX + JCQ_ORIGINAL_MIN - esm_user_score
+    )
+    return df_esm_jcq_demand_control.assign(esm_user_score=esm_user_score)
diff --git a/src/features/phone_esm/straw/preprocess.py b/src/features/phone_esm/straw/preprocess.py
index e3805a77..ce2a9bf3 100644
--- a/src/features/phone_esm/straw/preprocess.py
+++ b/src/features/phone_esm/straw/preprocess.py
@@ -1,4 +1,5 @@
 from esm_preprocess import *
+from esm_JCQ import reverse_jcq_demand_control_scoring
 
 questionnaire_name = snakemake.params["questionnaire_name"]
 
@@ -13,4 +14,6 @@ except ValueError:
 else:
     df_esm_selected = df_esm_preprocessed[df_esm_preprocessed["questionnaire_id"] == questionnaire_id]
     df_esm_clean = clean_up_esm(df_esm_selected)
+    if questionnaire_name.startswith("JCQ"):
+        df_esm_clean = reverse_jcq_demand_control_scoring(df_esm_clean)
     df_esm_clean.to_csv(snakemake.output[0])