Calculate JCQ control and demand control ratio.

Include norms and corresponding quartile.
2022-02-28 18:51:47 +01:00 · 2022-02-28 18:51:47 +01:00 · b5a6317f4b
parent 2fed962644
commit b5a6317f4b
1 changed file with 129 additions and 29 deletions
--- a/src/data/baseline_features.py
+++ b/src/data/baseline_features.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pandas as pd

 pid = snakemake.params["pid"]
@@ -23,6 +24,26 @@ dict_JCQ_demand_control_reverse = {
 LIMESURVEY_JCQ_MIN = 1
 LIMESURVEY_JCQ_MAX = 4

+DEMAND_CONTROL_RATIO_MIN = 5 / (9 * 4)  # presumably 5 demand items at score 1 over 9 control items at score 4 - TODO confirm
+DEMAND_CONTROL_RATIO_MAX = (4 * 5) / 9  # presumably 5 demand items at score 4 over 9 control items at score 1 - TODO confirm
+
+JCQ_NORMS = {  # boundaries of the demand/control ratio quartiles, looked up by the participant's "gender" value
+    "F": {
+        0: DEMAND_CONTROL_RATIO_MIN,  # inclusive lower bound of quartile 1
+        1: 0.45,  # quartile 1 / quartile 2 boundary
+        2: 0.52,  # quartile 2 / quartile 3 boundary
+        3: 0.62,  # quartile 3 / quartile 4 boundary
+        4: DEMAND_CONTROL_RATIO_MAX,  # exclusive upper bound of quartile 4
+    },
+    "M": {
+        0: DEMAND_CONTROL_RATIO_MIN,  # inclusive lower bound of quartile 1
+        1: 0.41,  # quartile 1 / quartile 2 boundary
+        2: 0.48,  # quartile 2 / quartile 3 boundary
+        3: 0.56,  # quartile 3 / quartile 4 boundary
+        4: DEMAND_CONTROL_RATIO_MAX,  # exclusive upper bound of quartile 4
+    },
+}
+
 participant_info = pd.read_csv(snakemake.input[0], parse_dates=["date_of_birth"])

 if not participant_info.empty:
@@ -37,38 +58,117 @@ if not participant_info.empty:
        baseline_features.loc[0, "startlanguage"] = participant_info.loc[
            0, "startlanguage"
        ]
-    if "demand" in requested_features:
+    if (
+        ("demand" in requested_features)
+        or ("control" in requested_features)
+        or ("demand_control_ratio" in requested_features)
+    ):
        participant_info_t = participant_info.T
        rows_baseline = participant_info_t.index
-        # Find questions about demand, but disregard time (duration of filling in questionnaire)
-        rows_demand = rows_baseline.str.startswith(
-            JCQ_DEMAND
-        ) & ~rows_baseline.str.endswith("Time")
-        limesurvey_control = (
-            participant_info_t[rows_demand]
-            .reset_index()
-            .rename(columns={"index": "question", 0: "score_original"})
-        )
-        # Extract question IDs from names such as JobEisen[3]
-        limesurvey_control.loc[:, "qid"] = (
-            limesurvey_control["question"].str.extract(r"\[(\d+)\]").astype(int)
-        )
-        limesurvey_control["score"] = limesurvey_control["score_original"]
-        # Identify rows that include questions to be reversed.
-        rows_demand_reverse = limesurvey_control["qid"].isin(
-            dict_JCQ_demand_control_reverse[JCQ_DEMAND].keys()
-        )
-        # Reverse the score, so that the maximum value becomes the minimum etc.
-        limesurvey_control.loc[rows_demand_reverse, "score"] = (
-            LIMESURVEY_JCQ_MAX
-            + LIMESURVEY_JCQ_MIN
-            - limesurvey_control.loc[rows_demand_reverse, "score_original"]
-        )
-        # TODO Write to data/interim
-        baseline_features.loc[0, "limesurvey_demand"] = limesurvey_control[
-            "score"
-        ].sum()

+        if ("demand" in requested_features) or (
+            "demand_control_ratio" in requested_features
+        ):  # the ratio needs the demand scores even when "demand" itself is not requested
+            # Find questions about demand, but disregard time (duration of filling in questionnaire)
+            rows_demand = rows_baseline.str.startswith(
+                JCQ_DEMAND
+            ) & ~rows_baseline.str.endswith("Time")
+            limesurvey_demand = (
+                participant_info_t[rows_demand]
+                .reset_index()
+                .rename(columns={"index": "question", 0: "score_original"})  # column 0: assumes a single participant row labelled 0 - TODO confirm
+            )
+            # Extract question IDs from names such as JobEisen[3]
+            limesurvey_demand.loc[:, "qid"] = (
+                limesurvey_demand["question"].str.extract(r"\[(\d+)\]").astype(int)  # NOTE(review): presumably fails if a name has no "[n]" part - verify
+            )
+            limesurvey_demand["score"] = limesurvey_demand["score_original"]
+            # Identify rows that include questions to be reversed.
+            rows_demand_reverse = limesurvey_demand["qid"].isin(
+                dict_JCQ_demand_control_reverse[JCQ_DEMAND].keys()
+            )
+            # Reverse the score, so that the maximum value becomes the minimum etc.
+            limesurvey_demand.loc[rows_demand_reverse, "score"] = (
+                LIMESURVEY_JCQ_MAX  # MAX + MIN - score maps 1 <-> 4 and 2 <-> 3
+                + LIMESURVEY_JCQ_MIN
+                - limesurvey_demand.loc[rows_demand_reverse, "score_original"]
+            )
+            # TODO Write to data/interim
+            if "demand" in requested_features:  # store the sum only when the demand feature itself was requested
+                baseline_features.loc[0, "limesurvey_demand"] = limesurvey_demand[
+                    "score"
+                ].sum()
+
+        if ("control" in requested_features) or (
+            "demand_control_ratio" in requested_features
+        ):  # the ratio needs the control scores even when "control" itself is not requested
+            # Find questions about control, but disregard time (duration of filling in questionnaire)
+            rows_control = rows_baseline.str.startswith(
+                JCQ_CONTROL
+            ) & ~rows_baseline.str.endswith("Time")
+            limesurvey_control = (
+                participant_info_t[rows_control]
+                .reset_index()
+                .rename(columns={"index": "question", 0: "score_original"})  # column 0: assumes a single participant row labelled 0 - TODO confirm
+            )
+            # Extract question IDs from names such as JobControle[3]
+            limesurvey_control.loc[:, "qid"] = (
+                limesurvey_control["question"].str.extract(r"\[(\d+)\]").astype(int)  # NOTE(review): presumably fails if a name has no "[n]" part - verify
+            )
+            limesurvey_control["score"] = limesurvey_control["score_original"]
+            # Identify rows that include questions to be reversed.
+            rows_control_reverse = limesurvey_control["qid"].isin(
+                dict_JCQ_demand_control_reverse[JCQ_CONTROL].keys()
+            )
+            # Reverse the score, so that the maximum value becomes the minimum etc.
+            limesurvey_control.loc[rows_control_reverse, "score"] = (
+                LIMESURVEY_JCQ_MAX  # MAX + MIN - score maps 1 <-> 4 and 2 <-> 3
+                + LIMESURVEY_JCQ_MIN
+                - limesurvey_control.loc[rows_control_reverse, "score_original"]
+            )
+            # TODO Write to data/interim
+            if "control" in requested_features:  # store the sum only when the control feature itself was requested
+                baseline_features.loc[0, "limesurvey_control"] = limesurvey_control[
+                    "score"
+                ].sum()
+
+        if "demand_control_ratio" in requested_features:  # ratio of summed demand score to summed control score, plus its norm quartile
+            limesurvey_demand_control_ratio = (
+                limesurvey_demand["score"].sum() / limesurvey_control["score"].sum()  # NOTE(review): no guard against a zero control sum
+            )
+            if (
+                JCQ_NORMS[participant_info.loc[0, "gender"]][0]  # NOTE(review): KeyError when gender is neither "F" nor "M" (missing values included)
+                <= limesurvey_demand_control_ratio
+                < JCQ_NORMS[participant_info.loc[0, "gender"]][1]
+            ):
+                limesurvey_quartile = 1
+            elif (
+                JCQ_NORMS[participant_info.loc[0, "gender"]][1]
+                <= limesurvey_demand_control_ratio
+                < JCQ_NORMS[participant_info.loc[0, "gender"]][2]
+            ):
+                limesurvey_quartile = 2
+            elif (
+                JCQ_NORMS[participant_info.loc[0, "gender"]][2]
+                <= limesurvey_demand_control_ratio
+                < JCQ_NORMS[participant_info.loc[0, "gender"]][3]
+            ):
+                limesurvey_quartile = 3
+            elif (
+                JCQ_NORMS[participant_info.loc[0, "gender"]][3]
+                <= limesurvey_demand_control_ratio
+                < JCQ_NORMS[participant_info.loc[0, "gender"]][4]  # NOTE(review): exclusive bound, so a ratio equal to DEMAND_CONTROL_RATIO_MAX is not put in quartile 4
+            ):
+                limesurvey_quartile = 4
+            else:
+                limesurvey_quartile = np.nan  # reached when the ratio is NaN or lies outside [MIN, MAX) of the norms
+
+            baseline_features.loc[
+                0, "limesurvey_demand_control_ratio"
+            ] = limesurvey_demand_control_ratio
+            baseline_features.loc[
+                0, "limesurvey_demand_control_ratio_quartile"
+            ] = limesurvey_quartile

 baseline_features.to_csv(
    snakemake.output[0], index=False, encoding="utf-8",