From 2fed9626444aad9220415126fda32d6d1ca0f59b Mon Sep 17 00:00:00 2001 From: junos Date: Mon, 28 Feb 2022 18:30:41 +0100 Subject: [PATCH] Calculate JCQ demand score. Hardcode question IDs to be reversed. --- src/data/baseline_features.py | 57 +++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/src/data/baseline_features.py b/src/data/baseline_features.py index 4978e122..b9a601c1 100644 --- a/src/data/baseline_features.py +++ b/src/data/baseline_features.py @@ -5,13 +5,26 @@ requested_features = snakemake.params["features"] baseline_features = pd.DataFrame(columns=requested_features) question_filename = snakemake.params["question_filename"] +JCQ_DEMAND = "JobEisen" +JCQ_CONTROL = "JobControle" + dict_JCQ_demand_control_reverse = { - "demand_0": " [Od mene se ne zahteva,", - "demand_1": " [Imam dovolj časa, da končam", - "demand_2": " [Pri svojem delu se ne srečujem s konfliktnimi" + JCQ_DEMAND: { + 3: " [Od mene se ne zahteva,", + 4: " [Imam dovolj časa, da končam", + 5: " [Pri svojem delu se ne srečujem s konfliktnimi", + }, + JCQ_CONTROL: { + 2: " |Moje delo vključuje veliko ponavljajočega", + 6: " [Pri svojem delu imam zelo malo svobode", + }, } +LIMESURVEY_JCQ_MIN = 1 +LIMESURVEY_JCQ_MAX = 4 + participant_info = pd.read_csv(snakemake.input[0], parse_dates=["date_of_birth"]) + if not participant_info.empty: if "age" in requested_features: now = pd.Timestamp("now") @@ -25,13 +38,37 @@ if not participant_info.empty: 0, "startlanguage" ] if "demand" in requested_features: - limesurvey_questions = pd.read_csv(question_filename, header=None).T - limesurvey_questions[["code", "text"]] = limesurvey_questions[0].str.split(r"\.\s", expand=True, n=1) - demand_reverse_lime_rows = limesurvey_questions["text"].str.startswith(dict_JCQ_demand_control_reverse["demand_0"]) | \ - limesurvey_questions["text"].str.startswith(dict_JCQ_demand_control_reverse["demand_1"]) | \ - limesurvey_questions["text"].str.startswith(dict_JCQ_demand_control_reverse["demand_2"]) - demand_reverse_lime = limesurvey_questions[demand_reverse_lime_rows] - demand_reverse_lime.loc[:, "qid"] = demand_reverse_lime["code"].str.extract(r"\[(\d+)\]") + participant_info_t = participant_info.T + rows_baseline = participant_info_t.index + # Find questions about demand, but disregard time (duration of filling in questionnaire) + rows_demand = rows_baseline.str.startswith( + JCQ_DEMAND + ) & ~rows_baseline.str.endswith("Time") + limesurvey_control = ( + participant_info_t[rows_demand] + .reset_index() + .rename(columns={"index": "question", 0: "score_original"}) + ) + # Extract question IDs from names such as JobEisen[3] + limesurvey_control.loc[:, "qid"] = ( + limesurvey_control["question"].str.extract(r"\[(\d+)\]").astype(int) + ) + limesurvey_control["score"] = limesurvey_control["score_original"] + # Identify rows that include questions to be reversed. + rows_demand_reverse = limesurvey_control["qid"].isin( + dict_JCQ_demand_control_reverse[JCQ_DEMAND].keys() + ) + # Reverse the score, so that the maximum value becomes the minimum etc. + limesurvey_control.loc[rows_demand_reverse, "score"] = ( + LIMESURVEY_JCQ_MAX + + LIMESURVEY_JCQ_MIN + - limesurvey_control.loc[rows_demand_reverse, "score_original"] + ) + # TODO Write to data/interim + baseline_features.loc[0, "limesurvey_demand"] = limesurvey_control[ + "score" + ].sum() + baseline_features.to_csv( snakemake.output[0], index=False, encoding="utf-8",