NOTE(review): line breaks, blank context lines and the indentation of context
lines were reconstructed from a whitespace-collapsed copy of this patch; verify
them against the target files before applying. The "index" hashes predate the
revised baseline_features.py hunks.
diff --git a/config.yaml b/config.yaml
index f14e3bfd..6a267081 100644
--- a/config.yaml
+++ b/config.yaml
@@ -634,5 +634,6 @@ PARAMS_FOR_ANALYSIS:
       results-survey358134_final.csv, # Belgium 1
       results-survey413767_final.csv # Belgium 2
     ]
+    QUESTION_LIST: survey637813+question_text.csv
     FEATURES: [age, gender, startlanguage]
     CATEGORICAL_FEATURES: [gender]
diff --git a/rules/models.smk b/rules/models.smk
index 94df273a..92b4a935 100644
--- a/rules/models.smk
+++ b/rules/models.smk
@@ -20,7 +20,8 @@ rule baseline_features:
         "data/raw/{pid}/participant_baseline_raw.csv"
     params:
         pid="{pid}",
-        features=config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FEATURES"]
+        features=config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FEATURES"],
+        question_filename=config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FOLDER"] + "/" + config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["QUESTION_LIST"]
     output:
         "data/processed/features/{pid}/baseline_features.csv"
     script:
diff --git a/src/data/baseline_features.py b/src/data/baseline_features.py
index 599dab4c..4978e122 100644
--- a/src/data/baseline_features.py
+++ b/src/data/baseline_features.py
@@ -3,6 +3,15 @@ import pandas as pd
 pid = snakemake.params["pid"]
 requested_features = snakemake.params["features"]
 baseline_features = pd.DataFrame(columns=requested_features)
+question_filename = snakemake.params["question_filename"]
+
+# Prefixes of the question texts that are matched (with str.startswith) against
+# the question list below to select the corresponding rows.
+dict_JCQ_demand_control_reverse = {
+    "demand_0": " [Od mene se ne zahteva,",
+    "demand_1": " [Imam dovolj časa, da končam",
+    "demand_2": " [Pri svojem delu se ne srečujem s konfliktnimi",
+}
 
 participant_info = pd.read_csv(snakemake.input[0], parse_dates=["date_of_birth"])
 if not participant_info.empty:
@@ -17,6 +26,28 @@ if not participant_info.empty:
         baseline_features.loc[0, "startlanguage"] = participant_info.loc[
             0, "startlanguage"
         ]
+    if "demand" in requested_features:
+        # The file is read without a header and transposed, so the entries of
+        # its first row end up as the rows of column 0.
+        limesurvey_questions = pd.read_csv(question_filename, header=None).T
+        # Split each entry once, at the first "." followed by whitespace.
+        limesurvey_questions[["code", "text"]] = limesurvey_questions[0].str.split(
+            r"\.\s", expand=True, n=1
+        )
+        question_text = limesurvey_questions["text"]
+        # na=False keeps the mask boolean for entries that have no text part.
+        demand_reverse_lime_rows = (
+            question_text.str.startswith(dict_JCQ_demand_control_reverse["demand_0"], na=False)
+            | question_text.str.startswith(dict_JCQ_demand_control_reverse["demand_1"], na=False)
+            | question_text.str.startswith(dict_JCQ_demand_control_reverse["demand_2"], na=False)
+        )
+        # Copy the selection so that adding "qid" modifies an independent frame
+        # instead of a slice of limesurvey_questions (SettingWithCopyWarning).
+        demand_reverse_lime = limesurvey_questions[demand_reverse_lime_rows].copy()
+        # expand=False yields a Series, matching the single column assigned to.
+        demand_reverse_lime["qid"] = demand_reverse_lime["code"].str.extract(
+            r"\[(\d+)\]", expand=False
+        )
 
 baseline_features.to_csv(
     snakemake.output[0], index=False, encoding="utf-8",