Start calculating demand control features.
parent
9a74e74d08
commit
30ac8b1cd5
|
@ -634,5 +634,6 @@ PARAMS_FOR_ANALYSIS:
|
||||||
results-survey358134_final.csv, # Belgium 1
|
results-survey358134_final.csv, # Belgium 1
|
||||||
results-survey413767_final.csv # Belgium 2
|
results-survey413767_final.csv # Belgium 2
|
||||||
]
|
]
|
||||||
|
QUESTION_LIST: survey637813+question_text.csv
|
||||||
FEATURES: [age, gender, startlanguage]
|
FEATURES: [age, gender, startlanguage]
|
||||||
CATEGORICAL_FEATURES: [gender]
|
CATEGORICAL_FEATURES: [gender]
|
||||||
|
|
|
@ -20,7 +20,8 @@ rule baseline_features:
|
||||||
"data/raw/{pid}/participant_baseline_raw.csv"
|
"data/raw/{pid}/participant_baseline_raw.csv"
|
||||||
params:
|
params:
|
||||||
pid="{pid}",
|
pid="{pid}",
|
||||||
features=config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FEATURES"]
|
features=config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FEATURES"],
|
||||||
|
question_filename=config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FOLDER"] + "/" + config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["QUESTION_LIST"]
|
||||||
output:
|
output:
|
||||||
"data/processed/features/{pid}/baseline_features.csv"
|
"data/processed/features/{pid}/baseline_features.csv"
|
||||||
script:
|
script:
|
||||||
|
|
|
@ -3,6 +3,13 @@ import pandas as pd
|
||||||
pid = snakemake.params["pid"]
|
pid = snakemake.params["pid"]
|
||||||
requested_features = snakemake.params["features"]
|
requested_features = snakemake.params["features"]
|
||||||
baseline_features = pd.DataFrame(columns=requested_features)
|
baseline_features = pd.DataFrame(columns=requested_features)
|
||||||
|
question_filename = snakemake.params["question_filename"]
|
||||||
|
|
||||||
|
dict_JCQ_demand_control_reverse = {
|
||||||
|
"demand_0": " [Od mene se ne zahteva,",
|
||||||
|
"demand_1": " [Imam dovolj časa, da končam",
|
||||||
|
"demand_2": " [Pri svojem delu se ne srečujem s konfliktnimi"
|
||||||
|
}
|
||||||
|
|
||||||
participant_info = pd.read_csv(snakemake.input[0], parse_dates=["date_of_birth"])
|
participant_info = pd.read_csv(snakemake.input[0], parse_dates=["date_of_birth"])
|
||||||
if not participant_info.empty:
|
if not participant_info.empty:
|
||||||
|
@ -17,6 +24,14 @@ if not participant_info.empty:
|
||||||
baseline_features.loc[0, "startlanguage"] = participant_info.loc[
|
baseline_features.loc[0, "startlanguage"] = participant_info.loc[
|
||||||
0, "startlanguage"
|
0, "startlanguage"
|
||||||
]
|
]
|
||||||
|
if "demand" in requested_features:
|
||||||
|
limesurvey_questions = pd.read_csv(question_filename, header=None).T
|
||||||
|
limesurvey_questions[["code", "text"]] = limesurvey_questions[0].str.split(r"\.\s", expand=True, n=1)
|
||||||
|
demand_reverse_lime_rows = limesurvey_questions["text"].str.startswith(dict_JCQ_demand_control_reverse["demand_0"]) | \
|
||||||
|
limesurvey_questions["text"].str.startswith(dict_JCQ_demand_control_reverse["demand_1"]) | \
|
||||||
|
limesurvey_questions["text"].str.startswith(dict_JCQ_demand_control_reverse["demand_2"])
|
||||||
|
demand_reverse_lime = limesurvey_questions[demand_reverse_lime_rows]
|
||||||
|
demand_reverse_lime.loc[:, "qid"] = demand_reverse_lime["code"].str.extract(r"\[(\d+)\]")
|
||||||
|
|
||||||
baseline_features.to_csv(
|
baseline_features.to_csv(
|
||||||
snakemake.output[0], index=False, encoding="utf-8",
|
snakemake.output[0], index=False, encoding="utf-8",
|
||||||
|
|
Loading…
Reference in New Issue