"""Extract baseline participant features and write them to a CSV file.

This is a Snakemake script: the ``snakemake`` object is injected by
Snakemake at runtime and provides ``params`` (``pid``, ``features``,
``question_filename``), ``input[0]`` (participant info CSV) and
``output[0]`` (destination CSV).

The output has one column per requested feature and at most one row
(row label 0). The row is only created when the participant info file is
non-empty and at least one of ``age``, ``gender`` or ``startlanguage`` is
requested.
"""
import pandas as pd

pid = snakemake.params["pid"]
requested_features = snakemake.params["features"]
baseline_features = pd.DataFrame(columns=requested_features)
question_filename = snakemake.params["question_filename"]

# Prefixes of the question texts used to locate specific questionnaire
# items in the question file.
# NOTE(review): presumably these are the reverse-scored "demand" items of
# the JCQ questionnaire -- confirm against the questionnaire definition.
dict_JCQ_demand_control_reverse = {
    "demand_0": " [Od mene se ne zahteva,",
    "demand_1": " [Imam dovolj časa, da končam",
    "demand_2": " [Pri svojem delu se ne srečujem s konfliktnimi",
}

participant_info = pd.read_csv(snakemake.input[0], parse_dates=["date_of_birth"])

if not participant_info.empty:
    if "age" in requested_features:
        now = pd.Timestamp("now")
        # Elapsed whole days since birth divided by 365.25245, i.e. an
        # approximate age in years.
        baseline_features.loc[0, "age"] = (
            now - participant_info.loc[0, "date_of_birth"]
        ).days / 365.25245
    if "gender" in requested_features:
        baseline_features.loc[0, "gender"] = participant_info.loc[0, "gender"]
    if "startlanguage" in requested_features:
        baseline_features.loc[0, "startlanguage"] = participant_info.loc[
            0, "startlanguage"
        ]
    if "demand" in requested_features:
        # The question file is read without a header and transposed, so that
        # column 0 holds one question string per row.
        limesurvey_questions = pd.read_csv(question_filename, header=None).T
        # Split each string once on ". " into a question code and its text.
        limesurvey_questions[["code", "text"]] = limesurvey_questions[0].str.split(
            r"\.\s", expand=True, n=1
        )

        demand_prefixes = [
            prefix
            for key, prefix in dict_JCQ_demand_control_reverse.items()
            if key.startswith("demand_")
        ]
        question_text = limesurvey_questions["text"]
        demand_reverse_lime_rows = pd.Series(False, index=limesurvey_questions.index)
        for prefix in demand_prefixes:
            # na=False: rows without a text part (no ". " separator) must
            # count as "no match" instead of putting NaN into the mask.
            demand_reverse_lime_rows |= question_text.str.startswith(prefix, na=False)

        # Work on an explicit copy so that adding a column does not trigger
        # SettingWithCopyWarning on a slice of limesurvey_questions.
        demand_reverse_lime = limesurvey_questions[demand_reverse_lime_rows].copy()
        # Pull the digits enclosed in square brackets out of the code;
        # expand=False returns a Series for the single capture group.
        demand_reverse_lime["qid"] = demand_reverse_lime["code"].str.extract(
            r"\[(\d+)\]", expand=False
        )
        # NOTE(review): demand_reverse_lime is not yet used to fill
        # baseline_features["demand"]; that column stays empty in the output.

baseline_features.to_csv(
    snakemake.output[0],
    index=False,
    encoding="utf-8",
)