Write questionnaire data to data/interim.
parent
b5a6317f4b
commit
aedb8b6785
|
@ -406,6 +406,7 @@ for provider in config["ALL_CLEANING_OVERALL"]["PROVIDERS"].keys():
|
|||
# Baseline features
|
||||
files_to_compute.extend(expand("data/raw/baseline_merged.csv"))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/participant_baseline_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/baseline_questionnaires.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/baseline_features.csv", pid=config["PIDS"]))
|
||||
|
||||
rule all:
|
||||
|
|
|
@ -635,5 +635,5 @@ PARAMS_FOR_ANALYSIS:
|
|||
results-survey413767_final.csv # Belgium 2
|
||||
]
|
||||
QUESTION_LIST: survey637813+question_text.csv
|
||||
FEATURES: [age, gender, startlanguage]
|
||||
FEATURES: [age, gender, startlanguage, demand, control, demand_control_ratio]
|
||||
CATEGORICAL_FEATURES: [gender]
|
||||
|
|
|
@ -23,6 +23,7 @@ rule baseline_features:
|
|||
features=config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FEATURES"],
|
||||
question_filename=config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FOLDER"] + "/" + config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["QUESTION_LIST"]
|
||||
output:
|
||||
"data/processed/features/{pid}/baseline_features.csv"
|
||||
interim="data/interim/{pid}/baseline_questionnaires.csv",
|
||||
features="data/processed/features/{pid}/baseline_features.csv"
|
||||
script:
|
||||
"../src/data/baseline_features.py"
|
||||
|
|
|
@ -3,6 +3,7 @@ import pandas as pd
|
|||
|
||||
pid = snakemake.params["pid"]
|
||||
requested_features = snakemake.params["features"]
|
||||
baseline_interim = pd.DataFrame(columns=["qid", "question", "score_original", "score"])
|
||||
baseline_features = pd.DataFrame(columns=requested_features)
|
||||
question_filename = snakemake.params["question_filename"]
|
||||
|
||||
|
@ -93,7 +94,7 @@ if not participant_info.empty:
|
|||
+ LIMESURVEY_JCQ_MIN
|
||||
- limesurvey_demand.loc[rows_demand_reverse, "score_original"]
|
||||
)
|
||||
# TODO Write to data/interim
|
||||
# pd.concat returns a new DataFrame and does not modify its inputs; rebind the
# result so the demand rows are actually accumulated in baseline_interim.
baseline_interim = pd.concat([baseline_interim, limesurvey_demand], axis=0, ignore_index=True)
|
||||
if "demand" in requested_features:
|
||||
baseline_features.loc[0, "limesurvey_demand"] = limesurvey_demand[
|
||||
"score"
|
||||
|
@ -126,7 +127,7 @@ if not participant_info.empty:
|
|||
+ LIMESURVEY_JCQ_MIN
|
||||
- limesurvey_control.loc[rows_control_reverse, "score_original"]
|
||||
)
|
||||
# TODO Write to data/interim
|
||||
# pd.concat returns a new DataFrame and does not modify its inputs; rebind the
# result so the control rows are actually accumulated in baseline_interim.
baseline_interim = pd.concat([baseline_interim, limesurvey_control], axis=0, ignore_index=True)
|
||||
if "control" in requested_features:
|
||||
baseline_features.loc[0, "limesurvey_control"] = limesurvey_control[
|
||||
"score"
|
||||
|
@ -170,6 +171,7 @@ if not participant_info.empty:
|
|||
0, "limesurvey_demand_control_ratio_quartile"
|
||||
] = limesurvey_quartile
|
||||
|
||||
baseline_features.to_csv(
|
||||
snakemake.output[0], index=False, encoding="utf-8",
|
||||
)
|
||||
if not baseline_interim.empty:
|
||||
baseline_interim.to_csv(snakemake.output["interim"], index=False, encoding="utf-8")
|
||||
|
||||
baseline_features.to_csv(snakemake.output["features"], index=False, encoding="utf-8")
|
||||
|
|
Loading…
Reference in New Issue