Write questionnaire data to data/interim.
parent
b5a6317f4b
commit
aedb8b6785
|
@ -406,6 +406,7 @@ for provider in config["ALL_CLEANING_OVERALL"]["PROVIDERS"].keys():
|
||||||
# Baseline features
|
# Baseline features
|
||||||
files_to_compute.extend(expand("data/raw/baseline_merged.csv"))
|
files_to_compute.extend(expand("data/raw/baseline_merged.csv"))
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/participant_baseline_raw.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/raw/{pid}/participant_baseline_raw.csv", pid=config["PIDS"]))
|
||||||
|
files_to_compute.extend(expand("data/interim/{pid}/baseline_questionnaires.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/processed/features/{pid}/baseline_features.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/processed/features/{pid}/baseline_features.csv", pid=config["PIDS"]))
|
||||||
|
|
||||||
rule all:
|
rule all:
|
||||||
|
|
|
@ -635,5 +635,5 @@ PARAMS_FOR_ANALYSIS:
|
||||||
results-survey413767_final.csv # Belgium 2
|
results-survey413767_final.csv # Belgium 2
|
||||||
]
|
]
|
||||||
QUESTION_LIST: survey637813+question_text.csv
|
QUESTION_LIST: survey637813+question_text.csv
|
||||||
FEATURES: [age, gender, startlanguage]
|
FEATURES: [age, gender, startlanguage, demand, control, demand_control_ratio]
|
||||||
CATEGORICAL_FEATURES: [gender]
|
CATEGORICAL_FEATURES: [gender]
|
||||||
|
|
|
@ -23,6 +23,7 @@ rule baseline_features:
|
||||||
features=config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FEATURES"],
|
features=config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FEATURES"],
|
||||||
question_filename=config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FOLDER"] + "/" + config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["QUESTION_LIST"]
|
question_filename=config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FOLDER"] + "/" + config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["QUESTION_LIST"]
|
||||||
output:
|
output:
|
||||||
"data/processed/features/{pid}/baseline_features.csv"
|
interim="data/interim/{pid}/baseline_questionnaires.csv",
|
||||||
|
features="data/processed/features/{pid}/baseline_features.csv"
|
||||||
script:
|
script:
|
||||||
"../src/data/baseline_features.py"
|
"../src/data/baseline_features.py"
|
||||||
|
|
|
@ -3,6 +3,7 @@ import pandas as pd
|
||||||
|
|
||||||
pid = snakemake.params["pid"]
|
pid = snakemake.params["pid"]
|
||||||
requested_features = snakemake.params["features"]
|
requested_features = snakemake.params["features"]
|
||||||
|
baseline_interim = pd.DataFrame(columns=["qid", "question", "score_original", "score"])
|
||||||
baseline_features = pd.DataFrame(columns=requested_features)
|
baseline_features = pd.DataFrame(columns=requested_features)
|
||||||
question_filename = snakemake.params["question_filename"]
|
question_filename = snakemake.params["question_filename"]
|
||||||
|
|
||||||
|
@ -93,7 +94,7 @@ if not participant_info.empty:
|
||||||
+ LIMESURVEY_JCQ_MIN
|
+ LIMESURVEY_JCQ_MIN
|
||||||
- limesurvey_demand.loc[rows_demand_reverse, "score_original"]
|
- limesurvey_demand.loc[rows_demand_reverse, "score_original"]
|
||||||
)
|
)
|
||||||
# TODO Write to data/interim
|
# pd.concat returns a new DataFrame and leaves its inputs untouched, so the
# result must be assigned back; otherwise the demand items are dropped and
# baseline_interim stays empty when it is written out later.
baseline_interim = pd.concat([baseline_interim, limesurvey_demand], axis=0, ignore_index=True)
|
||||||
if "demand" in requested_features:
|
if "demand" in requested_features:
|
||||||
baseline_features.loc[0, "limesurvey_demand"] = limesurvey_demand[
|
baseline_features.loc[0, "limesurvey_demand"] = limesurvey_demand[
|
||||||
"score"
|
"score"
|
||||||
|
@ -126,7 +127,7 @@ if not participant_info.empty:
|
||||||
+ LIMESURVEY_JCQ_MIN
|
+ LIMESURVEY_JCQ_MIN
|
||||||
- limesurvey_control.loc[rows_control_reverse, "score_original"]
|
- limesurvey_control.loc[rows_control_reverse, "score_original"]
|
||||||
)
|
)
|
||||||
# TODO Write to data/interim
|
# pd.concat returns a new DataFrame and leaves its inputs untouched, so the
# result must be assigned back; otherwise the control items are dropped and
# never reach the interim questionnaire table.
baseline_interim = pd.concat([baseline_interim, limesurvey_control], axis=0, ignore_index=True)
|
||||||
if "control" in requested_features:
|
if "control" in requested_features:
|
||||||
baseline_features.loc[0, "limesurvey_control"] = limesurvey_control[
|
baseline_features.loc[0, "limesurvey_control"] = limesurvey_control[
|
||||||
"score"
|
"score"
|
||||||
|
@ -170,6 +171,7 @@ if not participant_info.empty:
|
||||||
0, "limesurvey_demand_control_ratio_quartile"
|
0, "limesurvey_demand_control_ratio_quartile"
|
||||||
] = limesurvey_quartile
|
] = limesurvey_quartile
|
||||||
|
|
||||||
baseline_features.to_csv(
|
if not baseline_interim.empty:
|
||||||
snakemake.output[0], index=False, encoding="utf-8",
|
baseline_interim.to_csv(snakemake.output["interim"], index=False, encoding="utf-8")
|
||||||
)
|
|
||||||
|
baseline_features.to_csv(snakemake.output["features"], index=False, encoding="utf-8")
|
||||||
|
|
Loading…
Reference in New Issue