Write questionnaire data to data/interim.

labels
junos 2022-03-01 11:39:58 +01:00
parent b5a6317f4b
commit aedb8b6785
4 changed files with 11 additions and 7 deletions

View File

@ -406,6 +406,7 @@ for provider in config["ALL_CLEANING_OVERALL"]["PROVIDERS"].keys():
# Baseline features
files_to_compute.extend(expand("data/raw/baseline_merged.csv"))
files_to_compute.extend(expand("data/raw/{pid}/participant_baseline_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/baseline_questionnaires.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/baseline_features.csv", pid=config["PIDS"]))
rule all:

View File

@ -635,5 +635,5 @@ PARAMS_FOR_ANALYSIS:
results-survey413767_final.csv # Belgium 2
]
QUESTION_LIST: survey637813+question_text.csv
FEATURES: [age, gender, startlanguage]
FEATURES: [age, gender, startlanguage, demand, control, demand_control_ratio]
CATEGORICAL_FEATURES: [gender]

View File

@ -23,6 +23,7 @@ rule baseline_features:
features=config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FEATURES"],
question_filename=config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FOLDER"] + "/" + config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["QUESTION_LIST"]
output:
"data/processed/features/{pid}/baseline_features.csv"
interim="data/interim/{pid}/baseline_questionnaires.csv",
features="data/processed/features/{pid}/baseline_features.csv"
script:
"../src/data/baseline_features.py"

View File

@ -3,6 +3,7 @@ import pandas as pd
pid = snakemake.params["pid"]
requested_features = snakemake.params["features"]
baseline_interim = pd.DataFrame(columns=["qid", "question", "score_original", "score"])
baseline_features = pd.DataFrame(columns=requested_features)
question_filename = snakemake.params["question_filename"]
@ -93,7 +94,7 @@ if not participant_info.empty:
+ LIMESURVEY_JCQ_MIN
- limesurvey_demand.loc[rows_demand_reverse, "score_original"]
)
# TODO Write to data/interim
# pd.concat returns a new DataFrame and leaves its inputs untouched, so the
# result has to be assigned back for the demand rows to be kept.
baseline_interim = pd.concat([baseline_interim, limesurvey_demand], axis=0, ignore_index=True)
if "demand" in requested_features:
baseline_features.loc[0, "limesurvey_demand"] = limesurvey_demand[
"score"
@ -126,7 +127,7 @@ if not participant_info.empty:
+ LIMESURVEY_JCQ_MIN
- limesurvey_control.loc[rows_control_reverse, "score_original"]
)
# TODO Write to data/interim
# pd.concat returns a new DataFrame and leaves its inputs untouched, so the
# result has to be assigned back for the control rows to be kept.
baseline_interim = pd.concat([baseline_interim, limesurvey_control], axis=0, ignore_index=True)
if "control" in requested_features:
baseline_features.loc[0, "limesurvey_control"] = limesurvey_control[
"score"
@ -170,6 +171,7 @@ if not participant_info.empty:
0, "limesurvey_demand_control_ratio_quartile"
] = limesurvey_quartile
baseline_features.to_csv(
snakemake.output[0], index=False, encoding="utf-8",
)
# Write the interim table unconditionally: the workflow rule declares this
# file as an output, so skipping the write for an empty frame would leave a
# declared output missing. An empty frame still yields a header-only CSV.
baseline_interim.to_csv(snakemake.output["interim"], index=False, encoding="utf-8")
baseline_features.to_csv(snakemake.output["features"], index=False, encoding="utf-8")