parent
9ab0c8f289
commit
4ad261fae5
|
@ -415,7 +415,7 @@ for provider in config["ALL_CLEANING_OVERALL"]["PROVIDERS"].keys():
|
|||
|
||||
# Baseline features
|
||||
if config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["COMPUTE"]:
|
||||
files_to_compute.extend("data/raw/baseline_merged.csv")
|
||||
files_to_compute.extend(expand("data/raw/baseline_merged.csv"))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/participant_baseline_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/baseline_questionnaires.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/baseline_features.csv", pid=config["PIDS"]))
|
||||
|
@ -423,7 +423,9 @@ if config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["COMPUTE"]:
|
|||
# Targets (labels)
|
||||
if config["PARAMS_FOR_ANALYSIS"]["TARGET"]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/processed/models/individual_model/{pid}/input.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend("data/processed/models/population_model/input.csv")
|
||||
files_to_compute.extend(expand("data/processed/models/population_model/input.csv"))
|
||||
|
||||
#files_to_compute.extend(expand("data/processed/models/individual_model/{pid}/output_{cv_method}/baselines.csv", pid=config["PIDS"], cv_method=config["PARAMS_FOR_ANALYSIS"]["CV_METHODS"]))
|
||||
|
||||
rule all:
|
||||
input:
|
||||
|
|
|
@ -54,7 +54,7 @@ rule model_individual_baselines:
|
|||
"data/processed/models/individual_model/{pid}/input.csv"
|
||||
params:
|
||||
cv_method = "{cv_method}",
|
||||
colnames_demographic_features = config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["FEATURES"],
|
||||
colnames_demographic_features = config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FEATURES"],
|
||||
output:
|
||||
"data/processed/models/individual_model/{pid}/output_{cv_method}/baselines.csv"
|
||||
log:
|
||||
|
|
|
@ -60,15 +60,15 @@ if not participant_info.empty:
|
|||
0, "startlanguage"
|
||||
]
|
||||
if (
|
||||
("demand" in requested_features)
|
||||
or ("control" in requested_features)
|
||||
or ("demand_control_ratio" in requested_features)
|
||||
("limesurvey_demand" in requested_features)
|
||||
or ("limesurvey_control" in requested_features)
|
||||
or ("limesurvey_demand_control_ratio" in requested_features)
|
||||
):
|
||||
participant_info_t = participant_info.T
|
||||
rows_baseline = participant_info_t.index
|
||||
|
||||
if ("demand" in requested_features) or (
|
||||
"demand_control_ratio" in requested_features
|
||||
if ("limesurvey_demand" in requested_features) or (
|
||||
"limesurvey_demand_control_ratio" in requested_features
|
||||
):
|
||||
# Find questions about demand, but disregard time (duration of filling in questionnaire)
|
||||
rows_demand = rows_baseline.str.startswith(
|
||||
|
@ -96,12 +96,12 @@ if not participant_info.empty:
|
|||
)
|
||||
baseline_interim = pd.concat([baseline_interim, limesurvey_demand], axis=0, ignore_index=True)
|
||||
if "demand" in requested_features:
|
||||
baseline_features.loc[0, "demand"] = limesurvey_demand[
|
||||
baseline_features.loc[0, "limesurvey_demand"] = limesurvey_demand[
|
||||
"score"
|
||||
].sum()
|
||||
|
||||
if ("control" in requested_features) or (
|
||||
"demand_control_ratio" in requested_features
|
||||
if ("limesurvey_control" in requested_features) or (
|
||||
"limesurvey_demand_control_ratio" in requested_features
|
||||
):
|
||||
# Find questions about control, but disregard time (duration of filling in questionnaire)
|
||||
rows_control = rows_baseline.str.startswith(
|
||||
|
@ -130,12 +130,12 @@ if not participant_info.empty:
|
|||
|
||||
baseline_interim = pd.concat([baseline_interim, limesurvey_control], axis=0, ignore_index=True)
|
||||
|
||||
if "control" in requested_features:
|
||||
baseline_features.loc[0, "control"] = limesurvey_control[
|
||||
if "limesurvey_control" in requested_features:
|
||||
baseline_features.loc[0, "limesurvey_control"] = limesurvey_control[
|
||||
"score"
|
||||
].sum()
|
||||
|
||||
if "demand_control_ratio" in requested_features:
|
||||
if "limesurvey_demand_control_ratio" in requested_features:
|
||||
limesurvey_demand_control_ratio = (
|
||||
limesurvey_demand["score"].sum() / limesurvey_control["score"].sum()
|
||||
)
|
||||
|
@ -167,10 +167,10 @@ if not participant_info.empty:
|
|||
limesurvey_quartile = np.nan
|
||||
|
||||
baseline_features.loc[
|
||||
0, "demand_control_ratio"
|
||||
0, "limesurvey_demand_control_ratio"
|
||||
] = limesurvey_demand_control_ratio
|
||||
baseline_features.loc[
|
||||
0, "demand_control_ratio_quartile"
|
||||
0, "limesurvey_demand_control_ratio_quartile"
|
||||
] = limesurvey_quartile
|
||||
|
||||
if not baseline_interim.empty:
|
||||
|
|
Loading…
Reference in New Issue