Browse Source

Rename baseline features AGAIN.

Correct other mistakes.
master
junos 1 month ago
parent
commit
4ad261fae5
  1. 6 changes
      Snakefile
  2. 2 changes
      rules/models.smk
  3. 26 changes
      src/data/baseline_features.py

6 changes
Snakefile

@ -415,7 +415,7 @@ for provider in config["ALL_CLEANING_OVERALL"]["PROVIDERS"].keys():
# Baseline features
if config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["COMPUTE"]:
files_to_compute.extend("data/raw/baseline_merged.csv")
files_to_compute.extend(expand("data/raw/baseline_merged.csv"))
files_to_compute.extend(expand("data/raw/{pid}/participant_baseline_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/baseline_questionnaires.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/baseline_features.csv", pid=config["PIDS"]))
@ -423,7 +423,9 @@ if config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["COMPUTE"]:
# Targets (labels)
if config["PARAMS_FOR_ANALYSIS"]["TARGET"]["COMPUTE"]:
files_to_compute.extend(expand("data/processed/models/individual_model/{pid}/input.csv", pid=config["PIDS"]))
files_to_compute.extend("data/processed/models/population_model/input.csv")
files_to_compute.extend(expand("data/processed/models/population_model/input.csv"))
#files_to_compute.extend(expand("data/processed/models/individual_model/{pid}/output_{cv_method}/baselines.csv", pid=config["PIDS"], cv_method=config["PARAMS_FOR_ANALYSIS"]["CV_METHODS"]))
rule all:
input:

2 changes
rules/models.smk

@ -54,7 +54,7 @@ rule model_individual_baselines:
"data/processed/models/individual_model/{pid}/input.csv"
params:
cv_method = "{cv_method}",
colnames_demographic_features = config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["FEATURES"],
colnames_demographic_features = config["PARAMS_FOR_ANALYSIS"]["BASELINE"]["FEATURES"],
output:
"data/processed/models/individual_model/{pid}/output_{cv_method}/baselines.csv"
log:

26 changes
src/data/baseline_features.py

@ -60,15 +60,15 @@ if not participant_info.empty:
0, "startlanguage"
]
if (
("demand" in requested_features)
or ("control" in requested_features)
or ("demand_control_ratio" in requested_features)
("limesurvey_demand" in requested_features)
or ("limesurvey_control" in requested_features)
or ("limesurvey_demand_control_ratio" in requested_features)
):
participant_info_t = participant_info.T
rows_baseline = participant_info_t.index
if ("demand" in requested_features) or (
"demand_control_ratio" in requested_features
if ("limesurvey_demand" in requested_features) or (
"limesurvey_demand_control_ratio" in requested_features
):
# Find questions about demand, but disregard time (duration of filling in questionnaire)
rows_demand = rows_baseline.str.startswith(
@ -96,12 +96,12 @@ if not participant_info.empty:
)
baseline_interim = pd.concat([baseline_interim, limesurvey_demand], axis=0, ignore_index=True)
if "demand" in requested_features:
baseline_features.loc[0, "demand"] = limesurvey_demand[
baseline_features.loc[0, "limesurvey_demand"] = limesurvey_demand[
"score"
].sum()
if ("control" in requested_features) or (
"demand_control_ratio" in requested_features
if ("limesurvey_control" in requested_features) or (
"limesurvey_demand_control_ratio" in requested_features
):
# Find questions about control, but disregard time (duration of filling in questionnaire)
rows_control = rows_baseline.str.startswith(
@ -130,12 +130,12 @@ if not participant_info.empty:
baseline_interim = pd.concat([baseline_interim, limesurvey_control], axis=0, ignore_index=True)
if "control" in requested_features:
baseline_features.loc[0, "control"] = limesurvey_control[
if "limesurvey_control" in requested_features:
baseline_features.loc[0, "limesurvey_control"] = limesurvey_control[
"score"
].sum()
if "demand_control_ratio" in requested_features:
if "limesurvey_demand_control_ratio" in requested_features:
limesurvey_demand_control_ratio = (
limesurvey_demand["score"].sum() / limesurvey_control["score"].sum()
)
@ -167,10 +167,10 @@ if not participant_info.empty:
limesurvey_quartile = np.nan
baseline_features.loc[
0, "demand_control_ratio"
0, "limesurvey_demand_control_ratio"
] = limesurvey_demand_control_ratio
baseline_features.loc[
0, "demand_control_ratio_quartile"
0, "limesurvey_demand_control_ratio_quartile"
] = limesurvey_quartile
if not baseline_interim.empty:

Loading…
Cancel
Save