# Patch summary (commentary only; the patch text below is unchanged).
#
# Purpose: drop the threshold-based "summarised" target computation from the
# `targets` rule and its script, leaving only the "notsummarised" path.
#
# rules/models.smk (hunk context: `rule targets:`)
#   - Removes the params `targets_ratio_threshold` and `targets_value_threshold`,
#     which were read from config["PARAMS_FOR_ANALYSIS"]["TARGETS_RATIO_THRESHOLD"]
#     and config["PARAMS_FOR_ANALYSIS"]["TARGETS_VALUE_THRESHOLD"].
#   - Re-adds `summarised = "{summarised}"` without the trailing comma, since it
#     is now the last entry under `params:`. `pid` is kept.
#
# src/models/targets.py
#   - Removes the two `snakemake.params[...]` reads for the thresholds above.
#   - The `summarised == "summarised"` branch used to build a one-row
#     ["pid", "target"] frame from four CESD total columns (preop, inpatient,
#     postop, 3month) of row 0; the target was 1 when the number of scores
#     >= targets_value_threshold reached
#     int((cesds.count() + 1) * targets_ratio_threshold), else 0.
#     That branch now raises ValueError instead.
#   - `pd.read_csv(snakemake.input["participant_info"])` still runs before the
#     branch, so the input file is read even when the ValueError is raised.
#   - The `summarised == "notsummarised"` branch is untouched context: it selects
#     the "local_date" and "target" columns of participant_info.
#
# NOTE(review): in the lines visible here `pid` is only assigned after this
#   change; its one visible use was in the removed branch. Confirm against the
#   rest of targets.py before treating it as dead.
# NOTE(review): no `else` branch is visible; any other value of `summarised`
#   would leave `targets` unassigned in the shown lines. Verify what follows
#   the hunk.
# NOTE(review): the TARGETS_RATIO_THRESHOLD / TARGETS_VALUE_THRESHOLD config keys
#   are no longer referenced by the lines shown; check whether the config file
#   and any other rules still need them.
# NOTE(review): this patch is flattened onto one line; original line breaks and
#   indentation cannot be recovered with certainty from this view.
diff --git a/rules/models.smk b/rules/models.smk index 287fdf35..ce6fcba6 100644 --- a/rules/models.smk +++ b/rules/models.smk @@ -17,9 +17,7 @@ rule targets: participant_info = "data/raw/{pid}/" + config["PARAMS_FOR_ANALYSIS"]["TARGET_TABLE"] + "_raw.csv" params: pid = "{pid}", - summarised = "{summarised}", - targets_ratio_threshold = config["PARAMS_FOR_ANALYSIS"]["TARGETS_RATIO_THRESHOLD"], - targets_value_threshold = config["PARAMS_FOR_ANALYSIS"]["TARGETS_VALUE_THRESHOLD"] + summarised = "{summarised}" output: "data/processed/{pid}/targets_{summarised}.csv" script: diff --git a/src/models/targets.py b/src/models/targets.py index e3ebcdd8..e12794d4 100644 --- a/src/models/targets.py +++ b/src/models/targets.py @@ -3,19 +3,10 @@ import numpy as np pid = snakemake.params["pid"] summarised = snakemake.params["summarised"] -targets_ratio_threshold = snakemake.params["targets_ratio_threshold"] -targets_value_threshold = snakemake.params["targets_value_threshold"] participant_info = pd.read_csv(snakemake.input["participant_info"]) if summarised == "summarised": - targets = pd.DataFrame(columns=["pid", "target"]) - - if not participant_info.empty: - cesds = participant_info.loc[0, ["preop_cesd_total", "inpatient_cesd_total", "postop_cesd_total", "3month_cesd_total"]] - # targets: 1 => 50% (ceiling) or more of available CESD scores were 16 or higher; 0 => otherwise - num_threshold = int((cesds.count() + 1) * targets_ratio_threshold) - target = 1 if cesds.apply(lambda x : 1 if x >= targets_value_threshold else 0).sum() >= num_threshold else 0 - targets.loc[0, :] = [pid, target] + raise ValueError("Do not support summarised features for example dataset.") elif summarised == "notsummarised": targets = participant_info[["local_date", "target"]]