Small corrections.
parent
aa13123136
commit
aca84b214d
|
@ -25,9 +25,6 @@ from sklearn.model_selection import LeaveOneGroupOut, StratifiedKFold, cross_val
|
||||||
|
|
||||||
from machine_learning.classification_models import ClassificationModels
|
from machine_learning.classification_models import ClassificationModels
|
||||||
|
|
||||||
# %% [markdown]
|
|
||||||
# # RAPIDS models
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
# ## Set script's parameters
|
# ## Set script's parameters
|
||||||
N_CLUSTERS = 4 # Number of clusters (could be regarded as a hyperparameter)
|
N_CLUSTERS = 4 # Number of clusters (could be regarded as a hyperparameter)
|
||||||
|
@ -75,8 +72,10 @@ print("BINS: " + str(BINS))
|
||||||
model_input[CLUST_COL].describe()
|
model_input[CLUST_COL].describe()
|
||||||
|
|
||||||
|
|
||||||
# %% jupyter={"source_hidden": true}
|
# %%
|
||||||
|
model_input["target"].value_counts()
|
||||||
|
|
||||||
|
# %% jupyter={"source_hidden": true}
|
||||||
# Filter-out outlier rows by clust_col
|
# Filter-out outlier rows by clust_col
|
||||||
# model_input = model_input[(np.abs(stats.zscore(model_input[clust_col])) < 3)]
|
# model_input = model_input[(np.abs(stats.zscore(model_input[clust_col])) < 3)]
|
||||||
|
|
||||||
|
@ -92,6 +91,9 @@ uniq["cluster"] = km
|
||||||
|
|
||||||
model_input = model_input.merge(uniq[["pid", "cluster"]])
|
model_input = model_input.merge(uniq[["pid", "cluster"]])
|
||||||
|
|
||||||
|
# %%
|
||||||
|
model_input[["cluster", "target"]].value_counts().sort_index()
|
||||||
|
|
||||||
# %% jupyter={"source_hidden": true}
|
# %% jupyter={"source_hidden": true}
|
||||||
model_input.set_index(index_columns, inplace=True)
|
model_input.set_index(index_columns, inplace=True)
|
||||||
|
|
||||||
|
@ -107,7 +109,7 @@ for k in range(N_CLUSTERS):
|
||||||
model_input_subset.loc[:, "target"],
|
model_input_subset.loc[:, "target"],
|
||||||
bins=BINS,
|
bins=BINS,
|
||||||
labels=["low", "high"],
|
labels=["low", "high"],
|
||||||
right=False,
|
right=True,
|
||||||
) # ['low', 'medium', 'high']
|
) # ['low', 'medium', 'high']
|
||||||
model_input_subset["target"].value_counts()
|
model_input_subset["target"].value_counts()
|
||||||
# model_input_subset = model_input_subset[model_input_subset["target"] != "medium"]
|
# model_input_subset = model_input_subset[model_input_subset["target"] != "medium"]
|
||||||
|
@ -115,7 +117,7 @@ for k in range(N_CLUSTERS):
|
||||||
model_input_subset["target"].astype(str).apply(lambda x: 0 if x == "low" else 1)
|
model_input_subset["target"].astype(str).apply(lambda x: 0 if x == "low" else 1)
|
||||||
)
|
)
|
||||||
|
|
||||||
model_input_subset["target"].value_counts()
|
print(model_input_subset["target"].value_counts())
|
||||||
|
|
||||||
if CV_METHOD == "half_logo":
|
if CV_METHOD == "half_logo":
|
||||||
model_input_subset["pid_index"] = model_input_subset.groupby("pid").cumcount()
|
model_input_subset["pid_index"] = model_input_subset.groupby("pid").cumcount()
|
||||||
|
|
Loading…
Reference in New Issue