Fix minor bugs in modeling.py: score the inner grid search with f1_macro and take the class-1 probability from predict_proba
parent
b9306612cb
commit
123b78d438
|
@ -117,10 +117,10 @@ for train_index, test_index in outer_cv.split(data_x):
|
||||||
# Inner cross validation
|
# Inner cross validation
|
||||||
if min(targets_value_counts) >= 6:
|
if min(targets_value_counts) >= 6:
|
||||||
# SMOTE requires n_neighbors <= n_samples; the default value of n_neighbors is 6.
|
# SMOTE requires n_neighbors <= n_samples; the default value of n_neighbors is 6.
|
||||||
clf = GridSearchCV(estimator=createPipeline(model, "SMOTE"), param_grid=model_hyperparams, cv=inner_cv, scoring="f1_micro")
|
clf = GridSearchCV(estimator=createPipeline(model, "SMOTE"), param_grid=model_hyperparams, cv=inner_cv, scoring="f1_macro")
|
||||||
else:
|
else:
|
||||||
# RandomOverSampler: over-sample the minority class(es) by picking samples at random with replacement.
|
# RandomOverSampler: over-sample the minority class(es) by picking samples at random with replacement.
|
||||||
clf = GridSearchCV(estimator=createPipeline(model, "RandomOverSampler"), param_grid=model_hyperparams, cv=inner_cv, scoring="f1_micro")
|
clf = GridSearchCV(estimator=createPipeline(model, "RandomOverSampler"), param_grid=model_hyperparams, cv=inner_cv, scoring="f1_macro")
|
||||||
clf.fit(train_x, train_y.values.ravel())
|
clf.fit(train_x, train_y.values.ravel())
|
||||||
|
|
||||||
# Collect results and parameters
|
# Collect results and parameters
|
||||||
|
@ -129,10 +129,7 @@ for train_index, test_index in outer_cv.split(data_x):
|
||||||
pred_y = pred_y + cur_fold_pred
|
pred_y = pred_y + cur_fold_pred
|
||||||
|
|
||||||
proba_of_two_categories = clf.predict_proba(test_x).tolist()
|
proba_of_two_categories = clf.predict_proba(test_x).tolist()
|
||||||
if cur_fold_pred[0]:
|
pred_y_prob = pred_y_prob + [probabilities[clf.classes_.tolist().index(1)] for probabilities in proba_of_two_categories]
|
||||||
pred_y_prob = pred_y_prob + [row[proba_of_two_categories[0].index(max(proba_of_two_categories[0]))] for row in proba_of_two_categories]
|
|
||||||
else:
|
|
||||||
pred_y_prob = pred_y_prob + [row[proba_of_two_categories[0].index(min(proba_of_two_categories[0]))] for row in proba_of_two_categories]
|
|
||||||
|
|
||||||
true_y = true_y + test_y.values.ravel().tolist()
|
true_y = true_y + test_y.values.ravel().tolist()
|
||||||
pid = pid + test_y.index.tolist() # each test partition (fold) in the outer cv is a participant (LeaveOneOut cv)
|
pid = pid + test_y.index.tolist() # each test partition (fold) in the outer cv is a participant (LeaveOneOut cv)
|
||||||
|
|
Loading…
Reference in New Issue