import pandas as pd
import xgboost as xg
from lightgbm import LGBMClassifier
from sklearn import ensemble, linear_model, naive_bayes, neighbors, svm, tree
from sklearn.dummy import DummyClassifier


class ClassificationModels:
    """Registry of classifier instances paired with running metric totals.

    ``self.cmodels`` maps a model title to a dict with two keys:

    * ``"model"``   -- an unfitted estimator instance.
    * ``"metrics"`` -- a four-element list, initialised to zeros, that
      ``get_total_models_scores`` reads positionally as
      ``[accuracy, precision, recall, f1]``.

    NOTE(review): nothing in this class updates ``"metrics"``; presumably
    callers accumulate per-cluster scores into it -- confirm against callers.
    """

    # (label printed to stdout, value written to the "metric" column), listed
    # in the same order as the entries of each model's "metrics" list.
    _METRIC_LAYOUT = (
        ("Acc:", "test_accuracy"),
        ("Precision:", "test_precision"),
        ("Recall:", "test_recall"),
        ("F1:", "test_f1"),
    )

    # Column order of the frame returned by get_total_models_scores.
    _SCORE_COLUMNS = ("method", "metric", "mean")

    def __init__(self):
        """Create the registry with a fresh set of unfitted classifiers."""
        self.cmodels = self.init_classification_models()

    def get_cmodels(self):
        """Return the registry dict itself (not a copy)."""
        return self.cmodels

    def init_classification_models(self):
        """Build a new registry of classifiers with zeroed metric totals.

        Returns:
            dict: ``{title: {"model": estimator, "metrics": [0, 0, 0, 0]}}``
            with one entry per supported classifier, in a fixed order.
        """
        estimators = {
            "dummy_classifier": DummyClassifier(strategy="most_frequent"),
            "logistic_regression": linear_model.LogisticRegression(
                max_iter=1000
            ),
            "support_vector_machine": svm.SVC(),
            "gaussian_naive_bayes": naive_bayes.GaussianNB(),
            "stochastic_gradient_descent_classifier": (
                linear_model.SGDClassifier()
            ),
            "knn": neighbors.KNeighborsClassifier(),
            "decision_tree": tree.DecisionTreeClassifier(),
            "random_forest_classifier": ensemble.RandomForestClassifier(),
            "gradient_boosting_classifier": (
                ensemble.GradientBoostingClassifier()
            ),
            "lgbm_classifier": LGBMClassifier(),
            "XGBoost_classifier": xg.sklearn.XGBClassifier(),
        }
        # Each entry gets its own metrics list so totals are never shared
        # between models.
        return {
            title: {"model": estimator, "metrics": [0, 0, 0, 0]}
            for title, estimator in estimators.items()
        }

    def get_total_models_scores(self, n_clusters=1):
        """Print and tabulate every model's metric totals divided by n_clusters.

        For each registered model a banner, the model title and the four
        averaged metrics are printed to stdout, and four rows are added to
        the result (accuracy, precision, recall, F1 -- in that order).

        Args:
            n_clusters: Divisor applied to every stored metric total.
                Must be non-zero; dividing plain Python numbers by zero
                raises ``ZeroDivisionError``.

        Returns:
            pandas.DataFrame: columns ``method``, ``metric``, ``mean``; four
            rows per model. The row index restarts at 0 for every model
            (0, 1, 2, 3, 0, 1, ...). If the registry is empty, an empty
            frame with the same columns is returned.
        """
        columns = list(self._SCORE_COLUMNS)
        records = []
        row_labels = []

        for model_title, model in self.cmodels.items():
            print("\n************************************\n")
            print("Current model:", model_title, end="\n")

            for position, (label, metric_name) in enumerate(
                self._METRIC_LAYOUT
            ):
                mean = model["metrics"][position] / n_clusters
                print(label, mean)
                records.append(
                    {
                        "method": model_title,
                        "metric": metric_name,
                        "mean": mean,
                    }
                )
                # Keep the historical per-model 0..3 row labels so callers
                # that index by label see the same frame as before.
                row_labels.append(position)

        if not records:
            return pd.DataFrame(columns=columns)

        # Build the frame once instead of concatenating onto empty frames:
        # that pattern is deprecated in recent pandas (FutureWarning) and is
        # quadratic in the number of rows.
        return pd.DataFrame(records, columns=columns, index=row_labels)