Create a classification models class and use it in the ml pipeline script.
parent
218b684514
commit
98f78d72fc
|
@ -26,10 +26,11 @@ import pandas as pd
|
|||
import seaborn as sns
|
||||
from scipy import stats
|
||||
|
||||
from sklearn import linear_model, svm, naive_bayes, neighbors, tree, ensemble
|
||||
from sklearn.model_selection import LeaveOneGroupOut, cross_validate
|
||||
from sklearn.dummy import DummyClassifier
|
||||
from sklearn.impute import SimpleImputer
|
||||
|
||||
from sklearn.dummy import DummyClassifier
|
||||
from sklearn import linear_model, svm, naive_bayes, neighbors, tree, ensemble
|
||||
from lightgbm import LGBMClassifier
|
||||
import xgboost as xg
|
||||
|
||||
|
@ -44,6 +45,7 @@ if nb_dir not in sys.path:
|
|||
|
||||
import machine_learning.labels
|
||||
import machine_learning.model
|
||||
from machine_learning.classification_models import ClassificationModels
|
||||
|
||||
# %% [markdown]
|
||||
# # RAPIDS models
|
||||
|
@ -92,52 +94,8 @@ model_input.set_index(index_columns, inplace=True)
|
|||
|
||||
# %% jupyter={"source_hidden": true}
|
||||
# Create dict with classification ml models.
# The model table lives in ClassificationModels, so it is built there rather
# than being repeated inline here (an inline literal would only be overwritten
# by the assignment below).
cm = ClassificationModels()
cmodels = cm.get_cmodels()
|
||||
|
||||
# %% jupyter={"source_hidden": true}
|
||||
for k in range(n_clusters):
|
||||
|
@ -223,10 +181,4 @@ for k in range(n_clusters):
|
|||
|
||||
# %% jupyter={"source_hidden": true}
|
||||
# Get overall results.
# ClassificationModels prints the per-model averages itself; calling it once
# avoids reporting every model twice.
cm.get_total_models_scores(n_clusters=n_clusters)
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
from sklearn.dummy import DummyClassifier
|
||||
from sklearn import linear_model, svm, naive_bayes, neighbors, tree, ensemble
|
||||
from lightgbm import LGBMClassifier
|
||||
import xgboost as xg
|
||||
|
||||
class ClassificationModels:
    """Table of the classifiers compared by the ML pipeline.

    ``cmodels`` maps a model title to a dict with two keys:

    * ``'model'``   -- an unfitted estimator instance;
    * ``'metrics'`` -- a four-slot list that starts at ``[0, 0, 0, 0]``.
      The slots are reported as Acc, Precision, Recall and F1 by
      ``get_total_models_scores``.  Presumably the pipeline adds each
      cluster's scores into these slots -- that code is not part of this
      class; verify against the caller.
    """

    # Labels printed for the metric slots, in slot order.
    _METRIC_LABELS = ("Acc:", "Precision:", "Recall:", "F1:")

    def __init__(self):
        self.cmodels = self.init_classification_models()

    def get_cmodels(self):
        """Return the model table (the same dict object held by the instance)."""
        return self.cmodels

    def init_classification_models(self):
        """Build a fresh model table with default estimators and zeroed metrics.

        Returns:
            dict: ``{title: {'model': estimator, 'metrics': [0, 0, 0, 0]}}``,
            in the insertion order listed below.
        """
        estimators = {
            'dummy_classifier': DummyClassifier(strategy="most_frequent"),
            'logistic_regression': linear_model.LogisticRegression(),
            'support_vector_machine': svm.SVC(),
            'gaussian_naive_bayes': naive_bayes.GaussianNB(),
            'stochastic_gradient_descent_classifier': linear_model.SGDClassifier(),
            'knn': neighbors.KNeighborsClassifier(),
            'decision_tree': tree.DecisionTreeClassifier(),
            'random_forest_classifier': ensemble.RandomForestClassifier(),
            'gradient_boosting_classifier': ensemble.GradientBoostingClassifier(),
            'lgbm_classifier': LGBMClassifier(),
            'XGBoost_classifier': xg.sklearn.XGBClassifier(),
        }

        # Every entry gets its own metrics list so updates to one model never
        # leak into another.
        return {
            title: {
                'model': estimator,
                'metrics': [0] * len(self._METRIC_LABELS),
            }
            for title, estimator in estimators.items()
        }

    def get_total_models_scores(self, n_clusters=1):
        """Print each model's metric slots divided by ``n_clusters``.

        Args:
            n_clusters: Divisor applied to every metric slot; must be positive.

        Raises:
            ValueError: If ``n_clusters`` is zero or negative.  Checked before
                anything is printed so no partial report is emitted.
        """
        if n_clusters <= 0:
            raise ValueError(
                f"n_clusters must be a positive number, got {n_clusters!r}"
            )

        for model_title, model in self.cmodels.items():
            print("\n************************************\n")
            print("Current model:", model_title, end="\n")
            for slot, label in enumerate(self._METRIC_LABELS):
                print(label, model['metrics'][slot] / n_clusters)
|
Loading…
Reference in New Issue