From 8507ff5761e8c8f4d74672f8718b5604cce7fa4c Mon Sep 17 00:00:00 2001 From: junos Date: Sat, 21 Aug 2021 17:45:23 +0200 Subject: [PATCH] Check for NaNs in the data, since sklearn.LinearRegression cannot handle them. --- machine_learning/pipeline.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/machine_learning/pipeline.py b/machine_learning/pipeline.py index 945a9e5..e07adae 100644 --- a/machine_learning/pipeline.py +++ b/machine_learning/pipeline.py @@ -180,12 +180,15 @@ class ModelValidation: def cross_validate(self): if self.model is None: - raise ValueError( + raise TypeError( "Please set self.model first, e.g. self.model = sklearn.linear_model.LinearRegression()" ) - # TODO Is ValueError appropriate here? if self.cv is None: - raise ValueError("Please use set_cv_method() first.") + raise TypeError("Please use set_cv_method() first.") + if self.X.isna().any().any() or self.y.isna().any().any(): + raise ValueError( + "NaNs were found in either X or y. Please, check your data before continuing." + ) return cross_val_score( estimator=self.model, X=self.X,