From 3fe7f5781770fdfc127e2742adc18e13115542db Mon Sep 17 00:00:00 2001 From: AleksanderWWW Date: Wed, 17 Jan 2024 23:26:19 +0100 Subject: [PATCH 1/4] dummy regressors and classifiers --- tests/test_e2e.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 64b0e6f..9fe3eee 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -6,10 +6,9 @@ import pytest from sklearn import datasets from sklearn.cluster import KMeans -from sklearn.dummy import DummyRegressor -from sklearn.linear_model import ( - LinearRegression, - LogisticRegression, +from sklearn.dummy import ( + DummyClassifier, + DummyRegressor, ) from sklearn.model_selection import ( GridSearchCV, @@ -27,7 +26,7 @@ def test_classifier_summary(): y = iris.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5) - model = LogisticRegression(C=1e5) + model = DummyClassifier() model.fit(X_train, y_train) run["summary"] = npt_utils.create_classifier_summary(model, X_train, X_test, y_train, y_test) @@ -42,7 +41,7 @@ def test_regressor_summary(): X, y = datasets.load_diabetes(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5) - model = LinearRegression() + model = DummyRegressor() model.fit(X_train, y_train) run["summary"] = npt_utils.create_regressor_summary(model, X_train, X_test, y_train, y_test) From 203a340fde98aee6661f8d26576a0157201b3991 Mon Sep 17 00:00:00 2001 From: AleksanderWWW Date: Wed, 17 Jan 2024 23:33:30 +0100 Subject: [PATCH 2/4] context manager + type hints --- tests/test_e2e.py | 49 +++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 9fe3eee..154e632 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -1,7 +1,10 @@ try: - from neptune import init_run + from neptune import ( + Run, + init_run, + ) except ImportError: - from neptune.new import init_run + from neptune.new import init_run, Run import pytest from sklearn import datasets @@ -19,47 +22,47 @@ def test_classifier_summary(): - run = init_run() + with init_run() as run: - iris = datasets.load_iris() - X = iris.data[:, :2] - y = iris.target - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5) + iris = datasets.load_iris() + X = iris.data[:, :2] + y = iris.target + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5) - model = DummyClassifier() - model.fit(X_train, y_train) + model = DummyClassifier() + model.fit(X_train, y_train) - run["summary"] = npt_utils.create_classifier_summary(model, X_train, X_test, y_train, y_test) + run["summary"] = npt_utils.create_classifier_summary(model, X_train, X_test, y_train, y_test) run.wait() validate_run(run, log_charts=True) def test_regressor_summary(): - run = init_run() + with init_run() as run: - X, y = datasets.load_diabetes(return_X_y=True) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5) + X, y = datasets.load_diabetes(return_X_y=True) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5) - model = DummyRegressor() - model.fit(X_train, y_train) + model = DummyRegressor() + model.fit(X_train, y_train) - run["summary"] = npt_utils.create_regressor_summary(model, X_train, X_test, y_train, y_test) + run["summary"] = npt_utils.create_regressor_summary(model, X_train, X_test, y_train, y_test) run.wait() validate_run(run, log_charts=True) def test_kmeans_summary(): - run = init_run() + with init_run() as run: - iris = datasets.load_iris() - X = iris.data[:, :2] + iris = datasets.load_iris() + X = iris.data[:, :2] - model = KMeans() - model.fit(X) + model = KMeans() + model.fit(X) - run["summary"] = npt_utils.create_kmeans_summary(model, X, n_clusters=3) + run["summary"] = npt_utils.create_kmeans_summary(model, X, n_clusters=3) run.wait() validate_run(run, log_charts=True) @@ -92,7 +95,7 @@ def test_unsupported_object(): run.wait() -def validate_run(run, log_charts): +def validate_run(run: Run, log_charts: bool) -> None: assert run.exists("summary/all_params") assert run.exists("summary/pickled_model") assert run.exists("summary/integration/about/neptune-sklearn") From 04eb8b504a6365de60c754ef296ae32bb5f59e9d Mon Sep 17 00:00:00 2001 From: AleksanderWWW Date: Thu, 18 Jan 2024 10:40:01 +0100 Subject: [PATCH 3/4] use fixtures --- tests/conftest.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_e2e.py | 47 +++++++++++++++++++---------------------------- 2 files changed, 63 insertions(+), 28 deletions(-) create mode 100644 tests/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..e9ac116 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,44 @@ +from dataclasses import dataclass +from typing import Optional + +import numpy as np +from pytest import fixture +from sklearn import datasets +from sklearn.model_selection import train_test_split +from sklearn.utils import Bunch + + +@dataclass +class Dataset: + x: np.ndarray + y: np.ndarray + x_train: np.ndarray + x_test: np.ndarray + y_train: np.ndarray + y_test: np.ndarray + + +_IRIS_DATASET: Optional[Bunch] = None +_DIABETES_DATASET: Optional[Bunch] = None + + +@fixture(scope="session") +def iris() -> Dataset: + global _IRIS_DATASET + if _IRIS_DATASET is None: + _IRIS_DATASET = datasets.load_iris() + x = _IRIS_DATASET.data[:, :2] + y = _IRIS_DATASET.target + x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5) + return Dataset(x, y, x_train, x_test, y_train, y_test) + + +@fixture(scope="session") +def diabetes() -> Dataset: + global _DIABETES_DATASET + if _DIABETES_DATASET is None: + _DIABETES_DATASET = datasets.load_diabetes(return_X_y=True) + x, y = _DIABETES_DATASET + + x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5) + return Dataset(x, y, x_train, x_test, y_train, y_test) diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 154e632..ed032dd 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -21,51 +21,42 @@ import neptune_sklearn as npt_utils -def test_classifier_summary(): +def test_classifier_summary(iris): with init_run() as run: - - iris = datasets.load_iris() - X = iris.data[:, :2] - y = iris.target - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5) - model = DummyClassifier() - model.fit(X_train, y_train) + model.fit(iris.x_train, iris.y_train) - run["summary"] = npt_utils.create_classifier_summary(model, X_train, X_test, y_train, y_test) + run["summary"] = npt_utils.create_classifier_summary( + model, iris.x_train, iris.x_test, iris.y_train, iris.y_test + ) - run.wait() - validate_run(run, log_charts=True) + run.wait() + validate_run(run, log_charts=True) -def test_regressor_summary(): +def test_regressor_summary(diabetes): with init_run() as run: - - X, y = datasets.load_diabetes(return_X_y=True) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5) - model = DummyRegressor() - model.fit(X_train, y_train) + model.fit(diabetes.x_train, diabetes.y_train) - run["summary"] = npt_utils.create_regressor_summary(model, X_train, X_test, y_train, y_test) + run["summary"] = npt_utils.create_regressor_summary( + model, diabetes.x_train, diabetes.x_test, diabetes.y_train, diabetes.y_test + ) - run.wait() - validate_run(run, log_charts=True) + run.wait() + validate_run(run, log_charts=True) -def test_kmeans_summary(): +def test_kmeans_summary(iris): with init_run() as run: - iris = datasets.load_iris() - X = iris.data[:, :2] - model = KMeans() - model.fit(X) + model.fit(iris.x) - run["summary"] = npt_utils.create_kmeans_summary(model, X, n_clusters=3) + run["summary"] = npt_utils.create_kmeans_summary(model, iris.x, n_clusters=3) - run.wait() - validate_run(run, log_charts=True) + run.wait() + validate_run(run, log_charts=True) @pytest.mark.filterwarnings("error::neptune.common.warnings.NeptuneUnsupportedType") From 549a798a0db18599843f2891d0adb551b9730df8 Mon Sep 17 00:00:00 2001 From: Siddhant Sadangi Date: Thu, 18 Jan 2024 17:58:10 +0100 Subject: [PATCH 4/4] Refactoring `test_unsupported_object` --- tests/test_e2e.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/tests/test_e2e.py b/tests/test_e2e.py index ed032dd..d17c00c 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -4,19 +4,15 @@ init_run, ) except ImportError: - from neptune.new import init_run, Run + from neptune.new import Run, init_run import pytest -from sklearn import datasets from sklearn.cluster import KMeans from sklearn.dummy import ( DummyClassifier, DummyRegressor, ) -from sklearn.model_selection import ( - GridSearchCV, - train_test_split, -) +from sklearn.model_selection import GridSearchCV import neptune_sklearn as npt_utils @@ -60,28 +56,26 @@ def test_kmeans_summary(iris): @pytest.mark.filterwarnings("error::neptune.common.warnings.NeptuneUnsupportedType") -def test_unsupported_object(): +def test_unsupported_object(diabetes): """This method checks if Neptune throws a `NeptuneUnsupportedType` warning if expected metadata is not found or skips trying to log such metadata""" with init_run() as run: - - X, y = datasets.load_diabetes(return_X_y=True) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5) - model = DummyRegressor() + model.fit(diabetes.x_train, diabetes.y_train) param_grid = { "strategy": ["mean", "median", "quantile"], "quantile": [0.1, 0.5, 1.0], } - X, y = datasets.fetch_california_housing(return_X_y=True)[:10] - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) - - grid_cv = GridSearchCV(model, param_grid, scoring="neg_mean_absolute_error", cv=2).fit(X_train, y_train) + grid_cv = GridSearchCV(model, param_grid, scoring="neg_mean_absolute_error", cv=2).fit( + diabetes.x_train, diabetes.y_train + ) - run["regressor_summary"] = npt_utils.create_regressor_summary(grid_cv, X_train, X_test, y_train, y_test) + run["regressor_summary"] = npt_utils.create_regressor_summary( + grid_cv, diabetes.x_train, diabetes.x_test, diabetes.y_train, diabetes.y_test + ) run.wait()