# ---------------------------------------------------------------------------
# tests/conftest.py  (new file)
# Session-wide dataset fixtures shared by the end-to-end tests.
# ---------------------------------------------------------------------------
from dataclasses import dataclass
from typing import (
    Optional,
    Tuple,
)

import numpy as np
from pytest import fixture
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.utils import Bunch


@dataclass
class Dataset:
    """Features and targets of a dataset together with one train/test split of them."""

    x: np.ndarray  # features passed to train_test_split
    y: np.ndarray  # targets passed to train_test_split
    x_train: np.ndarray
    x_test: np.ndarray
    y_train: np.ndarray
    y_test: np.ndarray


# Module-level caches so each dataset is loaded at most once per process.
_IRIS_DATASET: Optional[Bunch] = None
# load_diabetes(return_X_y=True) yields a (data, target) tuple, not a Bunch.
_DIABETES_DATASET: Optional[Tuple[np.ndarray, np.ndarray]] = None

# Fixed seed so every run of the suite sees the same train/test partition.
_SPLIT_SEED = 0


def _make_dataset(x: np.ndarray, y: np.ndarray) -> Dataset:
    """Split ``x``/``y`` in half (seeded) and bundle the pieces into a ``Dataset``."""
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5, random_state=_SPLIT_SEED)
    return Dataset(x, y, x_train, x_test, y_train, y_test)


@fixture(scope="session")
def iris() -> Dataset:
    """Return the iris data restricted to its first two feature columns, split 50/50."""
    global _IRIS_DATASET
    if _IRIS_DATASET is None:
        _IRIS_DATASET = datasets.load_iris()
    return _make_dataset(_IRIS_DATASET.data[:, :2], _IRIS_DATASET.target)


@fixture(scope="session")
def diabetes() -> Dataset:
    """Return the diabetes data (all features), split 50/50."""
    global _DIABETES_DATASET
    if _DIABETES_DATASET is None:
        _DIABETES_DATASET = datasets.load_diabetes(return_X_y=True)
    x, y = _DIABETES_DATASET
    return _make_dataset(x, y)


# ---------------------------------------------------------------------------
# tests/test_e2e.py  (state after the change; continues below)
# ---------------------------------------------------------------------------
try:
    from neptune import (
        Run,
        init_run,
    )
except ImportError:
    from neptune.new import Run, init_run

import pytest
from sklearn.cluster import KMeans
from sklearn.dummy import (
    DummyClassifier,
    DummyRegressor,
)
from sklearn.model_selection import GridSearchCV

import neptune_sklearn as npt_utils


def test_classifier_summary(iris):
    """Log a classifier summary for a ``DummyClassifier`` fitted on iris and validate the run."""
    with init_run() as run:
        model = DummyClassifier()
        model.fit(iris.x_train, iris.y_train)

        run["summary"] = npt_utils.create_classifier_summary(
            model, iris.x_train, iris.x_test, iris.y_train, iris.y_test
        )

        run.wait()
        validate_run(run, log_charts=True)


def test_regressor_summary(diabetes):
    """Log a regressor summary for a ``DummyRegressor`` fitted on diabetes and validate the run."""
    with init_run() as run:
        model = DummyRegressor()
        model.fit(diabetes.x_train, diabetes.y_train)

        run["summary"] = npt_utils.create_regressor_summary(
            model, diabetes.x_train, diabetes.x_test, diabetes.y_train, diabetes.y_test
        )

        run.wait()
        validate_run(run, log_charts=True)


def test_kmeans_summary(iris):
    """Log a k-means summary for a ``KMeans`` model fitted on the iris features and validate the run."""
    with init_run() as run:
        model = KMeans()
        model.fit(iris.x)

        run["summary"] = npt_utils.create_kmeans_summary(model, iris.x, n_clusters=3)

        run.wait()
        validate_run(run, log_charts=True)
@pytest.mark.filterwarnings("error::neptune.common.warnings.NeptuneUnsupportedType")
def test_unsupported_object(diabetes):
    """Log a regressor summary for a ``GridSearchCV`` wrapper without unsupported-type warnings.

    The ``filterwarnings`` marker escalates ``NeptuneUnsupportedType`` to an error,
    so this test fails if building or logging the summary emits that warning.
    """
    with init_run() as run:
        param_grid = {
            "strategy": ["mean", "median", "quantile"],
            "quantile": [0.1, 0.5, 1.0],
        }

        # GridSearchCV clones the estimator it receives and fits the clones itself,
        # so the base regressor is handed over unfitted.
        grid_cv = GridSearchCV(DummyRegressor(), param_grid, scoring="neg_mean_absolute_error", cv=2).fit(
            diabetes.x_train, diabetes.y_train
        )

        run["regressor_summary"] = npt_utils.create_regressor_summary(
            grid_cv, diabetes.x_train, diabetes.x_test, diabetes.y_train, diabetes.y_test
        )

        run.wait()


def validate_run(run: Run, log_charts: bool) -> None:
    """Assert that the expected fields exist under ``summary`` in ``run``."""
    # NOTE(review): `log_charts` is not used by the assertions visible here; the
    # helper may continue past this view — confirm against the full file.
    assert run.exists("summary/all_params")
    assert run.exists("summary/pickled_model")
    assert run.exists("summary/integration/about/neptune-sklearn")