Skip to content

Commit

Permalink
Stop using the Boston housing dataset
Browse files Browse the repository at this point in the history
Instead we use the California housing dataset.
  • Loading branch information
betatim committed Dec 12, 2024
1 parent 029b708 commit 3e090c2
Showing 1 changed file with 6 additions and 23 deletions.
29 changes: 6 additions & 23 deletions python/cuml/cuml/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,34 +245,17 @@ def housing_dataset():
return X, y, feature_names


@pytest.fixture(scope="module")
def deprecated_boston_dataset():
    """Download the legacy Boston house-prices CSV and wrap it in a ``Bunch``.

    The dataset was removed in Scikit-learn 1.2, so it is read from a pinned
    scikit-learn commit instead. It should be swapped for a better test
    dataset, see https://github.com/rapidsai/cuml/issues/5158

    Returns
    -------
    Bunch
        ``data`` holds columns 0-12 and ``target`` holds column 13, both
        cast to ``float64``.
    """
    csv_url = "https://raw.githubusercontent.com/scikit-learn/scikit-learn/baf828ca126bcb2c0ad813226963621cafe38adb/sklearn/datasets/data/boston_house_prices.csv"  # noqa: E501
    raw = pd.read_csv(csv_url, header=None)

    # The top-left cell of the file is parsed as the sample count.
    row_count = int(raw[0][0])

    # Rows are taken from index 2 up to (not including) ``row_count``.
    # NOTE(review): if rows 0 and 1 are both header rows, this upper bound
    # leaves out the final two samples -- confirm against the CSV layout.
    rows = slice(2, row_count)
    feature_columns = list(np.arange(13))

    features = raw[feature_columns].values[rows].astype(np.float64)
    labels = raw[13].values[rows].astype(np.float64)

    return Bunch(data=features, target=labels)


@pytest.fixture(
scope="module",
params=["digits", "deprecated_boston_dataset", "diabetes", "cancer"],
params=["digits", "housing_dataset", "diabetes", "cancer"],
)
def test_datasets(request, deprecated_boston_dataset):
def test_datasets(request, housing_dataset):
X, y, _ = housing_dataset
housing_dataset = Bunch(data=X.get(), target=y.get())

test_datasets_dict = {
"digits": datasets.load_digits(),
"deprecated_boston_dataset": deprecated_boston_dataset,
"housing_dataset": housing_dataset,
"diabetes": datasets.load_diabetes(),
"cancer": datasets.load_breast_cancer(),
}
Expand Down

0 comments on commit 3e090c2

Please sign in to comment.