From e09b2fc9681402f00e7514f6220aa569d52f4787 Mon Sep 17 00:00:00 2001
From: DerWeh
Date: Wed, 3 Apr 2024 23:01:12 +0200
Subject: [PATCH] MAINT: make compatible with NumPy 2

Signed-off-by: DerWeh
---
 .../interpret/utils/_clean_simple.py         |  4 +--
 .../interpret/utils/_clean_x.py              | 20 ++++++-------
 .../interpret/utils/_measure_interactions.py |  2 +-
 .../tests/utils/test_clean_x.py              | 30 +++++++++----------
 4 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/python/interpret-core/interpret/utils/_clean_simple.py b/python/interpret-core/interpret/utils/_clean_simple.py
index bbad531e6..9a9190307 100644
--- a/python/interpret-core/interpret/utils/_clean_simple.py
+++ b/python/interpret-core/interpret/utils/_clean_simple.py
@@ -260,9 +260,9 @@ def typify_classification(vec):
         ):
             dtype = np.bool_
         else:
-            dtype = np.unicode_
+            dtype = np.str_
     else:
-        dtype = np.unicode_
+        dtype = np.str_
 
     return vec.astype(dtype, copy=False)
diff --git a/python/interpret-core/interpret/utils/_clean_x.py b/python/interpret-core/interpret/utils/_clean_x.py
index fb7db6357..9d09ebfc3 100644
--- a/python/interpret-core/interpret/utils/_clean_x.py
+++ b/python/interpret-core/interpret/utils/_clean_x.py
@@ -294,8 +294,8 @@
         dict,
         Ellipsis,
         np.csingle,
-        np.complex_,
-        np.clongfloat,
+        np.complex128,
+        np.clongdouble,
         np.void,
     ]
 )
@@ -314,7 +314,7 @@ def _densify_object_ndarray(X_col):
     types = set(map(type, X_col))
     if len(types) == 1:
         if str in types:
-            return X_col.astype(np.unicode_)
+            return X_col.astype(np.str_)
         elif bool in types:
             return X_col.astype(np.bool_)
@@ -353,7 +353,7 @@ def _densify_object_ndarray(X_col):
         # it will silently convert negative integers to unsigned!
 
         # TODO : should this be np.float64 with a check for big integers
-        return X_col.astype(np.unicode_)
+        return X_col.astype(np.str_)
 
     if all(
         one_type is float or issubclass(one_type, np.floating) for one_type in types
@@ -430,7 +430,7 @@ def _densify_object_ndarray(X_col):
     # writing our own cython code that can be more efficient at walking through items in an array. If we write
     # our own cython there is the added advantage that we can check types in the same loop and therefore eliminate
     # the costly "set(map(type, X_col))" calls above
-    return X_col.astype(np.unicode_)
+    return X_col.astype(np.str_)
 
 
 def _process_column_initial(X_col, nonmissings, processing, min_unique_continuous):
@@ -448,9 +448,9 @@ def _process_column_initial(X_col, nonmissings, processing, min_unique_continuou
 
         if issubclass(uniques.dtype.type, np.floating):
             floats = uniques.astype(np.float64, copy=False)
-            uniques = floats.astype(np.unicode_)
+            uniques = floats.astype(np.str_)
         else:
-            uniques = uniques.astype(np.unicode_, copy=False)
+            uniques = uniques.astype(np.str_, copy=False)
 
         try:
             # we rely here on there being a round trip format within this language from float64 to text to float64
@@ -544,7 +544,7 @@ def _encode_categorical_existing(X_col, nonmissings, categories):
 
     if issubclass(X_col.dtype.type, np.floating):
         uniques = uniques.astype(np.float64, copy=False)
-    uniques = uniques.astype(np.unicode_, copy=False)
+    uniques = uniques.astype(np.str_, copy=False)
 
     mapping = np.fromiter(
         (categories.get(val, -1) for val in uniques), np.int64, count=len(uniques)
@@ -725,7 +725,7 @@ def _process_continuous(X_col, nonmissings):
                     floats[idx] = one_item_array.astype(dtype=np.float64)[0]
                 except TypeError:
                     # use .astype instead of str(one_item_array) here to ensure identical string categories
-                    one_str_array = one_item_array.astype(dtype=np.unicode_)
+                    one_str_array = one_item_array.astype(dtype=np.str_)
                     try:
                         # use .astype(..) instead of float(..) to ensure identical conversion results
                         floats[idx] = one_str_array.astype(dtype=np.float64)[0]
@@ -948,7 +948,7 @@ def _process_pandas_column(X_col, categories, feature_type, min_unique_continuou
         # unlike other missing value types, we get back -1's for missing here, so no need to drop them
         X_col = X_col.values
         is_ordered = X_col.ordered
-        pd_categories = X_col.categories.values.astype(dtype=np.unicode_, copy=False)
+        pd_categories = X_col.categories.values.astype(dtype=np.str_, copy=False)
         X_col = X_col.codes
 
     if feature_type == "ignore":
diff --git a/python/interpret-core/interpret/utils/_measure_interactions.py b/python/interpret-core/interpret/utils/_measure_interactions.py
index 0817c3484..000fee7d0 100644
--- a/python/interpret-core/interpret/utils/_measure_interactions.py
+++ b/python/interpret-core/interpret/utils/_measure_interactions.py
@@ -159,7 +159,7 @@ def measure_interactions(
         try:
             y_discard = y.astype(dtype=np.float64, copy=False)
         except (TypeError, ValueError):
-            y_discard = y.astype(dtype=np.unicode_, copy=False)
+            y_discard = y.astype(dtype=np.str_, copy=False)
 
         target_type = type_of_target(y_discard)
         if target_type == "continuous":
diff --git a/python/interpret-core/tests/utils/test_clean_x.py b/python/interpret-core/tests/utils/test_clean_x.py
index 7f1f6398e..e2472d469 100644
--- a/python/interpret-core/tests/utils/test_clean_x.py
+++ b/python/interpret-core/tests/utils/test_clean_x.py
@@ -628,7 +628,7 @@ def test_process_continuous_obj_hard_bad():
 
 
 def test_process_continuous_str_simple():
-    vals, bad = _process_continuous(np.array(["1", "2.5"], dtype=np.unicode_), None)
+    vals, bad = _process_continuous(np.array(["1", "2.5"], dtype=np.str_), None)
     assert bad is None
     assert vals.dtype == np.float64
     assert np.array_equal(vals, np.array([1, 2.5], dtype=np.float64))
@@ -636,7 +636,7 @@
 
 def test_process_continuous_str_simple_missing():
     vals, bad = _process_continuous(
-        np.array(["1", "2.5"], dtype=np.unicode_),
+        np.array(["1", "2.5"], dtype=np.str_),
         np.array([True, True, False], dtype=np.bool_),
     )
     assert bad is None
@@ -649,7 +649,7 @@
 
 def test_process_continuous_str_hard_bad():
     vals, bad = _process_continuous(
-        np.array(["1", "2.5", "bad"], dtype=np.unicode_),
+        np.array(["1", "2.5", "bad"], dtype=np.str_),
         np.array([True, True, True, False], dtype=np.bool_),
     )
     assert len(bad) == 4
@@ -708,7 +708,7 @@ def test_process_column_initial_obj_obj():
 
 def test_process_column_initial_alphabetical_nomissing():
     encoded, c = _process_column_initial(
-        np.array(["xyz", "abc", "xyz"], dtype=np.unicode_),
+        np.array(["xyz", "abc", "xyz"], dtype=np.str_),
         None,
         "nominal_alphabetical",
         None,
@@ -723,7 +723,7 @@
 
 def test_process_column_initial_alphabetical_missing():
     encoded, c = _process_column_initial(
-        np.array(["xyz", "abc", "xyz"], dtype=np.unicode_),
+        np.array(["xyz", "abc", "xyz"], dtype=np.str_),
         np.array([True, True, False, True], dtype=np.bool_),
         "nominal_alphabetical",
         None,
@@ -738,7 +738,7 @@
 
 def test_process_column_initial_prevalence_nomissing():
     encoded, c = _process_column_initial(
-        np.array(["xyz", "abc", "xyz"], dtype=np.unicode_),
+        np.array(["xyz", "abc", "xyz"], dtype=np.str_),
         None,
         "nominal_prevalence",
         None,
@@ -753,7 +753,7 @@
 
 def test_process_column_initial_prevalence_missing():
     encoded, c = _process_column_initial(
-        np.array(["xyz", "abc", "xyz"], dtype=np.unicode_),
+        np.array(["xyz", "abc", "xyz"], dtype=np.str_),
         np.array([True, True, False, True], dtype=np.bool_),
         "nominal_prevalence",
         None,
@@ -768,7 +768,7 @@
 
 def test_process_column_initial_float64_nomissing():
     encoded, c = _process_column_initial(
-        np.array(["11.1", "2.2", "11.1"], dtype=np.unicode_),
+        np.array(["11.1", "2.2", "11.1"], dtype=np.str_),
         None,
         "ANYTHING_ELSE",
         None,
@@ -783,7 +783,7 @@
 
 def test_process_column_initial_float64_missing():
     encoded, c = _process_column_initial(
-        np.array(["11.1", "2.2", "11.1"], dtype=np.unicode_),
+        np.array(["11.1", "2.2", "11.1"], dtype=np.str_),
         np.array([True, True, False, True], dtype=np.bool_),
         "ANYTHING_ELSE",
         None,
@@ -1016,7 +1016,7 @@ def test_encode_categorical_existing_obj_floats():
                 np.float16(2.2),
                 np.float32(3.3),
                 np.float64(4.4),
-                np.longfloat(5.5),
+                np.longdouble(5.5),
             ],
             dtype=np.object_,
         ),
@@ -1110,7 +1110,7 @@ def test_encode_categorical_existing_obj_obj():
 def test_encode_categorical_existing_str():
     c = {"abc": 1, "def": 2, "ghi": 3}
     encoded, bad = _encode_categorical_existing(
-        np.array(["abc", "ghi", "def", "something"], dtype=np.unicode_),
+        np.array(["abc", "ghi", "def", "something"], dtype=np.str_),
         np.array([True, True, False, True, True], dtype=np.bool_),
         c,
     )
@@ -1144,7 +1144,7 @@ def test_encode_categorical_existing_int8():
 def test_encode_categorical_existing_bool():
     c = {"False": 1, "True": 2}
     encoded, bad = _encode_categorical_existing(
-        np.array([False, True, False], dtype=np.unicode_),
+        np.array([False, True, False], dtype=np.str_),
         np.array([True, True, False, True], dtype=np.bool_),
         c,
     )
@@ -1157,7 +1157,7 @@
 def test_encode_categorical_existing_bool_true():
     c = {"True": 1}
     encoded, bad = _encode_categorical_existing(
-        np.array([False, True, False], dtype=np.unicode_),
+        np.array([False, True, False], dtype=np.str_),
         np.array([True, True, False, True], dtype=np.bool_),
         c,
     )
@@ -1170,7 +1170,7 @@
 def test_encode_categorical_existing_bool_false():
     c = {"False": 1}
     encoded, bad = _encode_categorical_existing(
-        np.array([False, True, False], dtype=np.unicode_),
+        np.array([False, True, False], dtype=np.str_),
         np.array([True, True, False, True], dtype=np.bool_),
         c,
     )
@@ -1794,7 +1794,7 @@ def test_unify_columns_pandas_missings_float64():
 
 
 def test_unify_columns_pandas_missings_longfloat():
-    check_pandas_float(np.longfloat, -1.1, 2.2)
+    check_pandas_float(np.longdouble, -1.1, 2.2)
 
 
 def test_unify_columns_pandas_missings_float32():
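
Note for reviewers: every rename above swaps an alias that NumPy 2.0 removed
for the canonical name that exists in both major versions (np.unicode_ ->
np.str_, np.complex_ -> np.complex128, np.longfloat -> np.longdouble,
np.clongfloat -> np.clongdouble). A minimal sketch for sanity-checking that
the replacements behave identically under either NumPy version; the sample
values are illustrative only and are not taken from the test suite:

    import numpy as np

    # The replacement names below exist in NumPy 1.x and 2.x alike; the
    # removed underscore names were plain aliases, so behavior is unchanged.
    strs = np.array(["1", "2.5"]).astype(np.str_)  # was np.unicode_
    cplx = np.zeros(2, dtype=np.complex128)        # was np.complex_
    ext = np.longdouble(5.5)                       # was np.longfloat
    cext = np.clongdouble(1 + 2j)                  # was np.clongfloat

    assert strs.dtype.kind == "U"                  # unicode string dtype
    assert cplx.dtype == np.dtype("complex128")
    assert np.float64(ext) == 5.5
    assert cext.real == 1.0 and cext.imag == 2.0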