MAINT: make compatible with NumPy 2 #525

Merged · 1 commit · Apr 10, 2024
4 changes: 2 additions & 2 deletions python/interpret-core/interpret/utils/_clean_simple.py
@@ -260,9 +260,9 @@ def typify_classification(vec):
         ):
             dtype = np.bool_
         else:
-            dtype = np.unicode_
+            dtype = np.str_
     else:
-        dtype = np.unicode_
+        dtype = np.str_
 
     return vec.astype(dtype, copy=False)

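The whole PR follows a single pattern: aliases removed in NumPy 2 are swapped for spellings that exist in both NumPy 1.x and 2.x (np.unicode_ → np.str_, np.complex_ → np.complex128, np.clongfloat → np.clongdouble, np.longfloat → np.longdouble). A minimal sketch of the cast above, with an illustrative input:

```python
import numpy as np

# np.unicode_ is gone in NumPy 2; np.str_ is the equivalent scalar type
# and is accepted by both NumPy 1.x and 2.x.
vec = np.array(["yes", "no", "yes"], dtype=np.object_)  # illustrative input
out = vec.astype(np.str_, copy=False)
print(out.dtype)  # fixed-width unicode, e.g. '<U3'
```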
20 changes: 10 additions & 10 deletions python/interpret-core/interpret/utils/_clean_x.py
@@ -294,8 +294,8 @@
         dict,
         Ellipsis,
         np.csingle,
-        np.complex_,
-        np.clongfloat,
+        np.complex128,
+        np.clongdouble,
         np.void,
     ]
 )
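A quick check that the two replacements in this hunk are drop-in equivalents (runnable on NumPy 1.x and 2.x alike):

```python
import numpy as np

# np.complex_   -> np.complex128   (double-precision complex)
# np.clongfloat -> np.clongdouble  (extended-precision complex)
z = np.complex128(1 + 2j)
zl = np.clongdouble(1 + 2j)
print(z.dtype)   # complex128
print(zl.dtype)  # platform-dependent, e.g. complex256 on x86-64 Linux
```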
@@ -314,7 +314,7 @@ def _densify_object_ndarray(X_col):
     types = set(map(type, X_col))
     if len(types) == 1:
         if str in types:
-            return X_col.astype(np.unicode_)
+            return X_col.astype(np.str_)
         elif bool in types:
             return X_col.astype(np.bool_)

@@ -353,7 +353,7 @@ def _densify_object_ndarray(X_col):
     # it will silently convert negative integers to unsigned!
 
     # TODO : should this be np.float64 with a check for big integers
-    return X_col.astype(np.unicode_)
+    return X_col.astype(np.str_)
 
     if all(
         one_type is float or issubclass(one_type, np.floating) for one_type in types
@@ -430,7 +430,7 @@ def _densify_object_ndarray(X_col):
     # writing our own cython code that can be more efficient at walking through items in an array. If we write
     # our own cython there is the added advantage that we can check types in the same loop and therefore eliminate
     # the costly "set(map(type, X_col))" calls above
-    return X_col.astype(np.unicode_)
+    return X_col.astype(np.str_)
 
 
 def _process_column_initial(X_col, nonmissings, processing, min_unique_continuous):
@@ -448,9 +448,9 @@ def _process_column_initial(X_col, nonmissings, processing, min_unique_continuous):
 
     if issubclass(uniques.dtype.type, np.floating):
         floats = uniques.astype(np.float64, copy=False)
-        uniques = floats.astype(np.unicode_)
+        uniques = floats.astype(np.str_)
     else:
-        uniques = uniques.astype(np.unicode_, copy=False)
+        uniques = uniques.astype(np.str_, copy=False)
     try:
         # we rely here on there being a round trip format within this language from float64 to text to float64

@@ -544,7 +544,7 @@ def _encode_categorical_existing(X_col, nonmissings, categories):
 
     if issubclass(X_col.dtype.type, np.floating):
         uniques = uniques.astype(np.float64, copy=False)
-    uniques = uniques.astype(np.unicode_, copy=False)
+    uniques = uniques.astype(np.str_, copy=False)
 
     mapping = np.fromiter(
         (categories.get(val, -1) for val in uniques), np.int64, count=len(uniques)
@@ -725,7 +725,7 @@ def _process_continuous(X_col, nonmissings):
             floats[idx] = one_item_array.astype(dtype=np.float64)[0]
         except TypeError:
             # use .astype instead of str(one_item_array) here to ensure identical string categories
-            one_str_array = one_item_array.astype(dtype=np.unicode_)
+            one_str_array = one_item_array.astype(dtype=np.str_)
             try:
                 # use .astype(..) instead of float(..) to ensure identical conversion results
                 floats[idx] = one_str_array.astype(dtype=np.float64)[0]
@@ -948,7 +948,7 @@ def _process_pandas_column(X_col, categories, feature_type, min_unique_continuous):
     # unlike other missing value types, we get back -1's for missing here, so no need to drop them
     X_col = X_col.values
     is_ordered = X_col.ordered
-    pd_categories = X_col.categories.values.astype(dtype=np.unicode_, copy=False)
+    pd_categories = X_col.categories.values.astype(dtype=np.str_, copy=False)
     X_col = X_col.codes
 
     if feature_type == "ignore":
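Several of the hunks above cast unique values through text, and the in-code comment spells out the invariant they depend on: a float64 → text → float64 round trip must be exact. A small sketch of that invariant with the new np.str_ spelling:

```python
import numpy as np

floats = np.array([11.1, 2.2], dtype=np.float64)
texts = floats.astype(np.str_)  # shortest repr, e.g. ['11.1' '2.2']
# parsing the text back must reproduce the original float64 values exactly
assert np.array_equal(texts.astype(np.float64), floats)
```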
@@ -159,7 +159,7 @@ def measure_interactions(
     try:
         y_discard = y.astype(dtype=np.float64, copy=False)
     except (TypeError, ValueError):
-        y_discard = y.astype(dtype=np.unicode_, copy=False)
+        y_discard = y.astype(dtype=np.str_, copy=False)
 
     target_type = type_of_target(y_discard)
     if target_type == "continuous":
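A hedged sketch of the fallback in this hunk: the target is cast to float64 when it is numeric, otherwise to np.str_ (not the removed np.unicode_) before sklearn's type_of_target classifies it; the labels below are illustrative:

```python
import numpy as np
from sklearn.utils.multiclass import type_of_target

y = np.array(["cat", "dog", "cat"], dtype=np.object_)  # illustrative labels
try:
    y_discard = y.astype(dtype=np.float64, copy=False)
except (TypeError, ValueError):
    # non-numeric labels fall back to a string cast
    y_discard = y.astype(dtype=np.str_, copy=False)
print(type_of_target(y_discard))  # 'multiclass'
```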
30 changes: 15 additions & 15 deletions python/interpret-core/tests/utils/test_clean_x.py
@@ -628,15 +628,15 @@ def test_process_continuous_obj_hard_bad():
 
 
 def test_process_continuous_str_simple():
-    vals, bad = _process_continuous(np.array(["1", "2.5"], dtype=np.unicode_), None)
+    vals, bad = _process_continuous(np.array(["1", "2.5"], dtype=np.str_), None)
     assert bad is None
     assert vals.dtype == np.float64
     assert np.array_equal(vals, np.array([1, 2.5], dtype=np.float64))
 
 
 def test_process_continuous_str_simple_missing():
     vals, bad = _process_continuous(
-        np.array(["1", "2.5"], dtype=np.unicode_),
+        np.array(["1", "2.5"], dtype=np.str_),
         np.array([True, True, False], dtype=np.bool_),
     )
     assert bad is None
@@ -649,7 +649,7 @@ def test_process_continuous_str_simple_missing():
 
 def test_process_continuous_str_hard_bad():
     vals, bad = _process_continuous(
-        np.array(["1", "2.5", "bad"], dtype=np.unicode_),
+        np.array(["1", "2.5", "bad"], dtype=np.str_),
         np.array([True, True, True, False], dtype=np.bool_),
     )
     assert len(bad) == 4
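Reduced to plain NumPy (the bad-value bookkeeping inside _process_continuous is elided), these string tests exercise the following behavior:

```python
import numpy as np

# a str-dtype array parses cleanly to float64 ...
print(np.array(["1", "2.5"], dtype=np.str_).astype(np.float64))  # [1.  2.5]

# ... while a non-numeric entry raises, which _process_continuous reports
# as a "bad" value instead of propagating the exception
try:
    np.array(["1", "2.5", "bad"], dtype=np.str_).astype(np.float64)
except ValueError as err:
    print("unparseable:", err)
```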
@@ -708,7 +708,7 @@ def test_process_column_initial_obj_obj():
 
 def test_process_column_initial_alphabetical_nomissing():
     encoded, c = _process_column_initial(
-        np.array(["xyz", "abc", "xyz"], dtype=np.unicode_),
+        np.array(["xyz", "abc", "xyz"], dtype=np.str_),
         None,
         "nominal_alphabetical",
         None,
@@ -723,7 +723,7 @@ def test_process_column_initial_alphabetical_nomissing():
 
 def test_process_column_initial_alphabetical_missing():
     encoded, c = _process_column_initial(
-        np.array(["xyz", "abc", "xyz"], dtype=np.unicode_),
+        np.array(["xyz", "abc", "xyz"], dtype=np.str_),
         np.array([True, True, False, True], dtype=np.bool_),
         "nominal_alphabetical",
         None,
@@ -738,7 +738,7 @@ def test_process_column_initial_alphabetical_missing():
 
 def test_process_column_initial_prevalence_nomissing():
     encoded, c = _process_column_initial(
-        np.array(["xyz", "abc", "xyz"], dtype=np.unicode_),
+        np.array(["xyz", "abc", "xyz"], dtype=np.str_),
         None,
         "nominal_prevalence",
         None,
@@ -753,7 +753,7 @@ def test_process_column_initial_prevalence_nomissing():
 
 def test_process_column_initial_prevalence_missing():
     encoded, c = _process_column_initial(
-        np.array(["xyz", "abc", "xyz"], dtype=np.unicode_),
+        np.array(["xyz", "abc", "xyz"], dtype=np.str_),
         np.array([True, True, False, True], dtype=np.bool_),
         "nominal_prevalence",
         None,
@@ -768,7 +768,7 @@ def test_process_column_initial_prevalence_missing():
 
 def test_process_column_initial_float64_nomissing():
     encoded, c = _process_column_initial(
-        np.array(["11.1", "2.2", "11.1"], dtype=np.unicode_),
+        np.array(["11.1", "2.2", "11.1"], dtype=np.str_),
         None,
         "ANYTHING_ELSE",
         None,
@@ -783,7 +783,7 @@ def test_process_column_initial_float64_nomissing():
 
 def test_process_column_initial_float64_missing():
     encoded, c = _process_column_initial(
-        np.array(["11.1", "2.2", "11.1"], dtype=np.unicode_),
+        np.array(["11.1", "2.2", "11.1"], dtype=np.str_),
         np.array([True, True, False, True], dtype=np.bool_),
         "ANYTHING_ELSE",
         None,
@@ -1016,7 +1016,7 @@ def test_encode_categorical_existing_obj_floats():
             np.float16(2.2),
             np.float32(3.3),
             np.float64(4.4),
-            np.longfloat(5.5),
+            np.longdouble(5.5),
         ],
         dtype=np.object_,
     ),
@@ -1110,7 +1110,7 @@ def test_encode_categorical_existing_obj_obj():
 def test_encode_categorical_existing_str():
     c = {"abc": 1, "def": 2, "ghi": 3}
     encoded, bad = _encode_categorical_existing(
-        np.array(["abc", "ghi", "def", "something"], dtype=np.unicode_),
+        np.array(["abc", "ghi", "def", "something"], dtype=np.str_),
         np.array([True, True, False, True, True], dtype=np.bool_),
         c,
     )
@@ -1144,7 +1144,7 @@ def test_encode_categorical_existing_int8():
 def test_encode_categorical_existing_bool():
     c = {"False": 1, "True": 2}
     encoded, bad = _encode_categorical_existing(
-        np.array([False, True, False], dtype=np.unicode_),
+        np.array([False, True, False], dtype=np.str_),
         np.array([True, True, False, True], dtype=np.bool_),
         c,
     )
@@ -1157,7 +1157,7 @@ def test_encode_categorical_existing_bool():
 def test_encode_categorical_existing_bool_true():
     c = {"True": 1}
     encoded, bad = _encode_categorical_existing(
-        np.array([False, True, False], dtype=np.unicode_),
+        np.array([False, True, False], dtype=np.str_),
         np.array([True, True, False, True], dtype=np.bool_),
         c,
     )
@@ -1170,7 +1170,7 @@ def test_encode_categorical_existing_bool_true():
 def test_encode_categorical_existing_bool_false():
     c = {"False": 1}
     encoded, bad = _encode_categorical_existing(
-        np.array([False, True, False], dtype=np.unicode_),
+        np.array([False, True, False], dtype=np.str_),
         np.array([True, True, False, True], dtype=np.bool_),
         c,
     )
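The bool variants rely on NumPy stringifying Python bools when a str-dtype array is built, so the "False"/"True" keys in the category dict line up with the encoded values; a quick illustration:

```python
import numpy as np

arr = np.array([False, True, False], dtype=np.str_)
print(arr)        # ['False' 'True' 'False']
print(arr.dtype)  # '<U5' -- fixed-width unicode, wide enough for 'False'
```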
@@ -1794,7 +1794,7 @@ def test_unify_columns_pandas_missings_float64():
 
 
 def test_unify_columns_pandas_missings_longfloat():
-    check_pandas_float(np.longfloat, -1.1, 2.2)
+    check_pandas_float(np.longdouble, -1.1, 2.2)
 
 
 def test_unify_columns_pandas_missings_float32():
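The helper swap in the last hunk is the same alias cleanup once more: np.longfloat is gone in NumPy 2, and np.longdouble is the portable spelling; its width is platform-dependent:

```python
import numpy as np

x = np.longdouble(-1.1) + np.longdouble(2.2)
print(np.dtype(np.longdouble))  # e.g. float128 on x86-64 Linux, float64 on Windows
print(x)
```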