MAINT: make compatible with NumPy 2 (#525)
Signed-off-by: DerWeh <[email protected]>
DerWeh authored Apr 10, 2024
1 parent d06e9f0 commit 346362f
Showing 4 changed files with 28 additions and 28 deletions.
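All four files make the same mechanical substitution: the NumPy 1.x scalar-type aliases np.unicode_, np.complex_, np.clongfloat, and np.longfloat were removed in NumPy 2.0 (NEP 52), and each is replaced here by the name it already aliased. A minimal sketch, runnable on either major version, that checks the renames are behavior-preserving wherever the old alias still exists:

    import numpy as np

    # Old NumPy 1.x alias -> surviving NumPy 2.0 name
    renames = {
        "unicode_": np.str_,
        "complex_": np.complex128,
        "clongfloat": np.clongdouble,
        "longfloat": np.longdouble,
    }
    for old, new in renames.items():
        alias = getattr(np, old, None)  # None on NumPy >= 2.0
        status = "removed" if alias is None else ("identical" if alias is new else "different")
        print(f"{old} -> {new.__name__}: {status}")

On NumPy 1.x every pair prints "identical", which is why the new spellings can be adopted without a version guard.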
python/interpret-core/interpret/utils/_clean_simple.py (4 changes: 2 additions & 2 deletions)
@@ -260,9 +260,9 @@ def typify_classification(vec):
         ):
             dtype = np.bool_
         else:
-            dtype = np.unicode_
+            dtype = np.str_
     else:
-        dtype = np.unicode_
+        dtype = np.str_
 
     return vec.astype(dtype, copy=False)
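The replacement is the NumPy 2 spelling of the same scalar type, so string targets still come back as a fixed-width unicode array. A small illustration of the conversion this function performs, with hypothetical labels:

    import numpy as np

    vec = np.array(["cat", "dog", "cat"], dtype=np.object_)
    typed = vec.astype(np.str_, copy=False)
    print(typed.dtype)  # <U3, exactly what np.unicode_ produced on NumPy 1.x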
python/interpret-core/interpret/utils/_clean_x.py (20 changes: 10 additions & 10 deletions)
@@ -294,8 +294,8 @@
         dict,
         Ellipsis,
         np.csingle,
-        np.complex_,
-        np.clongfloat,
+        np.complex128,
+        np.clongdouble,
         np.void,
     ]
 )
@@ -314,7 +314,7 @@ def _densify_object_ndarray(X_col):
     types = set(map(type, X_col))
     if len(types) == 1:
         if str in types:
-            return X_col.astype(np.unicode_)
+            return X_col.astype(np.str_)
         elif bool in types:
             return X_col.astype(np.bool_)
 
@@ -353,7 +353,7 @@ def _densify_object_ndarray(X_col):
             # it will silently convert negative integers to unsigned!
 
             # TODO : should this be np.float64 with a check for big integers
-            return X_col.astype(np.unicode_)
+            return X_col.astype(np.str_)
 
     if all(
         one_type is float or issubclass(one_type, np.floating) for one_type in types
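The two pitfalls named in the comments above are easy to reproduce, which is why mixed integer columns are stringified rather than cast numerically; a quick demonstration:

    import numpy as np

    # casting negative integers to an unsigned dtype wraps around silently
    print(np.array([-1], dtype=np.int64).astype(np.uint64))  # [18446744073709551615]

    # float64 carries a 53-bit mantissa, so big integers silently lose precision
    print(float(2**53 + 1) == float(2**53))  # True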
@@ -430,7 +430,7 @@ def _densify_object_ndarray(X_col):
     # writing our own cython code that can be more efficient at walking through items in an array. If we write
     # our own cython there is the added advantage that we can check types in the same loop and therefore eliminate
     # the costly "set(map(type, X_col))" calls above
-    return X_col.astype(np.unicode_)
+    return X_col.astype(np.str_)
 
 
 def _process_column_initial(X_col, nonmissings, processing, min_unique_continuous):
@@ -448,9 +448,9 @@ def _process_column_initial(X_col, nonmissings, processing, min_unique_continuous):
 
     if issubclass(uniques.dtype.type, np.floating):
         floats = uniques.astype(np.float64, copy=False)
-        uniques = floats.astype(np.unicode_)
+        uniques = floats.astype(np.str_)
     else:
-        uniques = uniques.astype(np.unicode_, copy=False)
+        uniques = uniques.astype(np.str_, copy=False)
     try:
         # we rely here on there being a round trip format within this language from float64 to text to float64
 
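The round-trip assumption in the comment above holds because NumPy formats float64 values with the shortest decimal string that parses back to the same bits; for example:

    import numpy as np

    uniques = np.array([0.1, 1 / 3, 1e-300], dtype=np.float64)
    as_text = uniques.astype(np.str_)  # '0.1', '0.3333333333333333', '1e-300'
    assert np.array_equal(as_text.astype(np.float64), uniques)  # exact round trip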
@@ -544,7 +544,7 @@ def _encode_categorical_existing(X_col, nonmissings, categories):
 
     if issubclass(X_col.dtype.type, np.floating):
         uniques = uniques.astype(np.float64, copy=False)
-    uniques = uniques.astype(np.unicode_, copy=False)
+    uniques = uniques.astype(np.str_, copy=False)
 
     mapping = np.fromiter(
         (categories.get(val, -1) for val in uniques), np.int64, count=len(uniques)
@@ -725,7 +725,7 @@ def _process_continuous(X_col, nonmissings):
                 floats[idx] = one_item_array.astype(dtype=np.float64)[0]
             except TypeError:
                 # use .astype instead of str(one_item_array) here to ensure identical string categories
-                one_str_array = one_item_array.astype(dtype=np.unicode_)
+                one_str_array = one_item_array.astype(dtype=np.str_)
                 try:
                     # use .astype(..) instead of float(..) to ensure identical conversion results
                     floats[idx] = one_str_array.astype(dtype=np.float64)[0]
@@ -948,7 +948,7 @@ def _process_pandas_column(X_col, categories, feature_type, min_unique_continuous):
         # unlike other missing value types, we get back -1's for missing here, so no need to drop them
         X_col = X_col.values
         is_ordered = X_col.ordered
-        pd_categories = X_col.categories.values.astype(dtype=np.unicode_, copy=False)
+        pd_categories = X_col.categories.values.astype(dtype=np.str_, copy=False)
         X_col = X_col.codes
 
     if feature_type == "ignore":
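For context on the pandas attributes used in this hunk, a small sketch with a hypothetical categorical column:

    import numpy as np
    import pandas as pd

    col = pd.Series(["b", "a", "b", None], dtype="category").values  # a Categorical
    print(col.codes)  # [ 1  0  1 -1]; missing values already arrive as -1
    print(col.categories.values.astype(dtype=np.str_, copy=False))  # ['a' 'b']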
2 changes: 1 addition & 1 deletion
@@ -159,7 +159,7 @@ def measure_interactions(
     try:
         y_discard = y.astype(dtype=np.float64, copy=False)
     except (TypeError, ValueError):
-        y_discard = y.astype(dtype=np.unicode_, copy=False)
+        y_discard = y.astype(dtype=np.str_, copy=False)
 
     target_type = type_of_target(y_discard)
     if target_type == "continuous":
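The try/except mirrors scikit-learn's target inspection: a y that casts cleanly to float64 may be continuous, while anything else is stringified and treated as categorical. Assuming type_of_target here is sklearn.utils.multiclass.type_of_target, the two branches behave like:

    import numpy as np
    from sklearn.utils.multiclass import type_of_target

    print(type_of_target(np.array([0.5, 1.7, 2.9])))  # 'continuous'
    print(type_of_target(np.array(["yes", "no", "yes"], dtype=np.str_)))  # 'binary'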
python/interpret-core/tests/utils/test_clean_x.py (30 changes: 15 additions & 15 deletions)
@@ -628,15 +628,15 @@ def test_process_continuous_obj_hard_bad():
 
 
 def test_process_continuous_str_simple():
-    vals, bad = _process_continuous(np.array(["1", "2.5"], dtype=np.unicode_), None)
+    vals, bad = _process_continuous(np.array(["1", "2.5"], dtype=np.str_), None)
     assert bad is None
     assert vals.dtype == np.float64
     assert np.array_equal(vals, np.array([1, 2.5], dtype=np.float64))
 
 
 def test_process_continuous_str_simple_missing():
     vals, bad = _process_continuous(
-        np.array(["1", "2.5"], dtype=np.unicode_),
+        np.array(["1", "2.5"], dtype=np.str_),
         np.array([True, True, False], dtype=np.bool_),
     )
     assert bad is None
@@ -649,7 +649,7 @@ def test_process_continuous_str_simple_missing():
 
 def test_process_continuous_str_hard_bad():
     vals, bad = _process_continuous(
-        np.array(["1", "2.5", "bad"], dtype=np.unicode_),
+        np.array(["1", "2.5", "bad"], dtype=np.str_),
         np.array([True, True, True, False], dtype=np.bool_),
     )
     assert len(bad) == 4
@@ -708,7 +708,7 @@ def test_process_column_initial_obj_obj():
 
 def test_process_column_initial_alphabetical_nomissing():
     encoded, c = _process_column_initial(
-        np.array(["xyz", "abc", "xyz"], dtype=np.unicode_),
+        np.array(["xyz", "abc", "xyz"], dtype=np.str_),
         None,
         "nominal_alphabetical",
         None,
@@ -723,7 +723,7 @@ def test_process_column_initial_alphabetical_nomissing():
 
 def test_process_column_initial_alphabetical_missing():
     encoded, c = _process_column_initial(
-        np.array(["xyz", "abc", "xyz"], dtype=np.unicode_),
+        np.array(["xyz", "abc", "xyz"], dtype=np.str_),
         np.array([True, True, False, True], dtype=np.bool_),
         "nominal_alphabetical",
         None,
@@ -738,7 +738,7 @@ def test_process_column_initial_alphabetical_missing():
 
 def test_process_column_initial_prevalence_nomissing():
     encoded, c = _process_column_initial(
-        np.array(["xyz", "abc", "xyz"], dtype=np.unicode_),
+        np.array(["xyz", "abc", "xyz"], dtype=np.str_),
         None,
         "nominal_prevalence",
         None,
@@ -753,7 +753,7 @@ def test_process_column_initial_prevalence_nomissing():
 
 def test_process_column_initial_prevalence_missing():
     encoded, c = _process_column_initial(
-        np.array(["xyz", "abc", "xyz"], dtype=np.unicode_),
+        np.array(["xyz", "abc", "xyz"], dtype=np.str_),
         np.array([True, True, False, True], dtype=np.bool_),
         "nominal_prevalence",
         None,
@@ -768,7 +768,7 @@ def test_process_column_initial_prevalence_missing():
 
 def test_process_column_initial_float64_nomissing():
     encoded, c = _process_column_initial(
-        np.array(["11.1", "2.2", "11.1"], dtype=np.unicode_),
+        np.array(["11.1", "2.2", "11.1"], dtype=np.str_),
         None,
         "ANYTHING_ELSE",
         None,
@@ -783,7 +783,7 @@ def test_process_column_initial_float64_nomissing():
 
 def test_process_column_initial_float64_missing():
     encoded, c = _process_column_initial(
-        np.array(["11.1", "2.2", "11.1"], dtype=np.unicode_),
+        np.array(["11.1", "2.2", "11.1"], dtype=np.str_),
         np.array([True, True, False, True], dtype=np.bool_),
         "ANYTHING_ELSE",
         None,
@@ -1016,7 +1016,7 @@ def test_encode_categorical_existing_obj_floats():
             np.float16(2.2),
             np.float32(3.3),
             np.float64(4.4),
-            np.longfloat(5.5),
+            np.longdouble(5.5),
         ],
         dtype=np.object_,
     ),
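np.longfloat was likewise just an alias of np.longdouble on NumPy 1.x, so the test still exercises the same extended-precision type; note its width is platform-dependent:

    import numpy as np

    x = np.longdouble(5.5)
    print(type(x).__name__)              # 'longdouble'
    print(np.finfo(np.longdouble).bits)  # e.g. 128 on Linux x86-64, 64 on Windows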
@@ -1110,7 +1110,7 @@ def test_encode_categorical_existing_obj_obj():
 def test_encode_categorical_existing_str():
     c = {"abc": 1, "def": 2, "ghi": 3}
     encoded, bad = _encode_categorical_existing(
-        np.array(["abc", "ghi", "def", "something"], dtype=np.unicode_),
+        np.array(["abc", "ghi", "def", "something"], dtype=np.str_),
         np.array([True, True, False, True, True], dtype=np.bool_),
         c,
     )
@@ -1144,7 +1144,7 @@ def test_encode_categorical_existing_int8():
 def test_encode_categorical_existing_bool():
     c = {"False": 1, "True": 2}
     encoded, bad = _encode_categorical_existing(
-        np.array([False, True, False], dtype=np.unicode_),
+        np.array([False, True, False], dtype=np.str_),
         np.array([True, True, False, True], dtype=np.bool_),
         c,
     )
@@ -1157,7 +1157,7 @@ def test_encode_categorical_existing_bool():
 def test_encode_categorical_existing_bool_true():
     c = {"True": 1}
     encoded, bad = _encode_categorical_existing(
-        np.array([False, True, False], dtype=np.unicode_),
+        np.array([False, True, False], dtype=np.str_),
         np.array([True, True, False, True], dtype=np.bool_),
         c,
     )
@@ -1170,7 +1170,7 @@ def test_encode_categorical_existing_bool_true():
 def test_encode_categorical_existing_bool_false():
     c = {"False": 1}
     encoded, bad = _encode_categorical_existing(
-        np.array([False, True, False], dtype=np.unicode_),
+        np.array([False, True, False], dtype=np.str_),
         np.array([True, True, False, True], dtype=np.bool_),
         c,
     )
@@ -1794,7 +1794,7 @@ def test_unify_columns_pandas_missings_float64():
 
 
 def test_unify_columns_pandas_missings_longfloat():
-    check_pandas_float(np.longfloat, -1.1, 2.2)
+    check_pandas_float(np.longdouble, -1.1, 2.2)
 
 
 def test_unify_columns_pandas_missings_float32():
