Added xfail for selenium. Black reformat.
interpret-ml committed Apr 2, 2020
1 parent 38254e7 commit 90a8c88
Showing 9 changed files with 111 additions and 56 deletions.
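
All hunks below except the test_selenium.py change are mechanical restyles from Black, Python's opinionated formatter; only the xfail marker changes behavior. For readers unfamiliar with Black, here is a minimal sketch of the pattern applied throughout (illustrative function and argument names, not code from this commit):

```python
def format_msg(key, entry, trace):
    return "{} {} {}".format(key, entry, trace)


# A call longer than Black's default 88-character line limit:
msg = format_msg("extension_key_value", "entrypoint_value", "a long traceback string that pushes the call past the limit")

# The same call after Black: one argument per line, a trailing comma, and the
# closing parenthesis dedented onto its own line. Black also normalizes string
# quotes to double quotes, as in the extension_utils.py hunk below.
msg = format_msg(
    "extension_key_value",
    "entrypoint_value",
    "a long traceback string that pushes the call past the limit",
)
```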
4 changes: 3 additions & 1 deletion python/interpret-core/interpret/ext/extension_utils.py
@@ -64,7 +64,9 @@ def load_class_extensions(current_module, extension_key, extension_class_validat
 
     except Exception as e: # pragma: no cover
         msg = "Failure while loading {}. Failed to load entrypoint {} with exception {}.".format(
-            extension_key, entrypoint, ''.join(traceback.format_exception(type(e), e, e.__traceback__))
+            extension_key,
+            entrypoint,
+            "".join(traceback.format_exception(type(e), e, e.__traceback__)),
         )
         module_logger.warning(msg)
 
6 changes: 4 additions & 2 deletions python/interpret-core/interpret/glassbox/ebm/ebm.py
@@ -708,7 +708,7 @@ def __init__(
     # doesn't like parameters in the fit function, other than ones like weights that have
     # the same length as the number of instances. See:
     # https://github.com/microsoft/LightGBM/issues/2628#issue-536116395
-    #
+    #
     # NOTE: Consider refactoring later.
     def fit(self, X, y): # noqa: C901
         # TODO PK we shouldn't expose our internal state until we are 100% sure that we succeeded
@@ -1282,7 +1282,9 @@ def explain_local(self, X, y=None, name=None):
         data_dicts = []
         intercept = self.intercept_
         if self.n_classes_ <= 2:
-            if isinstance(self.intercept_, np.ndarray) or isinstance(self.intercept_, list):
+            if isinstance(self.intercept_, np.ndarray) or isinstance(
+                self.intercept_, list
+            ):
                 intercept = intercept[0]
 
         for _ in range(n_rows):
2 changes: 1 addition & 1 deletion
@@ -1 +1 @@
-from .purify import *
\ No newline at end of file
+from .purify import *
2 changes: 1 addition & 1 deletion
@@ -117,7 +117,7 @@ def purify(mat, densities=None, verbose=False, tol=1e-6, randomize=False):
     max_row = np.max(np.abs(row_means))
     max_col = np.max(np.abs(col_means))
     # Center m1 and m2
-    intercept = 0.
+    intercept = 0.0
     intercept += np.average(m1, weights=np.sum(densities, axis=1))
     m1 -= np.average(m1, weights=np.sum(densities, axis=1))
     intercept += np.average(m2, weights=np.sum(densities, axis=0))
113 changes: 76 additions & 37 deletions
@@ -12,10 +12,7 @@ def test_purify_row():
     # Test Base Case of all Zeros.
     np.random.seed(0)
 
-    raw_mat = np.array([
-        np.array([0, 0]),
-        np.array([0, 0])
-    ], dtype=np.float64)
+    raw_mat = np.array([np.array([0, 0]), np.array([0, 0])], dtype=np.float64)
     raw_marg = np.array([0, 0], dtype=np.float64)
     densities = np.ones_like(raw_mat)
     pure_mat, pure_marg = purify_row(raw_mat.copy(), raw_marg.copy(), densities, 0)
@@ -29,17 +26,19 @@ def test_purify_row():
     for i in range(n_rows):
         pure_mat, pure_marg = purify_row(raw_mat.copy(), raw_marg.copy(), densities, i)
         assert np.abs(np.average(pure_mat[i, :], weights=densities[i, :])) < 1e-10
-        assert np.all(np.isclose(pure_marg[i], raw_marg[i] + np.average(raw_mat[i, :],
-                                 weights=densities[i, :]), atol=1e-10))
+        assert np.all(
+            np.isclose(
+                pure_marg[i],
+                raw_marg[i] + np.average(raw_mat[i, :], weights=densities[i, :]),
+                atol=1e-10,
+            )
+        )
 
 
 def test_purify_col():
     # Test Base Case of all Zeros.
     np.random.seed(0)
-    raw_mat = np.array([
-        np.array([0, 0]),
-        np.array([0, 0])
-    ], dtype=np.float64)
+    raw_mat = np.array([np.array([0, 0]), np.array([0, 0])], dtype=np.float64)
     raw_marg = np.array([0, 0], dtype=np.float64)
     densities = np.ones_like(raw_mat)
     pure_mat, pure_marg = purify_col(raw_mat.copy(), raw_marg.copy(), densities, 0)
@@ -53,18 +52,23 @@ def test_purify_col():
     for j in range(n_cols):
         pure_mat, pure_marg = purify_col(raw_mat.copy(), raw_marg.copy(), densities, j)
         assert np.abs(np.average(pure_mat[:, j], weights=densities[:, j])) < 1e-10
-        assert np.all(np.isclose(pure_marg[j], raw_marg[j] + np.average(raw_mat[:, j], weights=densities[:, j]), atol=1e-10))
+        assert np.all(
+            np.isclose(
+                pure_marg[j],
+                raw_marg[j] + np.average(raw_mat[:, j], weights=densities[:, j]),
+                atol=1e-10,
+            )
+        )
 
 
 def test_purify():
     # Test Base Case of all Zeros.
     np.random.seed(0)
-    raw_mat = np.array([
-        np.array([0, 0]),
-        np.array([0, 0])
-    ], dtype=np.float64)
+    raw_mat = np.array([np.array([0, 0]), np.array([0, 0])], dtype=np.float64)
     densities = np.ones_like(raw_mat)
-    intercept, pure_marg1, pure_marg2, pure_mat, n_iter = purify(raw_mat.copy(), densities=densities, tol=1e-10, randomize=False)
+    intercept, pure_marg1, pure_marg2, pure_mat, n_iter = purify(
+        raw_mat.copy(), densities=densities, tol=1e-10, randomize=False
+    )
     assert np.abs(intercept) < 1e-10
     assert np.all(np.isclose(pure_mat, raw_mat, atol=1e-10))
     assert np.all(np.isclose(pure_marg1, 0, atol=1e-10))
@@ -73,20 +77,36 @@ def test_purify():
     # Test with random matrix and uniform density.
     raw_mat = np.random.uniform(-1, 1, size=(n_rows, n_cols))
     densities = np.ones_like(raw_mat)
-    intercept, pure_marg1, pure_marg2, pure_mat, n_iter = purify(raw_mat.copy(), densities=densities, tol=1e-10, randomize=False)
+    intercept, pure_marg1, pure_marg2, pure_mat, n_iter = purify(
+        raw_mat.copy(), densities=densities, tol=1e-10, randomize=False
+    )
     for i in range(n_rows):
         assert np.abs(np.average(pure_mat[i, :], weights=densities[i, :])) < 1e-10
-        assert np.all(np.isclose(pure_marg1[i]+intercept, np.average(raw_mat[i, :], weights=densities[i, :]), atol=1e-10))
+        assert np.all(
+            np.isclose(
+                pure_marg1[i] + intercept,
+                np.average(raw_mat[i, :], weights=densities[i, :]),
+                atol=1e-10,
+            )
+        )
     for j in range(n_cols):
         assert np.abs(np.average(pure_mat[:, j], weights=densities[:, j])) < 1e-10
-        assert np.all(np.isclose(pure_marg2[j]+intercept, np.average(raw_mat[:, j], weights=densities[:, j]), atol=1e-10))
-    assert n_iter == 1 # Always takes a single iteration with uniform density.
-
+        assert np.all(
+            np.isclose(
+                pure_marg2[j] + intercept,
+                np.average(raw_mat[:, j], weights=densities[:, j]),
+                atol=1e-10,
+            )
+        )
+    assert n_iter == 1  # Always takes a single iteration with uniform density.
+
     # Test with random matrix and random density.
     # Test with random matrix and uniform density.
     raw_mat = np.random.uniform(-1, 1, size=(n_rows, n_cols))
     densities = np.random.uniform(0, 100, size=raw_mat.shape)
-    intercept, pure_marg1, pure_marg2, pure_mat, n_iter = purify(raw_mat.copy(), densities=densities, tol=1e-10, randomize=False)
+    intercept, pure_marg1, pure_marg2, pure_mat, n_iter = purify(
+        raw_mat.copy(), densities=densities, tol=1e-10, randomize=False
+    )
     for i in range(n_rows):
         assert np.abs(np.average(pure_mat[i, :], weights=densities[i, :])) < 1e-10
     for j in range(n_cols):
@@ -98,12 +118,11 @@ def test_purify_randomize():
     # Randomize should not change the results
     # Test Base Case of all Zeros.
    np.random.seed(0)
-    raw_mat = np.array([
-        np.array([0, 0]),
-        np.array([0, 0])
-    ], dtype=np.float64)
+    raw_mat = np.array([np.array([0, 0]), np.array([0, 0])], dtype=np.float64)
     densities = np.ones_like(raw_mat)
-    intercept, pure_marg1, pure_marg2, pure_mat, n_iter = purify(raw_mat.copy(), densities=densities, tol=1e-10, randomize=True)
+    intercept, pure_marg1, pure_marg2, pure_mat, n_iter = purify(
+        raw_mat.copy(), densities=densities, tol=1e-10, randomize=True
+    )
     assert np.abs(intercept) < 1e-10
     assert np.all(np.isclose(pure_mat, raw_mat, atol=1e-10))
     assert np.all(np.isclose(pure_marg1, 0, atol=1e-10))
@@ -112,19 +131,35 @@ def test_purify_randomize():
     # Test with random matrix and uniform density.
     raw_mat = np.random.uniform(-1, 1, size=(n_rows, n_cols))
     densities = np.ones_like(raw_mat)
-    intercept, pure_marg1, pure_marg2, pure_mat, n_iter = purify(raw_mat.copy(), densities=densities, tol=1e-10, randomize=True)
+    intercept, pure_marg1, pure_marg2, pure_mat, n_iter = purify(
+        raw_mat.copy(), densities=densities, tol=1e-10, randomize=True
+    )
     for i in range(n_rows):
         assert np.abs(np.average(pure_mat[i, :], weights=densities[i, :])) < 1e-10
-        assert np.all(np.isclose(pure_marg1[i]+intercept, np.average(raw_mat[i, :], weights=densities[i, :]), atol=1e-10))
+        assert np.all(
+            np.isclose(
+                pure_marg1[i] + intercept,
+                np.average(raw_mat[i, :], weights=densities[i, :]),
+                atol=1e-10,
+            )
+        )
     for j in range(n_cols):
         assert np.abs(np.average(pure_mat[:, j], weights=densities[:, j])) < 1e-10
-        assert np.all(np.isclose(pure_marg2[j]+intercept, np.average(raw_mat[:, j], weights=densities[:, j]), atol=1e-10))
-
+        assert np.all(
+            np.isclose(
+                pure_marg2[j] + intercept,
+                np.average(raw_mat[:, j], weights=densities[:, j]),
+                atol=1e-10,
+            )
+        )
+
     # Test with random matrix and random density.
     # Test with random matrix and uniform density.
     raw_mat = np.random.uniform(-1, 1, size=(n_rows, n_cols))
     densities = np.random.uniform(0, 100, size=raw_mat.shape)
-    intercept, pure_marg1, pure_marg2, pure_mat, n_iter = purify(raw_mat.copy(), densities=densities, tol=1e-10, randomize=True)
+    intercept, pure_marg1, pure_marg2, pure_mat, n_iter = purify(
+        raw_mat.copy(), densities=densities, tol=1e-10, randomize=True
+    )
     for i in range(n_rows):
         assert np.abs(np.average(pure_mat[i, :], weights=densities[i, :])) < 1e-10
     for j in range(n_cols):
@@ -136,10 +171,13 @@ def test_purify_identifiable():
     np.random.seed(0)
     # Test whether perturbing a model, then purifying it recovers the original model.
     def helper(randomize):
-        raw_mat = np.random.uniform(-1, 1, size=(n_rows, n_cols))*np.random.binomial(1, 0.9, size=(n_rows, n_cols))
+        raw_mat = np.random.uniform(-1, 1, size=(n_rows, n_cols)) * np.random.binomial(
+            1, 0.9, size=(n_rows, n_cols)
+        )
         densities = np.random.uniform(1, 100, size=(n_rows, n_cols))
-        intercept, m1, m2, pure_mat, n_iters = purify(raw_mat.copy(),
-                                                      densities=densities, tol=1e-10, randomize=randomize)
+        intercept, m1, m2, pure_mat, n_iters = purify(
+            raw_mat.copy(), densities=densities, tol=1e-10, randomize=randomize
+        )
         m1_perturbed = m1.copy()
         m2_perturbed = m2.copy()
         mat_perturbed = pure_mat.copy()
@@ -151,8 +189,9 @@ def helper(randomize):
             val = np.random.normal()
             m2_perturbed[j] += val
             mat_perturbed[:, j] -= val
-        intercept2, m12, m22, pure_mat2, n_iters2 = purify(mat_perturbed,
-                                                           densities=densities, tol=1e-10, randomize=randomize)
+        intercept2, m12, m22, pure_mat2, n_iters2 = purify(
+            mat_perturbed, densities=densities, tol=1e-10, randomize=randomize
+        )
         m12 += m1_perturbed
         m22 += m2_perturbed
 
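
The tests above pin down purify's contract: given an interaction matrix and a density (weight) matrix, it returns (intercept, row_marginal, col_marginal, pure_mat, n_iter), where every weighted row and column mean of pure_mat is zero and the removed mass moves into the marginals and the intercept. A minimal sketch of calling it directly, assuming the module is importable the way these tests use it (the test file's own import line is outside the hunks shown, so the path below is an assumption):

```python
import numpy as np

# Hypothetical import path; not confirmed by this diff.
from interpret.glassbox.ebm.research.purify import purify

np.random.seed(0)
mat = np.random.uniform(-1, 1, size=(5, 4))
densities = np.ones_like(mat)  # uniform weights

intercept, row_marg, col_marg, pure_mat, n_iter = purify(
    mat.copy(), densities=densities, tol=1e-10, randomize=False
)

# Weighted row means of the purified interaction are ~0, which is exactly
# what the assertions in the tests above check.
for i in range(5):
    assert abs(np.average(pure_mat[i, :], weights=densities[i, :])) < 1e-8
assert n_iter == 1  # with uniform density, one centering sweep suffices
```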
8 changes: 3 additions & 5 deletions python/interpret-core/interpret/glassbox/ebm/test/test_ebm.py
@@ -228,11 +228,9 @@ def test_ebm_sparse():
 
     np.random.seed(0)
     n_features = 20
-    X, y = make_multilabel_classification(n_samples=100,
-                                          sparse=True,
-                                          n_features=n_features,
-                                          n_classes=1,
-                                          n_labels=2)
+    X, y = make_multilabel_classification(
+        n_samples=100, sparse=True, n_features=n_features, n_classes=1, n_labels=2
+    )
 
     # train linear model
     clf = ExplainableBoostingClassifier()
2 changes: 1 addition & 1 deletion python/interpret-core/interpret/test/test_selenium.py
@@ -60,7 +60,7 @@ def all_explanations():
 
 
 @pytest.mark.selenium # noqa: C901
-# @pytest.mark.xfail(strict=False)
+@pytest.mark.xfail(strict=False)
 @pytest.mark.parametrize("job_id", list(range(num_jobs)))
 def test_all_explainers_selenium(all_explanations, job_id):
     from selenium.webdriver.support.ui import WebDriverWait
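
This is the commit's one functional change: the xfail marker on the Selenium suite is now active rather than commented out. With strict=False, pytest reports a failing run as XFAIL and an unexpectedly passing run as XPASS, and neither outcome fails the build — the usual way to keep a flaky browser-driven test from breaking CI. A self-contained illustration of the marker's semantics (not code from this commit):

```python
import random

import pytest


@pytest.mark.xfail(strict=False)
def test_flaky_browser_interaction():
    # Stand-in for a flaky Selenium assertion. If it fails, pytest records
    # XFAIL instead of a failure; if it happens to pass, pytest records
    # XPASS. With strict=False, neither result fails the suite.
    assert random.random() < 0.5
```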
22 changes: 15 additions & 7 deletions python/interpret-core/interpret/utils/all.py
@@ -221,18 +221,18 @@ def _get_new_feature_names(data, feature_names):
     else:
         return feature_names
 
+
 def _get_new_feature_types(data, feature_types, new_feature_names):
     if feature_types is None:
         unique_counts = np.apply_along_axis(lambda a: len(set(a)), axis=0, arr=data)
         return [
             _assign_feature_type(feature_type, unique_counts[index])
-            for index, feature_type in enumerate(
-                [data.dtype] * len(new_feature_names)
-            )
+            for index, feature_type in enumerate([data.dtype] * len(new_feature_names))
         ]
     else:
         return feature_types
 
+
 # TODO: Docs for unify_data.
 def unify_data(data, labels=None, feature_names=None, feature_types=None):
     """ Attempts to unify data into a numpy array with feature names and types.
@@ -273,20 +273,28 @@ def unify_data(data, labels=None, feature_names=None, feature_types=None):
         new_data = np.array(data)
 
         new_feature_names = _get_new_feature_names(new_data, feature_names)
-        new_feature_types = _get_new_feature_types(new_data, feature_types, new_feature_names)
+        new_feature_types = _get_new_feature_types(
+            new_data, feature_types, new_feature_names
+        )
     elif isinstance(data, np.ndarray):
         new_data = data
 
         new_feature_names = _get_new_feature_names(data, feature_names)
-        new_feature_types = _get_new_feature_types(data, feature_types, new_feature_names)
+        new_feature_types = _get_new_feature_types(
+            data, feature_types, new_feature_names
+        )
     elif sp.sparse.issparse(data):
         # Add warning message for now prior to converting the data to dense format
-        warn_msg = "Sparse data not fully supported, will be densified for now, may cause OOM"
+        warn_msg = (
+            "Sparse data not fully supported, will be densified for now, may cause OOM"
+        )
         warnings.warn(warn_msg, RuntimeWarning)
         new_data = data.toarray()
 
         new_feature_names = _get_new_feature_names(new_data, feature_names)
-        new_feature_types = _get_new_feature_types(new_data, feature_types, new_feature_names)
+        new_feature_types = _get_new_feature_types(
+            new_data, feature_types, new_feature_names
+        )
     else: # pragma: no cover
         msg = "Could not unify data of type: {0}".format(type(data))
         log.error(msg)
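
The sparse branch above warns and then densifies with toarray(), so very large sparse inputs can exhaust memory — exactly what the warning text flags. A minimal sketch of what a caller sees, assuming unify_data is importable from interpret.utils (an assumed public path; only the module file interpret/utils/all.py appears in this diff) and that the return order follows the parameter order:

```python
import numpy as np
import scipy.sparse

from interpret.utils import unify_data  # assumed import path

X_sparse = scipy.sparse.csr_matrix(np.eye(100))

# Emits RuntimeWarning("Sparse data not fully supported, will be densified
# for now, may cause OOM") and then proceeds on X_sparse.toarray().
data, labels, feature_names, feature_types = unify_data(X_sparse)

assert isinstance(data, np.ndarray)  # densified
```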
8 changes: 7 additions & 1 deletion python/interpret-core/setup.py
@@ -53,7 +53,13 @@
 joblib_dep = "joblib>=0.11"
 extras = {
     # Core
-    "required": ["numpy>=1.11.1", "scipy>=0.18.1", "pandas>=0.19.2", sklearn_dep, joblib_dep],
+    "required": [
+        "numpy>=1.11.1",
+        "scipy>=0.18.1",
+        "pandas>=0.19.2",
+        sklearn_dep,
+        joblib_dep,
+    ],
     "debug": ["psutil>=5.6.2"],
     "notebook": ["ipykernel>=5.1.0", "ipython>=7.4.0"],
     # Plotly (required if .visualize is ever called)
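
Note the packaging choice visible here: interpret-core appears to keep even its core dependencies (numpy, scipy, pandas, scikit-learn, joblib) in a "required" extra rather than install_requires, so callers opt in explicitly, e.g. pip install interpret-core[required]. A hedged sketch of how such an extras dict feeds setuptools (illustrative package name and trimmed extras, not the real setup() call):

```python
from setuptools import setup

extras = {
    "required": ["numpy>=1.11.1", "scipy>=0.18.1", "pandas>=0.19.2"],
    "debug": ["psutil>=5.6.2"],
}

setup(
    name="example-core",  # hypothetical name
    version="0.0.0",
    extras_require=extras,  # enables `pip install example-core[required,debug]`
)
```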
