Commit 936bd04

Black reformat

interpret-ml committed Jul 21, 2020
1 parent a7ec4df · commit 936bd04
Showing 15 changed files with 90 additions and 84 deletions.
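The changes below are mechanical: the Black formatter normalizes string quotes, wraps long signatures and imports, adds trailing commas, and strips stray blank lines and trailing whitespace. A minimal before/after sketch of the conventions applied in the hunks that follow, with snippets adapted from this commit:

# Before: hand-wrapped import and single-quoted keys
from ..utils import gen_name_from_class, gen_local_selector, gen_global_selector, gen_perf_dicts
pred_idx = data_dict['perf']['predicted']

# After Black (88-char lines, double quotes, trailing commas):
from ..utils import (
    gen_name_from_class,
    gen_local_selector,
    gen_global_selector,
    gen_perf_dicts,
)
pred_idx = data_dict["perf"]["predicted"]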
2 changes: 1 addition & 1 deletion python/interpret-core/interpret/api/templates.py
@@ -135,7 +135,7 @@ def visualize(self, key=None):
         is_multiclass = is_multiclass_local_data_dict(data_dict)
         if is_multiclass:
             # Sort by predicted class' abs feature values
-            pred_idx = data_dict['perf']['predicted']
+            pred_idx = data_dict["perf"]["predicted"]
             sort_fn = lambda x: -abs(x[pred_idx])
         else:
             # Sort by abs feature values
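As an aside on the hunk above, the sort key orders features by the magnitude of their contribution to the predicted class; a small illustration with made-up values:

data_dict = {"perf": {"predicted": 1}}
pred_idx = data_dict["perf"]["predicted"]
# Per-feature contributions per class; sort by |contribution| to class 1:
contribs = [(0.1, -0.7, 0.2), (0.3, 0.9, -0.1)]
sorted(contribs, key=lambda x: -abs(x[pred_idx]))
# -> [(0.3, 0.9, -0.1), (0.1, -0.7, 0.2)]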
1 change: 1 addition & 0 deletions python/interpret-core/interpret/blackbox/__init__.py
@@ -5,4 +5,5 @@
 from .shap import ShapKernel  # noqa: F401
 from .sensitivity import MorrisSensitivity  # noqa: F401
 from .partialdependence import PartialDependence  # noqa: F401
+
 # from .permutationimportance import PermutationImportance  # noqa: F401
9 changes: 7 additions & 2 deletions python/interpret-core/interpret/glassbox/decisiontree.py
@@ -3,7 +3,12 @@

 from ..api.base import ExplainerMixin, ExplanationMixin
 from ..utils import unify_data
-from ..utils import gen_name_from_class, gen_local_selector, gen_global_selector, gen_perf_dicts
+from ..utils import (
+    gen_name_from_class,
+    gen_local_selector,
+    gen_global_selector,
+    gen_perf_dicts,
+)

 from sklearn.base import ClassifierMixin, RegressorMixin
 from sklearn.tree import DecisionTreeClassifier as SKDT
@@ -365,7 +370,7 @@ def explain_local(self, X, y=None, name=None):
                 "nodes": nodes,
                 "edges": edges,
                 "decision": decision,
-                "perf": None if perf_dicts is None else perf_dicts[i]
+                "perf": None if perf_dicts is None else perf_dicts[i],
             }
             for i, decision in enumerate(decisions)
         ]
62 changes: 23 additions & 39 deletions python/interpret-core/interpret/glassbox/ebm/ebm.py
@@ -77,13 +77,17 @@ def visualize(self, key=None):
         Returns:
             A Plotly figure.
         """
-        from ...visual.plot import plot_continuous_bar, plot_horizontal_bar, sort_take, is_multiclass_global_data_dict
+        from ...visual.plot import (
+            plot_continuous_bar,
+            plot_horizontal_bar,
+            sort_take,
+            is_multiclass_global_data_dict,
+        )

         data_dict = self.data(key)
         if data_dict is None:
             return None

-
         # Overall graph
         if self.explanation_type == "global" and key is None:
             data_dict = sort_take(
@@ -121,11 +125,7 @@ class EBMPreprocessor(BaseEstimator, TransformerMixin):
     """ Transformer that preprocesses data to be ready before EBM. """

     def __init__(
-        self,
-        feature_names=None,
-        feature_types=None,
-        max_bins=255,
-        binning="quantile",
+        self, feature_names=None, feature_types=None, max_bins=255, binning="quantile",
     ):
         """ Initializes EBM preprocessor.
@@ -165,7 +165,9 @@ def fit(self, X):

         self.has_fitted_ = False

-        schema = autogen_schema(X, feature_names=self.feature_names, feature_types=self.feature_types)
+        schema = autogen_schema(
+            X, feature_names=self.feature_names, feature_types=self.feature_types
+        )

         for col_idx in range(X.shape[1]):
             col_name = list(schema.keys())[col_idx]
@@ -192,11 +194,7 @@ def fit(self, X):
                    )
                )
            else:  # pragma: no cover
-                raise ValueError(
-                    "Unknown binning: '{}'.".format(
-                        self.binning
-                    )
-                )
+                raise ValueError("Unknown binning: '{}'.".format(self.binning))

            _, bin_edges = np.histogram(col_data, bins=bins)

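For context on the hunk above: np.histogram returns a (counts, bin_edges) pair, and the preprocessor keeps only the edges. A minimal sketch with made-up data:

import numpy as np

col_data = np.array([1.0, 2.0, 2.5, 3.0, 10.0])
_, bin_edges = np.histogram(col_data, bins=4)
# bin_edges has bins + 1 entries spanning the column's range:
# array([ 1.  ,  3.25,  5.5 ,  7.75, 10.  ])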
@@ -587,16 +585,15 @@ def __init__(
             # models without fitting them ourselves. To conform to a common explaination API we get the feature_names
             # here.
             feature_names,
-
-            # other packages LightGBM, CatBoost, Scikit-Learn (future) are using categorical specific ways to indicate
+            # other packages LightGBM, CatBoost, Scikit-Learn (future) are using categorical specific ways to indicate
             # feature_types. The benefit to them is that they can accept multiple ways of specifying categoricals like:
             # categorical = [true, false, true, true] OR categorical = [1, 4, 8] OR categorical = 'all'/'auto'/'none'
             # We're choosing a different route because for visualization we want to be able to express multiple
             # different types of data. For example, if the user has data with strings of "low", "medium", "high"
             # We want to keep both the ordinal nature of this feature and we wish to preserve the text for visualization
-            # scikit-learn callers can pre-convert these things to [0, 1, 2] in the correct order because they don't
-            # need to worry about visualizing the data afterwards, but for us we need a way to specify the strings
-            # back anyways. So we need some way to express both the categorical nature of features and the order
+            # scikit-learn callers can pre-convert these things to [0, 1, 2] in the correct order because they don't
+            # need to worry about visualizing the data afterwards, but for us we need a way to specify the strings
+            # back anyways. So we need some way to express both the categorical nature of features and the order
             # mapping. We can do this and more complicated conversions via:
             # feature_types = ["categorical", ["low", "medium", "high"], "continuous", "time", "bool"]
             feature_types,
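A hedged sketch of the design the comment above describes: passing explicit names and types to an EBM constructor. The nested-list form for ordinals is the comment's stated goal, not necessarily the released API, and the feature data here is invented:

from interpret.glassbox import ExplainableBoostingClassifier

ebm = ExplainableBoostingClassifier(
    feature_names=["age", "risk_level", "income"],
    # "risk_level" is an ordered categorical; the list would preserve both
    # the category strings and their order for later visualization.
    feature_types=["continuous", ["low", "medium", "high"], "continuous"],
)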
@@ -629,9 +626,9 @@ def __init__(
             # Native
             learning_rate,
             max_leaves,
-            # Holte, R. C. (1993) "Very simple classification rules perform well on most commonly used datasets"
+            # Holte, R. C. (1993) "Very simple classification rules perform well on most commonly used datasets"
             # says use 6 as the minimum instances https://link.springer.com/content/pdf/10.1023/A:1022631118932.pdf
-            # TODO PK try setting this (not here, but in our caller) to 6 and run tests to verify the best value.
+            # TODO PK try setting this (not here, but in our caller) to 6 and run tests to verify the best value.
             min_samples_leaf,
             # Overall
             n_jobs,
@@ -782,9 +779,7 @@ def fit(self, X, y):  # noqa: C901
             # a single float64 for regression, so we do the same
             if is_classifier(self):
                 self.intercept_ = np.zeros(
-                    EBMUtils.get_count_scores_c(n_classes),
-                    dtype=np.float64,
-                    order="C",
+                    EBMUtils.get_count_scores_c(n_classes), dtype=np.float64, order="C",
                 )
             else:
                 self.intercept_ = np.float64(0)
@@ -808,9 +803,7 @@ def train_model(estimator, X, y, n_classes):
                 # Discard initial interactions
                 new_model = []
                 new_feature_combinations = []
-                for i, feature_combination in enumerate(
-                    estimator.feature_groups_
-                ):
+                for i, feature_combination in enumerate(estimator.feature_groups_):
                     if len(feature_combination) != 1:
                         continue
                     new_model.append(estimator.model_[i])
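The loop above keeps only single-feature groups when discarding initial interactions; a standalone sketch of the same filtering, with hypothetical names and values:

feature_groups = [(0,), (1,), (0, 1), (2,)]
model = ["term0", "term1", "term01", "term2"]
# Keep a term only when its feature group has exactly one feature:
new_model = [m for fg, m in zip(feature_groups, model) if len(fg) == 1]
# -> ["term0", "term1", "term2"]; the (0, 1) interaction term is dropped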
@@ -827,8 +820,7 @@ def staged_fit_fn(estimator, X, y, inter_indices=[]):
                 )

             staged_fit_args_iter = (
-                (estimators[i], X, y, pair_indices)
-                for i in range(self.outer_bags)
+                (estimators[i], X, y, pair_indices) for i in range(self.outer_bags)
             )

             estimators = provider.parallel(staged_fit_fn, staged_fit_args_iter)
@@ -1110,9 +1102,7 @@ def explain_global(self, name=None):
         # Obtain min/max for model scores
         lower_bound = np.inf
         upper_bound = -np.inf
-        for feature_combination_index, _ in enumerate(
-            self.feature_groups_
-        ):
+        for feature_combination_index, _ in enumerate(self.feature_groups_):
             errors = self.term_standard_deviations_[feature_combination_index]
             scores = self.additive_terms_[feature_combination_index]

@@ -1302,17 +1292,11 @@ def explain_local(self, X, y=None, name=None):
         is_classification = is_classifier(self)
         if is_classification:
             scores = EBMUtils.classifier_predict_proba(
-                instances,
-                self.feature_groups_,
-                self.additive_terms_,
-                self.intercept_,
+                instances, self.feature_groups_, self.additive_terms_, self.intercept_,
             )
         else:
             scores = EBMUtils.regressor_predict(
-                instances,
-                self.feature_groups_,
-                self.additive_terms_,
-                self.intercept_,
+                instances, self.feature_groups_, self.additive_terms_, self.intercept_,
             )

         perf_list = []
13 changes: 3 additions & 10 deletions python/interpret-core/interpret/glassbox/ebm/internal.py
@@ -645,11 +645,7 @@ def close(self):
         log.info("Deallocation boosting end")

     def boosting_step(
-        self,
-        feature_combination_index,
-        learning_rate,
-        max_leaves,
-        min_samples_leaf,
+        self, feature_combination_index, learning_rate, max_leaves, min_samples_leaf,
     ):

         """ Conducts a boosting step per feature
@@ -699,9 +695,7 @@ def boosting_step(
            ct.byref(metric_output),
        )
        if return_code != 0:  # pragma: no cover
-            raise Exception(
-                "Out of memory in ApplyModelFeatureCombinationUpdate"
-            )
+            raise Exception("Out of memory in ApplyModelFeatureCombinationUpdate")

        # log.debug("Boosting step end")
        return metric_output.value
@@ -1094,8 +1088,7 @@ def get_interactions(
    ) as native_ebm_interactions:
        for feature_combination in iter_feature_combinations:
            score = native_ebm_interactions.get_interaction_score(
-                feature_combination,
-                min_samples_leaf,
+                feature_combination, min_samples_leaf,
            )
            interaction_scores.append((feature_combination, score))

7 changes: 4 additions & 3 deletions python/interpret-core/interpret/glassbox/ebm/utils.py
@@ -20,7 +20,7 @@
 class EBMUtils:
     @staticmethod
     def convert_to_intervals(cuts):
-        cuts = np.array(cuts, dtype = np.float64)
+        cuts = np.array(cuts, dtype=np.float64)

         if np.isnan(cuts).any():
             raise Exception("cuts cannot contain nan")
@@ -87,7 +87,9 @@ def ebm_train_test_split(
         y_uniq = len(set(y))
         n_test_samples = test_size if test_size >= 1 else len(y) * test_size
         if n_test_samples < y_uniq:  # pragma: no cover
-            warnings.warn("Too few samples per class, adapting test size to guarantee 1 sample per class.")
+            warnings.warn(
+                "Too few samples per class, adapting test size to guarantee 1 sample per class."
+            )
             test_size = y_uniq

         X_train, X_val, y_train, y_val = train_test_split(
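To see why the guard above fires, consider made-up numbers: with a fractional test_size, the requested test set can be smaller than the number of classes, which would break a stratified split.

y = [0] * 10 + [1] * 8 + [2] * 2
test_size = 0.1
n_test_samples = test_size if test_size >= 1 else len(y) * test_size  # 2.0
y_uniq = len(set(y))  # 3 classes
# 2.0 < 3 triggers the warning, and test_size is bumped to y_uniq (3 samples)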
@@ -103,7 +105,6 @@
         if not is_train:
             X_train, y_train = None, None

-
         # TODO PK doing a fortran re-ordering here (and an extra copy) isn't the most efficient way
         # push the re-ordering right to our first call to fit(..) AND stripe convert
         # groups of rows at once and they process them in fortran order after that
12 changes: 10 additions & 2 deletions python/interpret-core/interpret/provider/visualize.py
@@ -8,7 +8,10 @@
 from warnings import warn

 from ..version import __version__
-JS_URL = "https://unpkg.com/@interpretml/interpret-inline@{}/dist/interpret-inline.js".format(__version__)
+
+JS_URL = "https://unpkg.com/@interpretml/interpret-inline@{}/dist/interpret-inline.js".format(
+    __version__
+)

 log = logging.getLogger(__name__)

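The module-level constant above pins the inline-visualization bundle on unpkg to the installed package version; for example (version number hypothetical):

__version__ = "0.2.1"
JS_URL = "https://unpkg.com/@interpretml/interpret-inline@{}/dist/interpret-inline.js".format(
    __version__
)
# -> "https://unpkg.com/@interpretml/interpret-inline@0.2.1/dist/interpret-inline.js"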
@@ -190,4 +193,9 @@ def __init__(self, detected_envs=None, js_url=None):
     def render(self, explanation, key=-1, **kwargs):
         from ..visual.inline import render

-        render(explanation, default_key=key, detected_envs=self.detected_envs, js_url=self.js_url)
+        render(
+            explanation,
+            default_key=key,
+            detected_envs=self.detected_envs,
+            js_url=self.js_url,
+        )
1 change: 1 addition & 0 deletions python/interpret-core/interpret/test/test_explainers.py
@@ -5,6 +5,7 @@

 from .utils import synthetic_classification, get_all_explainers
 from .utils import assert_valid_explanation, assert_valid_model_explainer
+
 # from ..blackbox import PermutationImportance

 from ..glassbox import LogisticRegression
1 change: 1 addition & 0 deletions python/interpret-core/interpret/test/test_selenium.py
@@ -4,6 +4,7 @@
 from .utils import synthetic_classification, get_all_explainers
 from ..glassbox import LogisticRegression
 from ..glassbox.decisiontree import TreeExplanation
+
 # from ..blackbox import PermutationImportance
 from ..visual.interactive import set_show_addr, shutdown_show_server, show_link
 from copy import deepcopy
1 change: 1 addition & 0 deletions python/interpret-core/interpret/test/utils.py
@@ -8,6 +8,7 @@
 from ..blackbox import ShapKernel
 from ..blackbox import MorrisSensitivity
 from ..blackbox import PartialDependence
+
 # from ..blackbox import PermutationImportance

 from ..greybox import TreeInterpreter
9 changes: 2 additions & 7 deletions python/interpret-core/interpret/utils/all.py
@@ -128,14 +128,9 @@ def gen_local_selector(data_dicts, round=3, is_classification=True):
         records.append(record)

     if is_classification:
-        columns = [
-            "Predicted", "PrScore", "Actual", "AcScore",
-            "Resid", "AbsResid"
-        ]
+        columns = ["Predicted", "PrScore", "Actual", "AcScore", "Resid", "AbsResid"]
     else:
-        columns = [
-            "Predicted", "Actual", "Resid", "AbsResid"
-        ]
+        columns = ["Predicted", "Actual", "Resid", "AbsResid"]

     df = pd.DataFrame.from_records(records, columns=columns)
     if round is not None:
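For context, the columns above feed the local-explanation selector table; a one-row sketch with invented values:

import pandas as pd

records = [{"Predicted": 1, "PrScore": 0.84, "Actual": 0, "AcScore": 0.16,
            "Resid": -0.84, "AbsResid": 0.84}]
columns = ["Predicted", "PrScore", "Actual", "AcScore", "Resid", "AbsResid"]
df = pd.DataFrame.from_records(records, columns=columns)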
2 changes: 1 addition & 1 deletion python/interpret-core/interpret/utils/environment.py
@@ -79,7 +79,7 @@ def _detect_azure_notebook():

 def _detect_azureml_notebook_vm():
     nbvm_file_path = "/mnt/azmnt/.nbvm"
-    return (os.path.exists(nbvm_file_path) and os.path.isfile(nbvm_file_path))
+    return os.path.exists(nbvm_file_path) and os.path.isfile(nbvm_file_path)


 def _detect_vscode():
7 changes: 1 addition & 6 deletions python/interpret-core/interpret/utils/test/test_all.py
@@ -20,12 +20,7 @@ def test_gen_perf_dicts_regression():

 def test_gen_perf_dicts_classification():
     y = np.array([0, 2])
-    scores = np.array(
-        [
-            [0.9, 0.06, 0.04],
-            [0.1, 0.5, 0.4],
-        ]
-    )
+    scores = np.array([[0.9, 0.06, 0.04], [0.1, 0.5, 0.4],])
     expected_predicted = np.array([0, 1])
     expected_actual_score = np.array([0.9, 0.4])
     expected_predicted_score = np.array([0.9, 0.5])
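The expected arrays above follow from an argmax over the per-class probabilities; a quick check (a sketch, not the library's implementation):

import numpy as np

scores = np.array([[0.9, 0.06, 0.04], [0.1, 0.5, 0.4]])
y = np.array([0, 2])
predicted = scores.argmax(axis=1)            # array([0, 1])
predicted_score = scores.max(axis=1)         # array([0.9, 0.5])
actual_score = scores[np.arange(len(y)), y]  # array([0.9, 0.4])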
8 changes: 6 additions & 2 deletions python/interpret-core/interpret/visual/inline.py
@@ -120,7 +120,9 @@ def _build_javascript(viz_obj, id_str=None, default_key=-1, js_url=None):
     else:
         init_js = """
         <script type="text/javascript" src="{0}"></script>
-        """.format(js_url)
+        """.format(
+            js_url
+        )

     if id_str is None:
         div_id = "_interpret-viz-{0}".format(uuid.uuid4())
@@ -200,7 +202,9 @@ def render(explanation, id_str=None, default_key=-1, detected_envs=None, js_url=
     else:
         viz_obj = _build_viz_obj(explanation)

-    init_js, body_js = _build_javascript(viz_obj, id_str, default_key=default_key, js_url=js_url)
+    init_js, body_js = _build_javascript(
+        viz_obj, id_str, default_key=default_key, js_url=js_url
+    )

     if "databricks" in detected_envs:
         _render_databricks(init_js + body_js)