From 44fed0c4663389a7cfe9b0c8d64bf136a1892432 Mon Sep 17 00:00:00 2001 From: Interpret ML Date: Wed, 11 Dec 2019 18:24:49 -0800 Subject: [PATCH] Reformat by black. --- .../interpret/glassbox/ebm/ebm.py | 243 +++++++++++------- .../interpret/glassbox/ebm/internal.py | 191 ++++++++------ .../interpret/glassbox/ebm/postprocessing.py | 5 +- .../interpret/glassbox/ebm/test/test_ebm.py | 1 - .../interpret/glassbox/ebm/utils.py | 69 +++-- .../interpret/glassbox/skoperules.py | 10 +- .../interpret/provider/test/test_providers.py | 1 + python/interpret-core/interpret/utils/all.py | 1 + .../interpret/visual/interactive.py | 22 +- 9 files changed, 323 insertions(+), 220 deletions(-) diff --git a/python/interpret-core/interpret/glassbox/ebm/ebm.py b/python/interpret-core/interpret/glassbox/ebm/ebm.py index 92059b222..1242ed658 100644 --- a/python/interpret-core/interpret/glassbox/ebm/ebm.py +++ b/python/interpret-core/interpret/glassbox/ebm/ebm.py @@ -161,7 +161,9 @@ def fit(self, X): self.schema_ = ( self.schema if self.schema is not None - else autogen_schema(X, feature_names=self.feature_names, feature_types=self.feature_types) + else autogen_schema( + X, feature_names=self.feature_names, feature_types=self.feature_types + ) ) schema = self.schema_ @@ -382,11 +384,11 @@ def fit_parallel(self, X, y, n_classes): # Split data into train/val X_train, X_val, y_train, y_val = EBMUtils.ebm_train_test_split( - X, - y, - test_size=self.holdout_split, - random_state=self.random_state, - is_classification=self.model_type == "classification" + X, + y, + test_size=self.holdout_split, + random_state=self.random_state, + is_classification=self.model_type == "classification", ) # Define features @@ -399,7 +401,7 @@ def fit_parallel(self, X, y, n_classes): self.intercept_ = np.zeros( EBMUtils.get_count_scores_c(self.n_classes_), dtype=np.float64, - order='C' + order="C", ) else: self.intercept_ = np.float64(0) @@ -413,7 +415,9 @@ def fit_parallel(self, X, y, n_classes): else: # pragma: no cover raise RuntimeError("Argument 'main_attr' has invalid value") - main_feature_combinations = EBMUtils.gen_feature_combinations(main_feature_indices) + main_feature_combinations = EBMUtils.gen_feature_combinations( + main_feature_indices + ) self.feature_combinations_ = [] self.model_ = [] @@ -427,14 +431,20 @@ def fit_parallel(self, X, y, n_classes): self.inter_episode_idx_ = 0 if len(self.inter_indices_) != 0: - self._staged_fit_interactions(X_train, y_train, X_val, y_val, self.inter_indices_) + self._staged_fit_interactions( + X_train, y_train, X_val, y_val, self.inter_indices_ + ) return self def _fit_main(self, main_feature_combinations, X_train, y_train, X_val, y_val): log.info("Train main effects") - self.model_, self.current_metric_, self.main_episode_idx_ = NativeHelper.cyclic_gradient_boost( + ( + self.model_, + self.current_metric_, + self.main_episode_idx_, + ) = NativeHelper.cyclic_gradient_boost( model_type=self.model_type, n_classes=self.n_classes_, features=self.features_, @@ -454,7 +464,7 @@ def _fit_main(self, main_feature_combinations, X_train, y_train, X_val, y_val): data_n_episodes=self.data_n_episodes, early_stopping_tolerance=self.early_stopping_tolerance, early_stopping_run_length=self.early_stopping_run_length, - name="Main" + name="Main", ) self.feature_combinations_ = main_feature_combinations @@ -469,7 +479,7 @@ def _build_interactions(self, X_train, y_train): X_train, self.feature_combinations_, self.model_, self.intercept_ ) - iter_feature_combinations=combinations(range(len(self.col_types)), 2) + 
iter_feature_combinations = combinations(range(len(self.col_types)), 2) final_indices, final_scores = NativeHelper.get_interactions( n_interactions=self.interactions, @@ -479,7 +489,7 @@ def _build_interactions(self, X_train, y_train): features=self.features_, X=X_train, y=y_train, - scores=scores_train + scores=scores_train, ) elif isinstance(self.interactions, int) and self.interactions == 0: final_indices = [] @@ -492,7 +502,9 @@ def _build_interactions(self, X_train, y_train): return final_indices, final_scores - def _staged_fit_interactions(self, X_train, y_train, X_val, y_val, inter_indices=[]): + def _staged_fit_interactions( + self, X_train, y_train, X_val, y_val, inter_indices=[] + ): log.info("Training interactions") @@ -505,7 +517,11 @@ def _staged_fit_interactions(self, X_train, y_train, X_val, y_val, inter_indices inter_feature_combinations = EBMUtils.gen_feature_combinations(inter_indices) - model_update, self.current_metric_, self.inter_episode_idx_ = NativeHelper.cyclic_gradient_boost( + ( + model_update, + self.current_metric_, + self.inter_episode_idx_, + ) = NativeHelper.cyclic_gradient_boost( model_type=self.model_type, n_classes=self.n_classes_, features=self.features_, @@ -525,7 +541,7 @@ def _staged_fit_interactions(self, X_train, y_train, X_val, y_val, inter_indices data_n_episodes=self.data_n_episodes, early_stopping_tolerance=self.early_stopping_tolerance, early_stopping_run_length=self.early_stopping_run_length, - name="Pair" + name="Pair", ) self.model_.extend(model_update) @@ -538,15 +554,15 @@ def staged_fit_interactions_parallel(self, X, y, inter_indices=[]): log.info("Splitting train/test for interactions") # Split data into train/val - # NOTE: ideally we would store the train/validation split in the + # NOTE: ideally we would store the train/validation split in the # remote processes, but joblib doesn't have a concept # of keeping remote state, so we re-split our sets X_train, X_val, y_train, y_val = EBMUtils.ebm_train_test_split( - X, - y, - test_size=self.holdout_split, - random_state=self.random_state, - is_classification=self.model_type == "classification" + X, + y, + test_size=self.holdout_split, + random_state=self.random_state, + is_classification=self.model_type == "classification", ) self._staged_fit_interactions(X_train, y_train, X_val, y_val, inter_indices) @@ -571,29 +587,29 @@ class BaseEBM(BaseEstimator): # to pass information about the dataset into the __init__ function because then it's possible to do the training # first and then later set things like the features names after training. Also, people have become accustomed # to passing optional parameters into the __init__ function, but not the fit function, so we maintain that by - # using __init__. This is slightly inconcistent if the user passes in a pandas DataFrame which has feature column names, + # using __init__. This is slightly inconcistent if the user passes in a pandas DataFrame which has feature column names, # but this still gives the user ultimate control since they can either keep the DataFrame names or pass in new ones to __init__ - # Lastly, scikit-learn probably doesn't include X, y, and weights in __init__ because those should be pickled given their + # Lastly, scikit-learn probably doesn't include X, y, and weights in __init__ because those should be pickled given their # potential size. 
We don't have that issue with our smaller extra dataset dependent parameters # TODO PK v.2 per above, we've decided to pass information related to the dataset in via __init__, but # we need to decide then if we inlcude the trailing underscores for these variables, which include: # feature_names, feature_types, schema, main_attr, interactions (for specific columns) # per : https://scikit-learn.org/dev/developers/develop.html - # "Attributes that have been estimated from the data must always have a name ending with trailing underscore, + # "Attributes that have been estimated from the data must always have a name ending with trailing underscore, # for example the coefficients of some regression estimator would be stored in a coef_ attribute after fit has been called." def __init__( self, # Explainer - # TODO PK v.2 feature_names is currently by feature_combination. Perahps we need to make one per + # TODO PK v.2 feature_names is currently by feature_combination. Perahps we need to make one per # feature as well, so would be called feature_names_by_feature and feature_names_by_feature_combination feature_names=None, # TODO PK v.2 look at how sklearn has thought about feature types -> https://github.com/scikit-learn/scikit-learn/pull/3346 - # also look at lightGBM's categorical_feature parameter + # also look at lightGBM's categorical_feature parameter # https://towardsdatascience.com/catboost-vs-light-gbm-vs-xgboost-5f93620723db # - # TODO PK v.2 feature_types is currently by feature_combination. Perahps we need to make one per + # TODO PK v.2 feature_types is currently by feature_combination. Perahps we need to make one per # feature as well, so would be called feature_types_by_feature and feature_types_by_feature_combination feature_types=None, # Data @@ -603,7 +619,7 @@ def __init__( # for AUC or interpretability visualizations. Anyone wanting to do specialized work in comparing our algorithm against others may want # to precisely duplicate our binning procedure, but this is a very very small subset of our users, so they can just # copy our internal bin cutting function -> we can make this easier by having a clean function just for bin cutting - # that other people can either call or copy if they want to do this specialized work of having exactly the same + # that other people can either call or copy if they want to do this specialized work of having exactly the same # bins across two different ML algorithms. # TODO PK v.2 can we eliminate the schema parameter given that we also take feature_names and feature_types definitions in this interface? schema=None, @@ -623,16 +639,12 @@ def __init__( # that people may want to use # both at the same time, and there isn't a good way to separate the two concepts # without issues. Also, the deserve to be in separate functions (init vs fit) - # TODO PK v.2 change interactions to n_interactions which can either be a number for pairs # or can be a list/tuple of integers which denote the number of interactions per dimension # so (3,2,1) would mean 3 pairs, 2 tripples, 1 quadruple - - # TODO PK v.2 add specific_interactions list of interactions to include (n_interactions will not re-pick these). + # TODO PK v.2 add specific_interactions list of interactions to include (n_interactions will not re-pick these). # Allow these to be in any order and don't sort that order, unlike the n_interactions parameter - # TODO PK v.2 exclude -> exclude feature_combinations, either mains, or pairs or whatever. 
This will take precedence over specific_interactions so anything there will be excluded - interactions=0, # TODO PK v.2 use test_size instead of holdout_split, since sklearn does holdout_split=0.15, @@ -710,7 +722,10 @@ def fit(self, X, y): # noqa: C901 # Build preprocessor self.preprocessor_ = EBMPreprocessor( - schema=self.schema, binning_strategy=self.binning_strategy, feature_names=self.feature_names, feature_types=self.feature_types + schema=self.schema, + binning_strategy=self.binning_strategy, + feature_names=self.feature_names, + feature_types=self.feature_types, ) self.preprocessor_.fit(X) @@ -723,7 +738,7 @@ def fit(self, X, y): # noqa: C901 estimators = [] if is_classifier(self): self.classes_, y = np.unique(y, return_inverse=True) - y = y.astype(np.int64, casting='unsafe', copy=False) + y = y.astype(np.int64, casting="unsafe", copy=False) n_classes = len(self.classes_) if n_classes > 2: # pragma: no cover warn("Multiclass is still experimental. Subject to change per release.") @@ -756,7 +771,7 @@ def fit(self, X, y): # noqa: C901 estimators.append(estimator) else: n_classes = -1 - y = y.astype(np.float64, casting='unsafe', copy=False) + y = y.astype(np.float64, casting="unsafe", copy=False) for i in range(self.n_estimators): estimator = BaseCoreEBM( # Data @@ -794,7 +809,7 @@ def fit(self, X, y): # noqa: C901 self.intercept_ = np.zeros( EBMUtils.get_count_scores_c(self.n_classes_), dtype=np.float64, - order='C' + order="C", ) else: self.intercept_ = np.float64(0) @@ -818,7 +833,9 @@ def train_model(estimator, X, y, n_classes): # Discard initial interactions new_model = [] new_feature_combinations = [] - for i, feature_combination in enumerate(estimator.feature_combinations_): + for i, feature_combination in enumerate( + estimator.feature_combinations_ + ): if len(feature_combination["attributes"]) != 1: continue new_model.append(estimator.model_[i]) @@ -830,10 +847,13 @@ def train_model(estimator, X, y, n_classes): if len(pair_indices) != 0: # Retrain interactions for base models def staged_fit_fn(estimator, X, y, inter_indices=[]): - return estimator.staged_fit_interactions_parallel(X, y, inter_indices) + return estimator.staged_fit_interactions_parallel( + X, y, inter_indices + ) staged_fit_args_iter = ( - (estimators[i], X, y, pair_indices) for i in range(self.n_estimators) + (estimators[i], X, y, pair_indices) + for i in range(self.n_estimators) ) estimators = provider.parallel(staged_fit_fn, staged_fit_args_iter) @@ -882,7 +902,7 @@ def staged_fit_fn(estimator, X, y, inter_indices=[]): model_errors = np.std(np.array(log_odds_tensors), axis=0) # TODO PK v.2 if we end up choosing to expand/contract by removing - # logits from multiclass models, averaged_model + # logits from multiclass models, averaged_model # do it HERE AND apply post processing before returning self.attribute_set_models_.append(averaged_model) @@ -890,7 +910,7 @@ def staged_fit_fn(estimator, X, y, inter_indices=[]): # Get episode indexes for base estimators. 
self.main_episode_idxs_ = [] - # TODO PK v.2 inter_episode_idxs_ -> interaction_episode_idxs_ + # TODO PK v.2 inter_episode_idxs_ -> interaction_episode_idxs_ # (but does this need to be exposed at all) self.inter_episode_idxs_ = [] for estimator in estimators: @@ -915,7 +935,7 @@ def staged_fit_fn(estimator, X, y, inter_indices=[]): scores_gen = EBMUtils.scores_by_feature_combination( X, self.attribute_sets_, self.attribute_set_models_ ) - # TODO PK v.2 _attrib_set_model_means_ -> _model_means_ + # TODO PK v.2 _attrib_set_model_means_ -> _model_means_ # (or something else matching what this is being used for) # also look for anything with attrib inside of it self._attrib_set_model_means_ = [] @@ -933,7 +953,9 @@ def staged_fit_fn(estimator, X, y, inter_indices=[]): self._attrib_set_model_means_.append(score_mean) else: # Postprocess model graphs for multiclass - binned_predict_proba = lambda x: EBMUtils.classifier_predict_proba(x, self.attribute_sets_, self.attribute_set_models_, self.intercept_) + binned_predict_proba = lambda x: EBMUtils.classifier_predict_proba( + x, self.attribute_sets_, self.attribute_set_models_, self.intercept_ + ) postprocessed = multiclass_postprocess( X, self.attribute_set_models_, binned_predict_proba, self.feature_types @@ -960,16 +982,22 @@ def staged_fit_fn(estimator, X, y, inter_indices=[]): def _select_merged_pairs(self, estimators, X, y): # TODO PK we really need to use purification before here because it's not really legal to elminate - # a feature combination unless it's average contribution value is zero, and for a pair that + # a feature combination unless it's average contribution value is zero, and for a pair that # would mean that the intercepts for both features in the combination were zero, hense purified # Select pairs from base models def score_fn(model_type, X, y, feature_combinations, model, intercept): if model_type == "classification": - prob = EBMUtils.classifier_predict_proba(X, feature_combinations, model, intercept) - return 0 if len(y) == 0 else log_loss(y, prob) # use logloss to conform consistnetly and for multiclass + prob = EBMUtils.classifier_predict_proba( + X, feature_combinations, model, intercept + ) + return ( + 0 if len(y) == 0 else log_loss(y, prob) + ) # use logloss to conform consistnetly and for multiclass elif model_type == "regression": - pred = EBMUtils.regressor_predict(X, feature_combinations, model, intercept) + pred = EBMUtils.regressor_predict( + X, feature_combinations, model, intercept + ) return 0 if len(y) == 0 else mean_squared_error(y, pred) else: # pragma: no cover msg = "Unknown model_type: '{}'.".format(model_type) @@ -988,41 +1016,43 @@ def score_fn(model_type, X, y, feature_combinations, model, intercept): for index, estimator in enumerate(estimators): # TODO PK move the work done inside this loop to the original parallel threads so that this part can be done in parallel - # TODO PK this algorithm in O(N^2) by the number of interactions. Alternatively + # TODO PK this algorithm in O(N^2) by the number of interactions. 
Alternatively # there is an O(N) algorithm where we generate the logits for the base forward and base backwards # predictions, then we copy that entire array AND add or substract the one feature under consideration backward_impacts = [] forward_impacts = [] - # TODO PK we can remove the is_train input to ebm_train_test_split once we've moved the pair scoring stuff + # TODO PK we can remove the is_train input to ebm_train_test_split once we've moved the pair scoring stuff # to a background thread because we'll already have the validation split without re-splitting it _, X_val, _, y_val = EBMUtils.ebm_train_test_split( - X, - y, - test_size=self.holdout_split, - random_state=estimator.random_state, + X, + y, + test_size=self.holdout_split, + random_state=estimator.random_state, is_classification=is_classifier(self), - is_train=False + is_train=False, ) - n_base_feature_combinations = len(estimator.feature_combinations_) - len(estimator.inter_indices_) + n_base_feature_combinations = len(estimator.feature_combinations_) - len( + estimator.inter_indices_ + ) base_forward_score = score_fn( - estimator.model_type, - X_val, - y_val, - estimator.feature_combinations_[:n_base_feature_combinations], - estimator.model_[:n_base_feature_combinations], - estimator.intercept_ + estimator.model_type, + X_val, + y_val, + estimator.feature_combinations_[:n_base_feature_combinations], + estimator.model_[:n_base_feature_combinations], + estimator.intercept_, ) base_backward_score = score_fn( - estimator.model_type, - X_val, - y_val, - estimator.feature_combinations_, - estimator.model_, - estimator.intercept_ + estimator.model_type, + X_val, + y_val, + estimator.feature_combinations_, + estimator.model_, + estimator.intercept_, ) for pair_idx, pair in enumerate(estimator.inter_indices_): n_full_idx = n_base_feature_combinations + pair_idx @@ -1030,20 +1060,23 @@ def score_fn(model_type, X, y, feature_combinations, model, intercept): pair_freq[pair] += 1 backward_score = score_fn( - estimator.model_type, - X_val, - y_val, - estimator.feature_combinations_[:n_full_idx] + estimator.feature_combinations_[n_full_idx + 1:], - estimator.model_[:n_full_idx] + estimator.model_[n_full_idx + 1:], - estimator.intercept_ + estimator.model_type, + X_val, + y_val, + estimator.feature_combinations_[:n_full_idx] + + estimator.feature_combinations_[n_full_idx + 1 :], + estimator.model_[:n_full_idx] + estimator.model_[n_full_idx + 1 :], + estimator.intercept_, ) forward_score = score_fn( estimator.model_type, X_val, y_val, - estimator.feature_combinations_[:n_base_feature_combinations] + estimator.feature_combinations_[n_full_idx:n_full_idx + 1], - estimator.model_[:n_base_feature_combinations] + estimator.model_[n_full_idx:n_full_idx + 1], - estimator.intercept_ + estimator.feature_combinations_[:n_base_feature_combinations] + + estimator.feature_combinations_[n_full_idx : n_full_idx + 1], + estimator.model_[:n_base_feature_combinations] + + estimator.model_[n_full_idx : n_full_idx + 1], + estimator.intercept_, ) # for both regression (mean square error) and classification (log loss), higher values are bad, so # interactions with high positive values for backward_impact and forward_impact are good @@ -1102,7 +1135,9 @@ def explain_global(self, name=None): # Obtain min/max for model scores lower_bound = np.inf upper_bound = -np.inf - for feature_combination_index, feature_combination in enumerate(self.attribute_sets_): + for feature_combination_index, feature_combination in enumerate( + self.attribute_sets_ + ): errors = 
self.model_errors_[feature_combination_index] scores = self.attribute_set_models_[feature_combination_index] @@ -1115,12 +1150,16 @@ def explain_global(self, name=None): data_dicts = [] feature_list = [] density_list = [] - for feature_combination_index, feature_combination in enumerate(self.attribute_sets_): + for feature_combination_index, feature_combination in enumerate( + self.attribute_sets_ + ): model_graph = self.attribute_set_models_[feature_combination_index] # NOTE: This uses stddev. for bounds, consider issue warnings. errors = self.model_errors_[feature_combination_index] - feature_indexes = self.attribute_sets_[feature_combination_index]["attributes"] + feature_indexes = self.attribute_sets_[feature_combination_index][ + "attributes" + ] if len(feature_indexes) == 1: bin_labels = self.preprocessor_.get_bin_labels(feature_indexes[0]) @@ -1154,9 +1193,7 @@ def explain_global(self, name=None): "upper_bounds": model_graph + errors, "lower_bounds": model_graph - errors, "density": { - "names": self.preprocessor_.get_hist_edges( - feature_indexes[0] - ), + "names": self.preprocessor_.get_hist_edges(feature_indexes[0]), "scores": self.preprocessor_.get_hist_counts( feature_indexes[0] ), @@ -1164,12 +1201,8 @@ def explain_global(self, name=None): } data_dicts.append(data_dict) elif len(feature_indexes) == 2: - bin_labels_left = self.preprocessor_.get_bin_labels( - feature_indexes[0] - ) - bin_labels_right = self.preprocessor_.get_bin_labels( - feature_indexes[1] - ) + bin_labels_left = self.preprocessor_.get_bin_labels(feature_indexes[0]) + bin_labels_right = self.preprocessor_.get_bin_labels(feature_indexes[1]) feature_dict = { "type": "pairwise", @@ -1266,9 +1299,19 @@ def explain_local(self, X, y=None, name=None): data_dicts[row_idx]["values"].append("") if is_classifier(self): - scores = EBMUtils.classifier_predict_proba(instances, self.attribute_sets_, self.attribute_set_models_, self.intercept_)[:, 1] + scores = EBMUtils.classifier_predict_proba( + instances, + self.attribute_sets_, + self.attribute_set_models_, + self.intercept_, + )[:, 1] else: - scores = EBMUtils.regressor_predict(instances, self.attribute_sets_, self.attribute_set_models_, self.intercept_) + scores = EBMUtils.regressor_predict( + instances, + self.attribute_sets_, + self.attribute_set_models_, + self.intercept_, + ) perf_list = [] for row_idx in range(n_rows): @@ -1387,7 +1430,9 @@ def predict_proba(self, X): X = np.ascontiguousarray(X.T) - prob = EBMUtils.classifier_predict_proba(X, self.attribute_sets_, self.attribute_set_models_, self.intercept_) + prob = EBMUtils.classifier_predict_proba( + X, self.attribute_sets_, self.attribute_set_models_, self.intercept_ + ) return prob def predict(self, X): @@ -1399,7 +1444,13 @@ def predict(self, X): X = np.ascontiguousarray(X.T) - return EBMUtils.classifier_predict(X, self.attribute_sets_, self.attribute_set_models_, self.intercept_, self.classes_) + return EBMUtils.classifier_predict( + X, + self.attribute_sets_, + self.attribute_set_models_, + self.intercept_, + self.classes_, + ) class ExplainableBoostingRegressor(BaseEBM, RegressorMixin, ExplainerMixin): @@ -1479,4 +1530,6 @@ def predict(self, X): X = np.ascontiguousarray(X.T) - return EBMUtils.regressor_predict(X, self.attribute_sets_, self.attribute_set_models_, self.intercept_) + return EBMUtils.regressor_predict( + X, self.attribute_sets_, self.attribute_set_models_, self.intercept_ + ) diff --git a/python/interpret-core/interpret/glassbox/ebm/internal.py 
b/python/interpret-core/interpret/glassbox/ebm/internal.py index 7bf0adfe6..4fd402ad7 100644 --- a/python/interpret-core/interpret/glassbox/ebm/internal.py +++ b/python/interpret-core/interpret/glassbox/ebm/internal.py @@ -16,9 +16,10 @@ log = logging.getLogger(__name__) + class Native: """Layer/Class responsible for native function calls.""" - + _native = None def _initialize(self, is_debug, log_level): @@ -124,7 +125,7 @@ def _harden_function_signatures(self): # int64_t countInnerBags ct.c_longlong, # int64_t randomSeed - ct.c_longlong + ct.c_longlong, ] self.lib.InitializeBoostingClassification.restype = ct.c_void_p @@ -158,7 +159,7 @@ def _harden_function_signatures(self): # int64_t countInnerBags ct.c_longlong, # int64_t randomSeed - ct.c_longlong + ct.c_longlong, ] self.lib.InitializeBoostingRegression.restype = ct.c_void_p @@ -306,10 +307,10 @@ def native_log(trace_level, message): "NOTSET": self.TraceLevelOff, } - # it's critical that we put typed_log_func into self, + # it's critical that we put typed_log_func into self, # otherwise it will be garbage collected self._typed_log_func = self._LogFuncType(native_log) - + self.lib.SetLogMessageFunction(self._typed_log_func) self.lib.SetTraceLevel(ct.c_char(level_dict[level])) @@ -391,7 +392,7 @@ def convert_features_to_c(features): elif feature["type"] == "continuous": feature_ar[idx].featureType = Native.FeatureTypeOrdinal else: - raise AttributeError("Unrecognized feature[\"type\"]") + raise AttributeError('Unrecognized feature["type"]') feature_ar[idx].hasMissing = 1 * feature["has_missing"] feature_ar[idx].countBins = feature["n_bins"] @@ -407,12 +408,16 @@ def convert_feature_combinations_to_c(feature_combinations): )() for idx, feature_combination in enumerate(feature_combinations): features_in_combination = feature_combination["attributes"] - feature_combinations_ar[idx].countFeaturesInCombination = len(features_in_combination) + feature_combinations_ar[idx].countFeaturesInCombination = len( + features_in_combination + ) for feature_idx in features_in_combination: feature_combination_indexes.append(feature_idx) - feature_combination_indexes = np.array(feature_combination_indexes, dtype=np.int64) + feature_combination_indexes = np.array( + feature_combination_indexes, dtype=np.int64 + ) return feature_combinations_ar, feature_combination_indexes @@ -434,7 +439,7 @@ def __init__( y_val, scores_val, n_inner_bags, - random_state + random_state, ): """ Initializes internal wrapper for EBM C code. 
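[Reviewer note, not part of the patch] The "garbage collected" comment near `_typed_log_func` in the hunks above is the standard ctypes callback pitfall, so a minimal sketch may help future readers. The library handle and the C hook's name/signature below are hypothetical; only the keep-a-reference-on-self pattern mirrors what the patch does.

    import ctypes as ct

    # Hypothetical C-side hook: void set_log_func(void (*cb)(int, const char *))
    _LogFuncType = ct.CFUNCTYPE(None, ct.c_int, ct.c_char_p)

    class NativeWrapper:
        def __init__(self, lib):
            def log_func(level, message):
                print(level, message.decode("ascii"))

            # Storing the typed callback on self keeps it alive for the lifetime
            # of the wrapper. If it were only a local variable, Python would be
            # free to collect it while the C library still held the function
            # pointer, and the next native log call would jump into freed memory.
            self._typed_log_func = _LogFuncType(log_func)
            lib.set_log_func(self._typed_log_func)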
@@ -466,7 +471,6 @@ def __init__( # first set the one thing that we will close on self._booster_pointer = None - # check inputs for important inputs or things that would segfault in C if not isinstance(features, list): # pragma: no cover raise ValueError("features should be a list") @@ -481,10 +485,14 @@ def __init__( raise ValueError("y_train should have exactly 1 dimension") if X_train.shape[0] != len(features): # pragma: no cover - raise ValueError("X_train does not have the same number of features as the features array") + raise ValueError( + "X_train does not have the same number of features as the features array" + ) if X_train.shape[1] != len(y_train): # pragma: no cover - raise ValueError("X_train does not have the same number of instances as y_train") + raise ValueError( + "X_train does not have the same number of instances as y_train" + ) if X_val.ndim != 2: # pragma: no cover raise ValueError("X_val should have exactly 2 dimensions") @@ -493,11 +501,14 @@ def __init__( raise ValueError("y_val should have exactly 1 dimension") if X_val.shape[0] != len(features): # pragma: no cover - raise ValueError("X_val does not have the same number of features as the features array") + raise ValueError( + "X_val does not have the same number of features as the features array" + ) if X_val.shape[1] != len(y_val): # pragma: no cover - raise ValueError("X_val does not have the same number of instances as y_val") - + raise ValueError( + "X_val does not have the same number of instances as y_val" + ) self._native = Native.get_native_singleton() @@ -511,38 +522,57 @@ def __init__( feature_array = Native.convert_features_to_c(features) self._feature_combinations = feature_combinations - feature_combinations_array, feature_combination_indexes = Native.convert_feature_combinations_to_c( - feature_combinations - ) + ( + feature_combinations_array, + feature_combination_indexes, + ) = Native.convert_feature_combinations_to_c(feature_combinations) n_scores = EBMUtils.get_count_scores_c(n_classes) if scores_train is None: - scores_train = np.zeros(len(y_train) * n_scores, dtype=np.float64, order='C') + scores_train = np.zeros( + len(y_train) * n_scores, dtype=np.float64, order="C" + ) else: if scores_train.shape[0] != len(y_train): # pragma: no cover - raise ValueError("scores_train does not have the same number of instances as y_train") + raise ValueError( + "scores_train does not have the same number of instances as y_train" + ) if n_scores == 1: if scores_train.ndim != 1: # pragma: no cover - raise ValueError("scores_train should have exactly 1 dimensions for regression or binary classification") + raise ValueError( + "scores_train should have exactly 1 dimensions for regression or binary classification" + ) else: if scores_train.ndim != 2: # pragma: no cover - raise ValueError("scores_train should have exactly 2 dimensions for multiclass") - if(scores_train.shape[1] != n_scores): # pragma: no cover - raise ValueError("scores_train does not have the same number of logit scores as n_scores") + raise ValueError( + "scores_train should have exactly 2 dimensions for multiclass" + ) + if scores_train.shape[1] != n_scores: # pragma: no cover + raise ValueError( + "scores_train does not have the same number of logit scores as n_scores" + ) if scores_val is None: - scores_val = np.zeros(len(y_val) * n_scores, dtype=np.float64, order='C') + scores_val = np.zeros(len(y_val) * n_scores, dtype=np.float64, order="C") else: if scores_val.shape[0] != len(y_val): # pragma: no cover - raise ValueError("scores_val does 
not have the same number of instances as y_val") + raise ValueError( + "scores_val does not have the same number of instances as y_val" + ) if n_scores == 1: if scores_val.ndim != 1: # pragma: no cover - raise ValueError("scores_val should have exactly 1 dimensions for regression or binary classification") + raise ValueError( + "scores_val should have exactly 1 dimensions for regression or binary classification" + ) else: if scores_val.ndim != 2: # pragma: no cover - raise ValueError("scores_val should have exactly 2 dimensions for multiclass") - if(scores_val.shape[1] != n_scores): # pragma: no cover - raise ValueError("scores_val does not have the same number of logit scores as n_scores") + raise ValueError( + "scores_val should have exactly 2 dimensions for multiclass" + ) + if scores_val.shape[1] != n_scores: # pragma: no cover + raise ValueError( + "scores_val does not have the same number of logit scores as n_scores" + ) # Allocate external resources if model_type == "classification": @@ -562,7 +592,7 @@ def __init__( y_val, scores_val, n_inner_bags, - random_state + random_state, ) if not self._booster_pointer: # pragma: no cover raise MemoryError("Out of memory in InitializeBoostingClassification") @@ -582,7 +612,7 @@ def __init__( y_val, scores_val, n_inner_bags, - random_state + random_state, ) if not self._booster_pointer: # pragma: no cover raise MemoryError("Out of memory in InitializeBoostingRegression") @@ -638,11 +668,15 @@ def boosting_step( ct.byref(gain), ) if not model_update_tensor_pointer: # pragma: no cover - raise MemoryError("Out of memory in GenerateModelFeatureCombinationUpdate") + raise MemoryError( + "Out of memory in GenerateModelFeatureCombinationUpdate" + ) shape = self._get_feature_combination_shape(feature_combination_index) # TODO PK verify that we aren't copying data while making the view and/or passing to ApplyModelFeatureCombinationUpdate - model_update_tensor = Native.make_ndarray(model_update_tensor_pointer, shape, dtype=np.double, copy_data=False) + model_update_tensor = Native.make_ndarray( + model_update_tensor_pointer, shape, dtype=np.double, copy_data=False + ) return_code = self._native.lib.ApplyModelFeatureCombinationUpdate( self._booster_pointer, @@ -651,14 +685,16 @@ def boosting_step( ct.byref(metric_output), ) if return_code != 0: # pragma: no cover - raise Exception("Out of memory in ApplyModelFeatureCombinationUpdate") + raise Exception( + "Out of memory in ApplyModelFeatureCombinationUpdate" + ) # log.debug("Boosting step end") return metric_output.value def _get_feature_combination_shape(self, feature_combination_index): # TODO PK do this once during construction so that we don't have to do it again - # and so that we don't have to store self._features & self._feature_combinations + # and so that we don't have to store self._features & self._feature_combinations # Retrieve dimensions of log odds tensor dimensions = [] @@ -698,14 +734,14 @@ def _get_best_model_feature_combination(self, feature_combination_index): # a None value here for now and handle in the upper levels # # If we were to allow datasets with zero instances, then it would also be legal for there - # to be 0 states. We can probably handle this the same as having 1 state though since + # to be 0 states. 
We can probably handle this the same as having 1 state though since # any instances in any evaluations need to have a state # TODO PK make sure the None value here is handled by our caller return None # TODO PK v.2 currently we return only a single logit for binary classification - # for the positive case (the one with target 1). scikit also + # for the positive case (the one with target 1). scikit also # stores and returns 1 logit, but they say to do softmax, make the # target0 logit equal to the negative of the target1 logit. # this has the nice property that it would closely match what you'd @@ -753,7 +789,7 @@ def _get_current_model_feature_combination(self, feature_combination_index): # a None value here for now and handle in the upper levels # # If we were to allow datasets with zero instances, then it would also be legal for there - # to be 0 states. We can probably handle this the same as having 1 state though since + # to be 0 states. We can probably handle this the same as having 1 state though since # any instances in any evaluations need to have a state # TODO PK make sure the None value here is handled by our caller @@ -774,7 +810,9 @@ def _get_current_model_feature_combination(self, feature_combination_index): def get_current_model(self): model = [] for index in range(len(self._feature_combinations)): - model_feature_combination = self._get_current_model_feature_combination(index) + model_feature_combination = self._get_current_model_feature_combination( + index + ) model.append(model_feature_combination) return model @@ -785,13 +823,7 @@ class NativeEBMInteraction: """ def __init__( - self, - model_type, - n_classes, - features, - X, - y, - scores, + self, model_type, n_classes, features, X, y, scores, ): """ Initializes internal wrapper for EBM C code. 
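[Reviewer note, not part of the patch] The TODO above discusses storing a single logit for binary classification. A quick self-contained check of why that is enough: padding a zero logit for class 0 and applying softmax reproduces the plain sigmoid, which is the "softmax only works with 0s appended" convention used by `classifier_predict_proba` in utils.py.

    import numpy as np
    from scipy.special import expit, softmax  # softmax requires scipy >= 1.2

    log_odds = np.array([-1.2, 0.0, 2.5])  # one logit per instance (class 1)
    padded = np.c_[np.zeros_like(log_odds), log_odds]  # class-0 logit fixed at 0
    probs = softmax(padded, axis=1)

    # softmax([0, z]) == [1 - sigmoid(z), sigmoid(z)]
    assert np.allclose(probs[:, 1], expit(log_odds))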
@@ -813,7 +845,6 @@ def __init__( # first set the one thing that we will close on self._interaction_pointer = None - # check inputs for important inputs or things that would segfault in C if not isinstance(features, list): # pragma: no cover raise ValueError("features should be a list") @@ -825,12 +856,13 @@ def __init__( raise ValueError("y should have exactly 1 dimension") if X.shape[0] != len(features): # pragma: no cover - raise ValueError("X does not have the same number of features as the features array") + raise ValueError( + "X does not have the same number of features as the features array" + ) if X.shape[1] != len(y): # pragma: no cover raise ValueError("X does not have the same number of instances as y") - self._native = Native.get_native_singleton() log.info("Allocation interaction start") @@ -840,40 +872,39 @@ def __init__( n_scores = EBMUtils.get_count_scores_c(n_classes) if scores is None: - scores = np.zeros(len(y) * n_scores, dtype=np.float64, order='C') + scores = np.zeros(len(y) * n_scores, dtype=np.float64, order="C") else: if scores.shape[0] != len(y): # pragma: no cover - raise ValueError("scores does not have the same number of instances as y") + raise ValueError( + "scores does not have the same number of instances as y" + ) if n_scores == 1: if scores.ndim != 1: # pragma: no cover - raise ValueError("scores should have exactly 1 dimensions for regression or binary classification") + raise ValueError( + "scores should have exactly 1 dimensions for regression or binary classification" + ) else: if scores.ndim != 2: # pragma: no cover - raise ValueError("scores should have exactly 2 dimensions for multiclass") - if(scores.shape[1] != n_scores): # pragma: no cover - raise ValueError("scores does not have the same number of logit scores as n_scores") + raise ValueError( + "scores should have exactly 2 dimensions for multiclass" + ) + if scores.shape[1] != n_scores: # pragma: no cover + raise ValueError( + "scores does not have the same number of logit scores as n_scores" + ) # Allocate external resources if model_type == "classification": self._interaction_pointer = self._native.lib.InitializeInteractionClassification( - n_classes, - len(feature_array), - feature_array, - len(y), - X, - y, - scores, + n_classes, len(feature_array), feature_array, len(y), X, y, scores, ) if not self._interaction_pointer: # pragma: no cover - raise MemoryError("Out of memory in InitializeInteractionClassification") + raise MemoryError( + "Out of memory in InitializeInteractionClassification" + ) elif model_type == "regression": self._interaction_pointer = self._native.lib.InitializeInteractionRegression( - len(feature_array), - feature_array, - len(y), - X, - y, - scores, + len(feature_array), feature_array, len(y), X, y, scores, ) if not self._interaction_pointer: # pragma: no cover raise MemoryError("Out of memory in InitializeInteractionRegression") @@ -927,7 +958,7 @@ def cyclic_gradient_boost( data_n_episodes, early_stopping_tolerance, early_stopping_run_length, - name + name, ): min_metric = np.inf @@ -945,7 +976,7 @@ def cyclic_gradient_boost( y_val, scores_val, n_inner_bags, - random_state + random_state, ) ) as native_ebm_boosting: no_change_run_length = 0 @@ -969,10 +1000,10 @@ def cyclic_gradient_boost( # TODO PK this early_stopping_tolerance is a little inconsistent # since it triggers intermittently and only re-triggers if the - # threshold is re-passed, but not based on a smooth windowed set + # threshold is re-passed, but not based on a smooth windowed set # of checks. 
We can do better by keeping a list of the last # number of measurements to have a consistent window of values. - # If we only cared about the metric at the start and end of the epoch + # If we only cared about the metric at the start and end of the epoch # window a circular buffer would be best choice with O(1). if no_change_run_length == 0: bp_metric = min_metric @@ -987,7 +1018,11 @@ def cyclic_gradient_boost( ): break - log.info("End boosting {0}, Best Metric: {1}, Num Rounds: {2}".format(name, min_metric, episode_index)) + log.info( + "End boosting {0}, Best Metric: {1}, Num Rounds: {2}".format( + name, min_metric, episode_index + ) + ) model_update = native_ebm_boosting.get_best_model() return model_update, min_metric, episode_index @@ -1001,7 +1036,7 @@ def get_interactions( features, X, y, - scores + scores, ): # TODO PK we only need to store the top n_interactions items, so use a heap interaction_scores = [] @@ -1016,7 +1051,9 @@ def get_interactions( ) ) as native_ebm_interactions: for feature_combination in iter_feature_combinations: - score = native_ebm_interactions.get_interaction_score(feature_combination) + score = native_ebm_interactions.get_interaction_score( + feature_combination + ) interaction_scores.append((feature_combination, score)) ranked_scores = list( diff --git a/python/interpret-core/interpret/glassbox/ebm/postprocessing.py b/python/interpret-core/interpret/glassbox/ebm/postprocessing.py index 1a6b831b5..204ccf808 100644 --- a/python/interpret-core/interpret/glassbox/ebm/postprocessing.py +++ b/python/interpret-core/interpret/glassbox/ebm/postprocessing.py @@ -63,7 +63,10 @@ def multiclass_postprocess( updated_feature_graphs[i], change.reshape((num_bins, -1)) ) for k in range(K): - mean = np.sum(np.multiply(updated_feature_graphs[i][:, k], bincount)) / X_binned.shape[1] + mean = ( + np.sum(np.multiply(updated_feature_graphs[i][:, k], bincount)) + / X_binned.shape[1] + ) updated_feature_graphs[i][:, k] = np.subtract( updated_feature_graphs[i][:, k], mean ) diff --git a/python/interpret-core/interpret/glassbox/ebm/test/test_ebm.py b/python/interpret-core/interpret/glassbox/ebm/test/test_ebm.py index 1f9afe62c..16d8630ea 100644 --- a/python/interpret-core/interpret/glassbox/ebm/test/test_ebm.py +++ b/python/interpret-core/interpret/glassbox/ebm/test/test_ebm.py @@ -226,4 +226,3 @@ def test_zero_validation(): clf = ExplainableBoostingClassifier(n_jobs=1, interactions=2, holdout_split=0) clf.fit(X, y) - diff --git a/python/interpret-core/interpret/glassbox/ebm/utils.py b/python/interpret-core/interpret/glassbox/ebm/utils.py index d1f8deb75..1d9f68d89 100644 --- a/python/interpret-core/interpret/glassbox/ebm/utils.py +++ b/python/interpret-core/interpret/glassbox/ebm/utils.py @@ -23,30 +23,32 @@ def get_count_scores_c(n_classes): return 1 if n_classes <= 2 else n_classes @staticmethod - def ebm_train_test_split(X, y, test_size, random_state, is_classification, is_train=True): + def ebm_train_test_split( + X, y, test_size, random_state, is_classification, is_train=True + ): # TODO PK Implement the following for memory efficiency and speed of initialization: # - NOTE: FOR RawArray -> import multiprocessing ++ from multiprocessing import RawArray ++ RawArray(ct.c_ubyte, memory_size) ++ ct.POINTER(ct.c_ubyte) - # - OBSERVATION: We want sparse feature support in our booster since we don't need to access + # - OBSERVATION: We want sparse feature support in our booster since we don't need to access # memory if there are long segments with a single value - # - OBSERVATION: 
Sorting a dataset with sparse features will lead to unpredictably sized final memory sizes, + # - OBSERVATION: Sorting a dataset with sparse features will lead to unpredictably sized final memory sizes, # since more clumped data will be more compressed - # - OBSERVATION: for interactions, from a CPU access point of view, we want all of our features to have the - # same # of bits so that we can have one loop compare any tuple of features. + # - OBSERVATION: for interactions, from a CPU access point of view, we want all of our features to have the + # same # of bits so that we can have one loop compare any tuple of features. # We therefore do NOT want sparse feature support when looking for interactions - # - OBSERVATION: sorting will be easier for non-sparse data, and we'll want non-sparse data for interactions anyways, + # - OBSERVATION: sorting will be easier for non-sparse data, and we'll want non-sparse data for interactions anyways, # so we should only do sparseness for our boosting dataset allocation # - OBSERVATION: without sparse memory in the initial shared memory object, we can calculate the size without seeing the data. - # Even if we had sorted sparse features, we'd only find out the memory size after the sort, + # Even if we had sorted sparse features, we'd only find out the memory size after the sort, # so we'd want dynamically allocated memory during the sort - # - OBSERVATION: for boosting, we can compress memory to the right size per feature_combination, + # - OBSERVATION: for boosting, we can compress memory to the right size per feature_combination, # but for interactions, we want to compress all features by the same amount - # (all features use the same number of bits) so that we can compare any two/three/etc + # (all features use the same number of bits) so that we can compare any two/three/etc # features and loop at the same points for each # STEPS: # - We receive the data from the user in the cache inefficient format X[instances, features] - # - Do preprocessing so that we know how many bins each feature has + # - Do preprocessing so that we know how many bins each feature has # (we might want to process X[instances, features] in chunks, like below to do this) - # - call into C to get back the exact size of the memory object that we need in order to store all the data. + # - call into C to get back the exact size of the memory object that we need in order to store all the data. # We can do this because we won't store any of the data at this point as sparse # - Allocate the buffer in python using RawArray (RawArray will be shared with other processes as read only data) # - Divide the features into M chunks of N features. Let's choose M to be 32, so that we don't increase memory usage by more than 3% @@ -54,21 +56,21 @@ def ebm_train_test_split(X, y, test_size, random_state, is_classification, is_tr # - Take N features and all the instances from the original X and transpose them into X_partial[features_N, instances] # - Loop over N: # - take 1 feature and pass it into C for bit compression (don't use sparse coding here) into the RawArray - # - NOTE: this transposes the matrix twice (once for preprocessing and once for adding to C), + # - NOTE: this transposes the matrix twice (once for preprocessing and once for adding to C), # but this is expected to be a small amount of time compared to training, and we care more about memory size at this point - # - Call a C function which will finalize the dataset (this function will accept the target array). 
+        # - Call a C function which will finalize the dataset (this function will accept the target array).
         #   - The C function will create an index array and add this index to the dataset (it will be shared)
         #   - sort the index array by the target first, then the features with the highest counts of the mode value
         #   - sort the underlying data by the index array
         # - Now the memory is read only from now on, and shareable.  Include a reverse index in the data for reconstructing the
-        #   original order inside the data structure. 
+        #   original order inside the data structure.
         # - No pointers in the data structure, just offsets (for sharing cross process)!
         # - Start the child processes, and pass them our shared memory structure
+        #   (it will be mapped into each process address space, but not copied)
         # - each child calls a train/validation splitter provided by our C code that fills a numpy array of bools
         #   We do this in C instead of using the sklearn train_test_split because sklearn would require us to first split sequential indexes,
         #   possibly sort them (if order is not guaranteed), then convert to bools in a cache-inefficient way,
         #   whereas in C we can do a single pass without any memory array inputs (using just a random number generator)
         #   and we can make the outputs consistent across languages.
         # - with the complete RawArray data PLUS the train/validation bool list we can generate either interaction datasets OR boosting datasets as needed.
         #   We can reduce our memory footprint by never having both an interaction AND a boosting dataset in memory at the same time.
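[Reviewer note, not part of the patch] A minimal sketch of two pieces of the plan above, using pure Python/numpy stand-ins (the design itself puts the splitter in C, and the helper names here are hypothetical): a RawArray that child processes can map without copying, and a boolean validation mask produced in a single pass from a seeded generator.

    import ctypes as ct
    import numpy as np
    from multiprocessing import RawArray

    def make_shared_buffer(X):
        # One flat shared allocation; the numpy view aliases its memory, so
        # child processes map the same pages instead of unpickling a copy.
        raw = RawArray(ct.c_double, X.size)
        view = np.frombuffer(raw, dtype=np.float64).reshape(X.shape)
        view[:] = X
        return raw, view

    def validation_mask(n_instances, test_size, random_state):
        # Single pass, no index arrays to build or sort: True marks a
        # validation instance, matching the numpy-array-of-bools idea above.
        rng = np.random.RandomState(random_state)
        return rng.random_sample(n_instances) < test_size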
@@ -160,9 +162,7 @@ def gen_feature_combinations(feature_indices): return feature_combinations @staticmethod - def scores_by_feature_combination( - X, feature_combinations, model - ): + def scores_by_feature_combination(X, feature_combinations, model): for set_idx, feature_combination in enumerate(feature_combinations): tensor = model[set_idx] @@ -175,9 +175,7 @@ def scores_by_feature_combination( yield set_idx, feature_combination, scores @staticmethod - def decision_function( - X, feature_combinations, model, intercept - ): + def decision_function(X, feature_combinations, model, intercept): if X.ndim == 1: X = X.reshape(X.shape[0], 1) @@ -205,10 +203,7 @@ def decision_function( @staticmethod def classifier_predict_proba(X, feature_combinations, model, intercept): log_odds_vector = EBMUtils.decision_function( - X, - feature_combinations, - model, - intercept + X, feature_combinations, model, intercept ) # Handle binary classification case -- softmax only works with 0s appended @@ -220,10 +215,7 @@ def classifier_predict_proba(X, feature_combinations, model, intercept): @staticmethod def classifier_predict(X, feature_combinations, model, intercept, classes): log_odds_vector = EBMUtils.decision_function( - X, - feature_combinations, - model, - intercept + X, feature_combinations, model, intercept ) if log_odds_vector.ndim == 1: log_odds_vector = np.c_[np.zeros(log_odds_vector.shape), log_odds_vector] @@ -232,12 +224,7 @@ def classifier_predict(X, feature_combinations, model, intercept, classes): @staticmethod def regressor_predict(X, feature_combinations, model, intercept): - scores = EBMUtils.decision_function( - X, - feature_combinations, - model, - intercept - ) + scores = EBMUtils.decision_function(X, feature_combinations, model, intercept) return scores @staticmethod @@ -245,7 +232,11 @@ def gen_feature_name(feature_idxs, col_names): feature_name = [] for feature_index in feature_idxs: col_name = col_names[feature_index] - feature_name.append("feature_" + str(col_name) if isinstance(col_name, int) else str(col_name)) + feature_name.append( + "feature_" + str(col_name) + if isinstance(col_name, int) + else str(col_name) + ) feature_name = " x ".join(feature_name) return feature_name diff --git a/python/interpret-core/interpret/glassbox/skoperules.py b/python/interpret-core/interpret/glassbox/skoperules.py index 68833f2c9..17e16ef2e 100644 --- a/python/interpret-core/interpret/glassbox/skoperules.py +++ b/python/interpret-core/interpret/glassbox/skoperules.py @@ -121,9 +121,13 @@ def fit(self, X, y): self.pos_ratio_ = np.mean(y) # Extract rules - self.internal_rules_, self.rules_, self.prec_, self.recall_, self.feat_rule_map_ = self._extract_rules( - self.sk_model_.rules_ - ) + ( + self.internal_rules_, + self.rules_, + self.prec_, + self.recall_, + self.feat_rule_map_, + ) = self._extract_rules(self.sk_model_.rules_) self.global_selector = gen_global_selector( X, self.feature_names, self.feature_types, None diff --git a/python/interpret-core/interpret/provider/test/test_providers.py b/python/interpret-core/interpret/provider/test/test_providers.py index 484ceb577..a6f32f489 100644 --- a/python/interpret-core/interpret/provider/test/test_providers.py +++ b/python/interpret-core/interpret/provider/test/test_providers.py @@ -47,6 +47,7 @@ def test_azureml_provider(): def test_auto_visualize_provider(example_explanation): # NOTE: We know this environment is going to use Dash. 
from ...visual.dashboard import AppRunner + ip = "127.0.0.1" port = "7200" app_runner = AppRunner(addr=(ip, port)) diff --git a/python/interpret-core/interpret/utils/all.py b/python/interpret-core/interpret/utils/all.py index f6070c84b..d1ea7548c 100644 --- a/python/interpret-core/interpret/utils/all.py +++ b/python/interpret-core/interpret/utils/all.py @@ -90,6 +90,7 @@ def gen_global_selector(X, feature_names, feature_types, importance_scores, roun else: # pragma: no cover return df + def gen_local_selector(y, y_hat, round=3): records = [] diff --git a/python/interpret-core/interpret/visual/interactive.py b/python/interpret-core/interpret/visual/interactive.py index b11b97e5f..d95c969b9 100644 --- a/python/interpret-core/interpret/visual/interactive.py +++ b/python/interpret-core/interpret/visual/interactive.py @@ -47,7 +47,10 @@ def get_show_addr(): Address tuple (ip, port). """ if isinstance(this.visualize_provider, DashProvider): - addr = (this.visualize_provider.app_runner.ip, this.visualize_provider.app_runner.port) + addr = ( + this.visualize_provider.app_runner.ip, + this.visualize_provider.app_runner.port, + ) return addr else: return None @@ -99,11 +102,22 @@ def init_show_server(addr=None, base_url=None, use_relative_links=False): log.info("Stopping previous dash provider") shutdown_show_server() - log.info("Replacing visualize provider: {} with {}".format(type(this.visualize_provider), type(DashProvider))) - set_visualize_provider(DashProvider.from_address(addr=addr, base_url=base_url, use_relative_links=use_relative_links)) + log.info( + "Replacing visualize provider: {} with {}".format( + type(this.visualize_provider), type(DashProvider) + ) + ) + set_visualize_provider( + DashProvider.from_address( + addr=addr, base_url=base_url, use_relative_links=use_relative_links + ) + ) this.visualize_provider.idempotent_start() - addr = (this.visualize_provider.app_runner.ip, this.visualize_provider.app_runner.port) + addr = ( + this.visualize_provider.app_runner.ip, + this.visualize_provider.app_runner.port, + ) log.info("Running dash provider at {}".format(addr)) return None
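[Reviewer note, not part of the patch] For context on the interactive.py hunks, a rough usage sketch of the code being reformatted. `init_show_server` and `get_show_addr` are taken from the diff itself; the module path in the import is an assumption.

    from interpret.visual.interactive import get_show_addr, init_show_server

    # Pin the dashboard to a fixed address. init_show_server replaces the
    # current visualize provider with DashProvider.from_address(...) and
    # starts it, as shown in the hunk above.
    init_show_server(addr=("127.0.0.1", 7001))
    print(get_show_addr())  # ("127.0.0.1", 7001)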