From 2c5871608dd25a63b8970b6c7b3864c2117ce53e Mon Sep 17 00:00:00 2001
From: Alexis Drakopoulos <33128441+alexisdrakopoulos@users.noreply.github.com>
Date: Mon, 6 Jun 2022 15:49:13 +0100
Subject: [PATCH 1/7] quick pass

---
 shap/_explanation.py                 |  7 ++--
 shap/actions/_optimizer.py           | 54 +++++++++++++++++-----------
 shap/benchmark/_explanation_error.py |  2 +-
 shap/benchmark/_sequential.py        |  2 +-
 shap/benchmark/experiments.py        |  2 +-
 shap/explainers/_additive.py         |  4 +--
 shap/explainers/_deep/deep_tf.py     |  3 +-
 shap/explainers/_exact.py            |  6 ++--
 shap/explainers/_explainer.py        |  6 ++--
 shap/explainers/_linear.py           | 14 ++++----
 shap/explainers/_partition.py        |  2 +-
 shap/explainers/_permutation.py      |  4 +--
 shap/explainers/_tree.py             | 25 ++++++-------
 shap/explainers/other/_maple.py      |  2 +-
 shap/explainers/other/_random.py     |  2 +-
 shap/explainers/other/_treegain.py   |  2 +-
 shap/explainers/pytree.py            |  5 +--
 shap/maskers/_composite.py           |  6 ++--
 shap/maskers/_image.py               |  3 +-
 shap/maskers/_tabular.py             |  7 ++--
 shap/plots/_force.py                 |  2 +-
 shap/utils/_exceptions.py            | 16 +++++++++
 22 files changed, 106 insertions(+), 70 deletions(-)
 create mode 100644 shap/utils/_exceptions.py

diff --git a/shap/_explanation.py b/shap/_explanation.py
index a098034e4..ad30c4fd5 100644
--- a/shap/_explanation.py
+++ b/shap/_explanation.py
@@ -10,6 +10,7 @@
 from slicer import Slicer, Alias, Obj
 # from ._order import Order
 from .utils._general import OpChain
+from .utils._exceptions import DimensionError
 
 # slicer confuses pylint...
 # pylint: disable=no-member
@@ -298,7 +299,7 @@ def cohorts(self, cohorts):
         if isinstance(cohorts, (list, tuple, np.ndarray)):
             cohorts = np.array(cohorts)
             return Cohorts(**{name: self[cohorts == name] for name in np.unique(cohorts)})
-        raise Exception("The given set of cohort indicators is not recognized! Please give an array or int.")
+        raise TypeError("The given set of cohort indicators is not recognized! Please give an array or int.")
 
     def __repr__(self):
         """ Display some basic printable info, but not everything.
@@ -575,7 +576,7 @@ def sum(self, axis=None, grouping=None):
         elif axis == 1 or len(self.shape) == 1:
             return group_features(self, grouping)
         else:
-            raise Exception("Only axis = 1 is supported for grouping right now...")
+            raise DimensionError("Only axis = 1 is supported for grouping right now...")
 
     def hstack(self, other):
         """ Stack two explanations column-wise.
@@ -638,7 +639,7 @@ def hclust(self, metric="sqeuclidean", axis=0):
         values = self.values
 
         if len(values.shape) != 2:
-            raise Exception("The hclust order only supports 2D arrays right now!")
+            raise DimensionError("The hclust order only supports 2D arrays right now!")
 
         if axis == 1:
             values = values.T
diff --git a/shap/actions/_optimizer.py b/shap/actions/_optimizer.py
index 17263aa0b..d1e911a3d 100644
--- a/shap/actions/_optimizer.py
+++ b/shap/actions/_optimizer.py
@@ -5,70 +5,84 @@
 from ._action import Action
 
 
-class ActionOptimizer():
+class InvalidAction(Exception):
+    pass
+
+
+class ConvergenceError(Exception):
+    pass
+
+
+class ActionOptimizer:
     def __init__(self, model, actions):
         self.model = model
-        warnings.warn("Note that ActionOptimizer is still in an alpha state and is subjust to API changes.")
+        warnings.warn(
+            "Note that ActionOptimizer is still in an alpha state and is subject to API changes."
+ ) # actions go into mutually exclusive groups self.action_groups = [] for group in actions: - + if issubclass(type(group), Action): group._group_index = len(self.action_groups) group._grouped_index = 0 self.action_groups.append([copy.copy(group)]) elif issubclass(type(group), list): group = sorted([copy.copy(v) for v in group], key=lambda a: a.cost) - for i,v in enumerate(group): + for i, v in enumerate(group): v._group_index = len(self.action_groups) v._grouped_index = i self.action_groups.append(group) else: - raise Exception("A passed action was not an Action or list of actions!") - + raise InvalidAction( + "A passed action was not an Action or list of actions!" + ) + def __call__(self, *args, max_evals=10000): - + # init our queue with all the least costly actions q = queue.PriorityQueue() for i in range(len(self.action_groups)): group = self.action_groups[i] q.put((group[0].cost, [group[0]])) - + nevals = 0 while not q.empty(): - + # see if we have exceeded our runtime budget nevals += 1 if nevals > max_evals: - raise Exception(f"Failed to find a solution with max_evals={max_evals}! Try reducing the number of actions or increasing max_evals.") - + raise ConvergenceError( + f"Failed to find a solution with max_evals={max_evals}! Try reducing the number of actions or increasing max_evals." + ) + # get the next cheapest set of actions we can do cost, actions = q.get() - + # apply those actions args_tmp = copy.deepcopy(args) for a in actions: a(*args_tmp) - + # if the model is now satisfied we are done!! v = self.model(*args_tmp) if v: return actions - + # if not then we add all possible follow-on actions to our queue else: for i in range(len(self.action_groups)): group = self.action_groups[i] - + # look to to see if we already have a action from this group, if so we need to # move to a more expensive action in the same group next_ind = 0 prev_in_group = -1 - for j,a in enumerate(actions): + for j, a in enumerate(actions): if a._group_index == i: - next_ind = max(next_ind, a._grouped_index+1) + next_ind = max(next_ind, a._grouped_index + 1) prev_in_group = j - + # we are adding a new action type if prev_in_group == -1: new_actions = actions + [group[next_ind]] @@ -79,7 +93,7 @@ def __call__(self, *args, max_evals=10000): # we don't have a more expensive action left in this group else: new_actions = None - + # add the new option to our queue if new_actions is not None: - q.put((sum([a.cost for a in new_actions]), new_actions)) \ No newline at end of file + q.put((sum([a.cost for a in new_actions]), new_actions)) diff --git a/shap/benchmark/_explanation_error.py b/shap/benchmark/_explanation_error.py index e5feb4bc4..fa43133c5 100644 --- a/shap/benchmark/_explanation_error.py +++ b/shap/benchmark/_explanation_error.py @@ -115,7 +115,7 @@ def __call__(self, explanation, name, step_fraction=0.01, indices=[], silent=Fal elif callable(self.masker.clustering): row_clustering = self.masker.clustering(*args) else: - raise Exception("The masker passed has a .clustering attribute that is not yet supported by the ExplanationError benchmark!") + raise NotImplementedError("The masker passed has a .clustering attribute that is not yet supported by the ExplanationError benchmark!") masked_model = MaskedModel(self.model, self.masker, self.link, self.linearize_link, *args) diff --git a/shap/benchmark/_sequential.py b/shap/benchmark/_sequential.py index 3ebb82ade..d7003a1bf 100644 --- a/shap/benchmark/_sequential.py +++ b/shap/benchmark/_sequential.py @@ -14,7 +14,7 @@ def __init__(self, mask_type, 
sort_order, masker, model, *model_args, batch_size for arg in model_args: if isinstance(arg, pd.DataFrame): - raise Exception("DataFrame arguments dont iterate correctly, pass numpy arrays instead!") + raise TypeError("DataFrame arguments dont iterate correctly, pass numpy arrays instead!") # convert any DataFrames to numpy arrays # self.model_arg_cols = [] diff --git a/shap/benchmark/experiments.py b/shap/benchmark/experiments.py index 49a8ff3e5..8b43bd2be 100644 --- a/shap/benchmark/experiments.py +++ b/shap/benchmark/experiments.py @@ -413,7 +413,7 @@ def __run_remote_experiment(experiment, remote, cache_dir="/tmp", python_binary= #print(cache_id.replace("__", " ") + " ...loaded from remote after %f seconds" % (time.time() - start)) return pickle.load(f) else: - raise Exception("Remote benchmark call finished but no local file was found!") + raise FileNotFoundError("Remote benchmark call finished but no local file was found!") def __gen_cache_id(experiment): dataset_name, model_name, method_name, metric_name = experiment diff --git a/shap/explainers/_additive.py b/shap/explainers/_additive.py index 9a8caa2d7..438f69fb9 100644 --- a/shap/explainers/_additive.py +++ b/shap/explainers/_additive.py @@ -48,7 +48,7 @@ def __init__(self, model, masker, link=None, feature_names=None): # self.model(np.zeros(num_features)) # self._zero_offset = self.model(np.zeros(num_features))#model.intercept_#outputs[0] # self._input_offsets = np.zeros(num_features) #* self._zero_offset - raise Exception("Masker not given and we don't yet support pulling the distribution centering directly from the EBM model!") + raise NotImplementedError("Masker not given and we don't yet support pulling the distribution centering directly from the EBM model!") return # here we need to compute the offsets ourselves because we can't pull them directly from a model we know about @@ -83,7 +83,7 @@ def supports_model_with_masker(model, masker): """ if safe_isinstance(model, "interpret.glassbox.ExplainableBoostingClassifier"): if model.interactions is not 0: - raise Exception("Need to add support for interaction effects!") + raise NotImplementedError("Need to add support for interaction effects!") return True return False diff --git a/shap/explainers/_deep/deep_tf.py b/shap/explainers/_deep/deep_tf.py index e449d0ba8..ee67096e3 100644 --- a/shap/explainers/_deep/deep_tf.py +++ b/shap/explainers/_deep/deep_tf.py @@ -3,6 +3,7 @@ from .._explainer import Explainer from packaging import version from ..tf_utils import _get_session, _get_graph, _get_model_inputs, _get_model_output +from ...utils._exceptions import DimensionError keras = None tf = None tf_ops = None @@ -173,7 +174,7 @@ def __init__(self, model, data, session=None, learning_phase_flags=None): if noutputs is not None: self.phi_symbolics = [None for i in range(noutputs)] else: - raise Exception("The model output tensor to be explained cannot have a static shape in dim 1 of None!") + raise DimensionError("The model output tensor to be explained cannot have a static shape in dim 1 of None!") def _get_model_output(self, model): if len(model.layers[-1]._inbound_nodes) == 0: diff --git a/shap/explainers/_exact.py b/shap/explainers/_exact.py index 12658202a..4514e41ac 100644 --- a/shap/explainers/_exact.py +++ b/shap/explainers/_exact.py @@ -92,7 +92,7 @@ def explain_row(self, *row_args, max_evals, main_effects, error_bounds, batch_si # make sure we have enough evals if max_evals is not None and max_evals != "auto" and max_evals < 2**len(inds): - raise Exception( + raise 
ValueError( f"It takes {2**len(inds)} masked evaluations to run the Exact explainer on this instance, but max_evals={max_evals}!" ) @@ -131,14 +131,14 @@ def explain_row(self, *row_args, max_evals, main_effects, error_bounds, batch_si _compute_grey_code_row_values_st(row_values, mask, inds, outputs, coeff, extended_delta_indexes, MaskedModel.delta_mask_noop_value) elif interactions > 2: - raise Exception("Currently the Exact explainer does not support interactions higher than order 2!") + raise NotImplementedError("Currently the Exact explainer does not support interactions higher than order 2!") # do a partition tree constrained version of Shapley values else: # make sure we have enough evals if max_evals is not None and max_evals != "auto" and max_evals < len(fm)**2: - raise Exception( + raise ValueError( f"It takes {len(fm)**2} masked evaluations to run the Exact explainer on this instance, but max_evals={max_evals}!" ) diff --git a/shap/explainers/_explainer.py b/shap/explainers/_explainer.py index 1710dfc31..3647273f6 100644 --- a/shap/explainers/_explainer.py +++ b/shap/explainers/_explainer.py @@ -127,7 +127,7 @@ def __init__(self, model, masker=None, link=links.identity, algorithm="auto", ou if callable(link): self.link = link else: - raise Exception("The passed link function needs to be callable!") + raise TypeError("The passed link function needs to be callable!") self.linearize_link = linearize_link # if we are called directly (as opposed to through super()) then we convert ourselves to the subclass @@ -165,7 +165,7 @@ def __init__(self, model, masker=None, link=links.identity, algorithm="auto", ou # if we get here then we don't know how to handle what was given to us else: - raise Exception("The passed model is not callable and cannot be analyzed directly with the given masker! Model: " + str(model)) + raise TypeError("The passed model is not callable and cannot be analyzed directly with the given masker! Model: " + str(model)) # build the right subclass if algorithm == "exact": @@ -190,7 +190,7 @@ def __init__(self, model, masker=None, link=links.identity, algorithm="auto", ou self.__class__ = explainers.Deep explainers.Deep.__init__(self, self.model, self.masker, link=self.link, feature_names=self.feature_names, linearize_link=linearize_link, **kwargs) else: - raise Exception("Unknown algorithm type passed: %s!" % algorithm) + raise ValueError("Unknown algorithm type passed: %s!" 
% algorithm) def __call__(self, *args, max_evals="auto", main_effects=False, error_bounds=False, batch_size="auto", diff --git a/shap/explainers/_linear.py b/shap/explainers/_linear.py index b07925be8..7cc0b5346 100644 --- a/shap/explainers/_linear.py +++ b/shap/explainers/_linear.py @@ -92,7 +92,7 @@ def __init__(self, model, masker, link=links.identity, nsamples=1000, feature_pe elif issubclass(type(self.masker), maskers.Impute): self.feature_perturbation = "correlation_dependent" else: - raise Exception("The Linear explainer only supports the Independent, Partition, and Impute maskers right now!") + raise NotImplementedError("The Linear explainer only supports the Independent, Partition, and Impute maskers right now!") data = getattr(self.masker, "data", None) # convert DataFrame's to numpy arrays @@ -120,12 +120,12 @@ def __init__(self, model, masker, link=links.identity, nsamples=1000, feature_pe if safe_isinstance(self.cov, "pandas.core.frame.DataFrame"): self.cov = self.cov.values elif data is None: - raise Exception("A background data distribution must be provided!") + raise ValueError("A background data distribution must be provided!") else: if sp.sparse.issparse(data): self.mean = np.array(np.mean(data, 0))[0] if self.feature_perturbation != "interventional": - raise Exception("Only feature_perturbation = 'interventional' is supported for sparse data") + raise NotImplementedError("Only feature_perturbation = 'interventional' is supported for sparse data") else: self.mean = np.array(np.mean(data, 0)).flatten() # assumes it is an array if self.feature_perturbation == "correlation_dependent": @@ -173,7 +173,7 @@ def __init__(self, model, masker, link=links.identity, nsamples=1000, feature_pe if nsamples != 1000: warnings.warn("Setting nsamples has no effect when feature_perturbation = 'interventional'!") else: - raise Exception("Unknown type of feature_perturbation provided: " + self.feature_perturbation) + raise ValueError("Unknown type of feature_perturbation provided: " + self.feature_perturbation) def _estimate_transforms(self, nsamples): """ Uses block matrix inversion identities to quickly estimate transforms. 
@@ -261,7 +261,7 @@ def _parse_model(model): coef = model.coef_ intercept = model.intercept_ else: - raise Exception("An unknown model type was passed: " + str(type(model))) + raise ValueError("An unknown model type was passed: " + str(type(model))) return coef,intercept @@ -300,7 +300,7 @@ def explain_row(self, *row_args, max_evals, main_effects, error_bounds, batch_si if self.feature_perturbation == "correlation_dependent": if sp.sparse.issparse(X): - raise Exception("Only feature_perturbation = 'interventional' is supported for sparse data") + raise NotImplementedError("Only feature_perturbation = 'interventional' is supported for sparse data") phi = np.matmul(np.matmul(X[:,self.valid_inds], self.avg_proj.T), self.x_transform.T) - self.mean_transformed phi = np.matmul(phi, self.avg_proj) @@ -363,7 +363,7 @@ def shap_values(self, X): if self.feature_perturbation == "correlation_dependent": if sp.sparse.issparse(X): - raise Exception("Only feature_perturbation = 'interventional' is supported for sparse data") + raise ValueError("Only feature_perturbation = 'interventional' is supported for sparse data") phi = np.matmul(np.matmul(X[:,self.valid_inds], self.avg_proj.T), self.x_transform.T) - self.mean_transformed phi = np.matmul(phi, self.avg_proj) diff --git a/shap/explainers/_partition.py b/shap/explainers/_partition.py index f7ff202c9..6a2b9238b 100644 --- a/shap/explainers/_partition.py +++ b/shap/explainers/_partition.py @@ -148,7 +148,7 @@ def explain_row(self, *row_args, max_evals, main_effects, error_bounds, batch_si # else: fixed_context = None elif fixed_context not in [0, 1, None]: - raise Exception("Unknown fixed_context value passed (must be 0, 1 or None): %s" %fixed_context) + raise ValueError("Unknown fixed_context value passed (must be 0, 1 or None): %s" %fixed_context) # build a masked version of the model for the current input sample fm = MaskedModel(self.model, self.masker, self.link, self.linearize_link, *row_args) diff --git a/shap/explainers/_permutation.py b/shap/explainers/_permutation.py index cfbe04d1f..c5cb2af1b 100644 --- a/shap/explainers/_permutation.py +++ b/shap/explainers/_permutation.py @@ -96,7 +96,7 @@ def explain_row(self, *row_args, max_evals, main_effects, error_bounds, batch_si elif callable(self.masker.clustering): row_clustering = self.masker.clustering(*row_args) else: - raise Exception("The masker passed has a .clustering attribute that is not yet supported by the Permutation explainer!") + raise NotImplementedError("The masker passed has a .clustering attribute that is not yet supported by the Permutation explainer!") # loop over many permutations inds = fm.varying_inputs() @@ -154,7 +154,7 @@ def explain_row(self, *row_args, max_evals, main_effects, error_bounds, batch_si history_pos += 1 if npermutations == 0: - raise Exception(f"max_evals={max_evals} is too low for the Permutation explainer, it must be at least 2 * num_features + 1 = {2 * len(inds) + 1}!") + raise ValueError(f"max_evals={max_evals} is too low for the Permutation explainer, it must be at least 2 * num_features + 1 = {2 * len(inds) + 1}!") expected_value = outputs[0] diff --git a/shap/explainers/_tree.py b/shap/explainers/_tree.py index b17a1e081..142c640bd 100644 --- a/shap/explainers/_tree.py +++ b/shap/explainers/_tree.py @@ -11,6 +11,7 @@ from ._explainer import Explainer from ..utils import assert_import, record_import_error, safe_isinstance from ..utils._legacy import DenseData +from ..utils._exceptions import InvalidMaskerError, ExplainerError from .._explanation import 
Explanation from .. import maskers import warnings @@ -102,10 +103,10 @@ def __init__(self, model, data = None, model_output="raw", feature_perturbation= if type(self.masker) is maskers.Independent: data = self.masker.data elif masker is not None: - raise Exception("Unsupported masker type: %s!" % str(type(self.masker))) + raise InvalidMaskerError("Unsupported masker type: %s!" % str(type(self.masker))) if getattr(self.masker, "clustering", None) is not None: - raise Exception("TreeExplainer does not support clustered data inputs! Please use shap.Explainer or pass an unclustered masker!") + raise ExplainerError("TreeExplainer does not support clustered data inputs! Please use shap.Explainer or pass an unclustered masker!") # check for deprecated options if model_output == "margin": @@ -123,10 +124,10 @@ def __init__(self, model, data = None, model_output="raw", feature_perturbation= warnings.warn("feature_dependence = \"independent\" has been renamed to feature_perturbation" \ " = \"interventional\", you can't supply both options! See GitHub issue #882.") if dep_val == "tree_path_dependent" and feature_perturbation == "interventional": - raise Exception("The feature_dependence option has been renamed to feature_perturbation! " \ + raise ValueError("The feature_dependence option has been renamed to feature_perturbation! " \ "Please update the option name before calling TreeExplainer. See GitHub issue #882.") if feature_perturbation == "independent": - raise Exception("feature_perturbation = \"independent\" is not a valid option value, please use " \ + raise ValueError("feature_perturbation = \"independent\" is not a valid option value, please use " \ "feature_perturbation = \"interventional\" instead. See GitHub issue #882.") @@ -179,7 +180,7 @@ def __init__(self, model, data = None, model_output="raw", feature_perturbation= try: self.expected_value = self.model.predict(self.data).mean(0) except ValueError: - raise Exception("Currently TreeExplainer can only handle models with categorical splits when " \ + raise ExplainerError("Currently TreeExplainer can only handle models with categorical splits when " \ "feature_perturbation=\"tree_path_dependent\" and no background data is passed. Please try again using " \ "shap.TreeExplainer(model, feature_perturbation=\"tree_path_dependent\").") if hasattr(self.expected_value, '__len__') and len(self.expected_value) == 1: @@ -534,7 +535,7 @@ def check_sum(sum_val, model_output): err_msg += " This check failed because for one of the samples the sum of the SHAP values" \ " was %f, while the model output was %f. If this difference is acceptable" \ " you can set check_additivity=False to disable this check." % (sum_val[ind], model_output[ind]) - raise Exception(err_msg) + raise ExplainerError(err_msg) if type(phi) is list: for i in range(len(phi)): @@ -744,7 +745,7 @@ def __init__(self, model, data=None, data_missing=None, model_output=None): self.base_offset = self.base_offset[0, 0] has_len = False if has_len and self.model_output != "raw": - raise Exception("Multi-output HistGradientBoostingClassifier models are not yet supported unless model_output=\"raw\". See GitHub issue #1028") + raise NotImplementedError("Multi-output HistGradientBoostingClassifier models are not yet supported unless model_output=\"raw\". 
See GitHub issue #1028") self.input_dtype = sklearn.ensemble._hist_gradient_boosting.common.X_DTYPE self.num_stacked_models = len(model._predictors[0]) if self.model_output == "predict_proba": @@ -989,7 +990,7 @@ def __init__(self, model, data=None, data_missing=None, model_output=None): self.tree_output = "raw_value" self.base_offset = model.init_params[param_idx] else: - raise Exception("Model type not yet supported by TreeExplainer: " + str(type(model))) + raise ExplainerError("Model type not yet supported by TreeExplainer: " + str(type(model))) # build a dense numpy version of all the tree objects if self.trees is not None and self.trees: @@ -1051,7 +1052,7 @@ def get_transform(self): elif self.tree_output == "probability": transform = "identity" else: - raise Exception("model_output = \"probability\" is not yet supported when model.tree_output = \"" + self.tree_output + "\"!") + raise NotImplementedError("model_output = \"probability\" is not yet supported when model.tree_output = \"" + self.tree_output + "\"!") elif self.model_output == "log_loss": if self.objective == "squared_error": @@ -1059,9 +1060,9 @@ def get_transform(self): elif self.objective == "binary_crossentropy": transform = "logistic_nlogloss" else: - raise Exception("model_output = \"log_loss\" is not yet supported when model.objective = \"" + self.objective + "\"!") + raise NotImplementedError("model_output = \"log_loss\" is not yet supported when model.objective = \"" + self.objective + "\"!") else: - raise Exception("Unrecognized model_output parameter value: %s! If model.%s is a valid function open a github issue to ask that this method be supported. If you want 'predict_proba' just use 'probability' for now." % (str(self.model_output), str(self.model_output))) + raise TypeError("Unrecognized model_output parameter value: %s! If model.%s is a valid function open a github issue to ask that this method be supported. If you want 'predict_proba' just use 'probability' for now." 
% (str(self.model_output), str(self.model_output))) return transform @@ -1360,7 +1361,7 @@ def extract_data(node, tree): self.values = values[:,np.newaxis] * scaling self.node_sample_weight = node_sample_weight else: - raise Exception("Unknown input to SingleTree constructor: " + str(tree)) + raise TypeError("Unknown input to SingleTree constructor: " + str(tree)) # Re-compute the number of samples that pass through each node if we are given data if data is not None and data_missing is not None: diff --git a/shap/explainers/other/_maple.py b/shap/explainers/other/_maple.py index 7c544a569..e792b158a 100644 --- a/shap/explainers/other/_maple.py +++ b/shap/explainers/other/_maple.py @@ -87,7 +87,7 @@ def __init__(self, model, data): # elif str(type(model)).endswith("xgboost.sklearn.XGBClassifier'>"): # pass else: - raise Exception("The passed model is not yet supported by TreeMapleExplainer: " + str(type(model))) + raise NotImplementedError("The passed model is not yet supported by TreeMapleExplainer: " + str(type(model))) if str(type(data)).endswith("pandas.core.frame.DataFrame'>"): data = data.values diff --git a/shap/explainers/other/_random.py b/shap/explainers/other/_random.py index d612e9aed..8fa5ba9bd 100644 --- a/shap/explainers/other/_random.py +++ b/shap/explainers/other/_random.py @@ -37,7 +37,7 @@ def explain_row(self, *row_args, max_evals, main_effects, error_bounds, batch_si elif callable(self.masker.clustering): row_clustering = self.masker.clustering(*row_args) else: - raise Exception("The masker passed has a .clustering attribute that is not yet supported by the Permutation explainer!") + raise NotImplementedError("The masker passed has a .clustering attribute that is not yet supported by the Permutation explainer!") # compute the correct expected value masks = np.zeros(1, dtype=np.int) diff --git a/shap/explainers/other/_treegain.py b/shap/explainers/other/_treegain.py index 6c3329495..76ae5ee4b 100644 --- a/shap/explainers/other/_treegain.py +++ b/shap/explainers/other/_treegain.py @@ -20,7 +20,7 @@ def __init__(self, model): elif str(type(model)).endswith("xgboost.sklearn.XGBClassifier'>"): pass else: - raise Exception("The passed model is not yet supported by TreeGainExplainer: " + str(type(model))) + raise NotImplementedError("The passed model is not yet supported by TreeGainExplainer: " + str(type(model))) assert hasattr(model, "feature_importances_"), "The passed model does not have a feature_importances_ attribute!" 
self.model = model diff --git a/shap/explainers/pytree.py b/shap/explainers/pytree.py index 96bc59c14..f345bffef 100644 --- a/shap/explainers/pytree.py +++ b/shap/explainers/pytree.py @@ -6,6 +6,7 @@ import numpy as np #import numba from .explainer import Explainer +from ..utils._exceptions import ExplainerError # class TreeExplainer(Explainer): # def __init__(self, model, **kwargs): @@ -153,7 +154,7 @@ def __init__(self, model, **kwargs): self.model_type = "lightgbm" self.trees = model else: - raise Exception("Model type not supported by TreeExplainer: " + str(type(model))) + raise ExplainerError("Model type not supported by TreeExplainer: " + str(type(model))) if self.model_type == "internal": # Preallocate space for the unique path data @@ -227,7 +228,7 @@ def shap_interaction_values(self, X, tree_limit=-1, **kwargs): tree_limit=0 return self.trees.predict(X, ntree_limit=tree_limit, pred_interactions=True) else: - raise Exception("Interaction values not yet supported for model type: " + str(type(X))) + raise NotImplementedError("Interaction values not yet supported for model type: " + str(type(X))) def tree_shap(self, tree, x, x_missing, phi, condition=0, condition_feature=0): diff --git a/shap/maskers/_composite.py b/shap/maskers/_composite.py index b1b23361d..ca7867e39 100644 --- a/shap/maskers/_composite.py +++ b/shap/maskers/_composite.py @@ -1,6 +1,6 @@ import types from ._masker import Masker - +from ..utils._exceptions import InvalidMaskerError class Composite(Masker): """ This merges several maskers for different inputs together into a single composite masker. @@ -103,7 +103,7 @@ def __call__(self, mask, *args): num_rows = shapes[-1][0] if shapes[-1][0] != num_rows and shapes[-1][0] != 1 and shapes[-1][0] is not None: - raise Exception("The composite masker can only join together maskers with a compatible number of background rows!") + raise InvalidMaskerError("The composite masker can only join together maskers with a compatible number of background rows!") arg_pos += self.arg_counts[i] # call all the submaskers and combine their outputs @@ -138,5 +138,5 @@ def joint_clustering(self, *args): if len(single_clustering) == 0: single_clustering = clustering elif len(clustering) != 0: - raise Exception("Joining two non-trivial clusterings is not yet implemented in the Composite masker!") + raise NotImplementedError("Joining two non-trivial clusterings is not yet implemented in the Composite masker!") return single_clustering diff --git a/shap/maskers/_image.py b/shap/maskers/_image.py index e39d4bb19..93aa75399 100644 --- a/shap/maskers/_image.py +++ b/shap/maskers/_image.py @@ -1,5 +1,6 @@ import numpy as np from ..utils import assert_import, record_import_error, safe_isinstance +from ..utils._exceptions import DimensionError from ._masker import Masker from .._serializable import Serializer, Deserializer import heapq @@ -76,7 +77,7 @@ def __call__(self, mask, x): x = x.cpu().numpy() if np.prod(x.shape) != np.prod(self.input_shape): - raise Exception("The length of the image to be masked must match the shape given in the " + \ + raise DimensionError("The length of the image to be masked must match the shape given in the " + \ "ImageMasker contructor: "+" * ".join([str(i) for i in x.shape])+ \ " != "+" * ".join([str(i) for i in self.input_shape])) diff --git a/shap/maskers/_tabular.py b/shap/maskers/_tabular.py index 820ef56c6..f72eda75d 100644 --- a/shap/maskers/_tabular.py +++ b/shap/maskers/_tabular.py @@ -4,6 +4,7 @@ from numba import jit from .. 
import utils from ..utils import safe_isinstance, MaskedModel +from ..utils._exceptions import DimensionError from ._masker import Masker from .._serializable import Serializer, Deserializer @@ -70,7 +71,7 @@ def __init__(self, data, max_samples=100, clustering=None): elif safe_isinstance(clustering, "numpy.ndarray"): self.clustering = clustering else: - raise Exception( + raise ValueError( "Unknown clustering given! Make sure you pass a distance metric as a string, or a clustering as a numpy.ndarray." ) else: @@ -92,7 +93,7 @@ def __call__(self, mask, x): # make sure we are given a single sample if len(x.shape) != 1 or x.shape[0] != self.data.shape[1]: - raise Exception("The input passed for tabular masking does not match the background data shape!") + raise DimensionError("The input passed for tabular masking does not match the background data shape!") # if mask is an array of integers then we are doing delta masking if np.issubdtype(mask.dtype, np.integer): @@ -141,7 +142,7 @@ def invariants(self, x): # make sure we got valid data if x.shape != self.data.shape[1:]: - raise Exception( + raise DimensionError( "The passed data does not match the background shape expected by the masker! The data of shape " + \ str(x.shape) + " was passed while the masker expected data of shape " + str(self.data.shape[1:]) + "." ) diff --git a/shap/plots/_force.py b/shap/plots/_force.py index 5e55b92d8..bfcfe4416 100644 --- a/shap/plots/_force.py +++ b/shap/plots/_force.py @@ -171,7 +171,7 @@ def force(base_value, shap_values=None, features=None, feature_names=None, out_n else: if matplotlib: - raise Exception("matplotlib = True is not yet supported for force plots with multiple samples!") + raise NotImplementedError("matplotlib = True is not yet supported for force plots with multiple samples!") if shap_values.shape[0] > 3000: warnings.warn("shap.plots.force is slow for many thousands of rows, try subsampling your data.") diff --git a/shap/utils/_exceptions.py b/shap/utils/_exceptions.py new file mode 100644 index 000000000..31f0290dc --- /dev/null +++ b/shap/utils/_exceptions.py @@ -0,0 +1,16 @@ +class DimensionError(Exception): + """ + Used for instances where dimensions are either + not supported or cause errors. + """ + + pass + +class InvalidMaskerError(Exception): + pass + +class ExplainerError(Exception): + """ + Generic errors related to Explainers + """ + pass \ No newline at end of file From d55eceae3677fe11454f0cf522b80f5bffcce5d8 Mon Sep 17 00:00:00 2001 From: Alexis Drakopoulos <33128441+alexisdrakopoulos@users.noreply.github.com> Date: Wed, 15 Jun 2022 08:47:08 +0100 Subject: [PATCH 2/7] refactored errors further --- shap/explainers/_explainer.py | 5 +++-- shap/explainers/_linear.py | 12 +++++++----- shap/explainers/_tree.py | 10 +++++----- shap/maskers/_tabular.py | 4 ++-- shap/utils/_exceptions.py | 17 ++++++++++++++++- 5 files changed, 33 insertions(+), 15 deletions(-) diff --git a/shap/explainers/_explainer.py b/shap/explainers/_explainer.py index 3647273f6..00f283c8b 100644 --- a/shap/explainers/_explainer.py +++ b/shap/explainers/_explainer.py @@ -1,4 +1,5 @@ import copy +from sqlite3 import NotSupportedError import time import numpy as np import scipy as sp @@ -13,7 +14,7 @@ from .._serializable import Serializable from .. 
import explainers from .._serializable import Serializer, Deserializer - +from ..utils._exceptions import InvalidAlgorithmError class Explainer(Serializable): @@ -190,7 +191,7 @@ def __init__(self, model, masker=None, link=links.identity, algorithm="auto", ou self.__class__ = explainers.Deep explainers.Deep.__init__(self, self.model, self.masker, link=self.link, feature_names=self.feature_names, linearize_link=linearize_link, **kwargs) else: - raise ValueError("Unknown algorithm type passed: %s!" % algorithm) + raise InvalidAlgorithmError("Unknown algorithm type passed: %s!" % algorithm) def __call__(self, *args, max_evals="auto", main_effects=False, error_bounds=False, batch_size="auto", diff --git a/shap/explainers/_linear.py b/shap/explainers/_linear.py index 7cc0b5346..18e48f8b8 100644 --- a/shap/explainers/_linear.py +++ b/shap/explainers/_linear.py @@ -4,6 +4,7 @@ from tqdm.autonotebook import tqdm from ._explainer import Explainer from ..utils import safe_isinstance +from ..utils._exceptions import InvalidFeaturePerturbationError, InvalidModelError, DimensionError from .. import maskers from .. import links @@ -173,7 +174,7 @@ def __init__(self, model, masker, link=links.identity, nsamples=1000, feature_pe if nsamples != 1000: warnings.warn("Setting nsamples has no effect when feature_perturbation = 'interventional'!") else: - raise ValueError("Unknown type of feature_perturbation provided: " + self.feature_perturbation) + raise InvalidFeaturePerturbationError("Unknown type of feature_perturbation provided: " + self.feature_perturbation) def _estimate_transforms(self, nsamples): """ Uses block matrix inversion identities to quickly estimate transforms. @@ -261,7 +262,7 @@ def _parse_model(model): coef = model.coef_ intercept = model.intercept_ else: - raise ValueError("An unknown model type was passed: " + str(type(model))) + raise InvalidModelError("An unknown model type was passed: " + str(type(model))) return coef,intercept @@ -296,11 +297,12 @@ def explain_row(self, *row_args, max_evals, main_effects, error_bounds, batch_si X = X.values #assert str(type(X)).endswith("'numpy.ndarray'>"), "Unknown instance type: " + str(type(X)) - assert len(X.shape) == 1 or len(X.shape) == 2, "Instance must have 1 or 2 dimensions!" + if len(X.shape) not in (1, 2): + raise DimensionError("Instance must have 1 or 2 dimensions! 
Not: %s" %len(X.shape)) if self.feature_perturbation == "correlation_dependent": if sp.sparse.issparse(X): - raise NotImplementedError("Only feature_perturbation = 'interventional' is supported for sparse data") + raise InvalidFeaturePerturbationError("Only feature_perturbation = 'interventional' is supported for sparse data") phi = np.matmul(np.matmul(X[:,self.valid_inds], self.avg_proj.T), self.x_transform.T) - self.mean_transformed phi = np.matmul(phi, self.avg_proj) @@ -363,7 +365,7 @@ def shap_values(self, X): if self.feature_perturbation == "correlation_dependent": if sp.sparse.issparse(X): - raise ValueError("Only feature_perturbation = 'interventional' is supported for sparse data") + raise InvalidFeaturePerturbationError("Only feature_perturbation = 'interventional' is supported for sparse data") phi = np.matmul(np.matmul(X[:,self.valid_inds], self.avg_proj.T), self.x_transform.T) - self.mean_transformed phi = np.matmul(phi, self.avg_proj) diff --git a/shap/explainers/_tree.py b/shap/explainers/_tree.py index 142c640bd..8bf7d1367 100644 --- a/shap/explainers/_tree.py +++ b/shap/explainers/_tree.py @@ -11,7 +11,7 @@ from ._explainer import Explainer from ..utils import assert_import, record_import_error, safe_isinstance from ..utils._legacy import DenseData -from ..utils._exceptions import InvalidMaskerError, ExplainerError +from ..utils._exceptions import InvalidMaskerError, ExplainerError, InvalidFeaturePerturbationError, InvalidModelError from .._explanation import Explanation from .. import maskers import warnings @@ -127,7 +127,7 @@ def __init__(self, model, data = None, model_output="raw", feature_perturbation= raise ValueError("The feature_dependence option has been renamed to feature_perturbation! " \ "Please update the option name before calling TreeExplainer. See GitHub issue #882.") if feature_perturbation == "independent": - raise ValueError("feature_perturbation = \"independent\" is not a valid option value, please use " \ + raise InvalidFeaturePerturbationError("feature_perturbation = \"independent\" is not a valid option value, please use " \ "feature_perturbation = \"interventional\" instead. See GitHub issue #882.") @@ -153,7 +153,7 @@ def __init__(self, model, data = None, model_output="raw", feature_perturbation= self.approximate = approximate if feature_perturbation not in feature_perturbation_codes: - raise ValueError("Invalid feature_perturbation option!") + raise InvalidFeaturePerturbationError("Invalid feature_perturbation option!") # check for unsupported combinations of feature_perturbation and model_outputs if feature_perturbation == "tree_path_dependent": @@ -990,7 +990,7 @@ def __init__(self, model, data=None, data_missing=None, model_output=None): self.tree_output = "raw_value" self.base_offset = model.init_params[param_idx] else: - raise ExplainerError("Model type not yet supported by TreeExplainer: " + str(type(model))) + raise InvalidModelError("Model type not yet supported by TreeExplainer: " + str(type(model))) # build a dense numpy version of all the tree objects if self.trees is not None and self.trees: @@ -1062,7 +1062,7 @@ def get_transform(self): else: raise NotImplementedError("model_output = \"log_loss\" is not yet supported when model.objective = \"" + self.objective + "\"!") else: - raise TypeError("Unrecognized model_output parameter value: %s! If model.%s is a valid function open a github issue to ask that this method be supported. If you want 'predict_proba' just use 'probability' for now." 
% (str(self.model_output), str(self.model_output))) + raise ValueError("Unrecognized model_output parameter value: %s! If model.%s is a valid function open a github issue to ask that this method be supported. If you want 'predict_proba' just use 'probability' for now." % (str(self.model_output), str(self.model_output))) return transform diff --git a/shap/maskers/_tabular.py b/shap/maskers/_tabular.py index f72eda75d..92ec6e3a1 100644 --- a/shap/maskers/_tabular.py +++ b/shap/maskers/_tabular.py @@ -4,7 +4,7 @@ from numba import jit from .. import utils from ..utils import safe_isinstance, MaskedModel -from ..utils._exceptions import DimensionError +from ..utils._exceptions import DimensionError, InvalidClusteringError from ._masker import Masker from .._serializable import Serializer, Deserializer @@ -71,7 +71,7 @@ def __init__(self, data, max_samples=100, clustering=None): elif safe_isinstance(clustering, "numpy.ndarray"): self.clustering = clustering else: - raise ValueError( + raise InvalidClusteringError( "Unknown clustering given! Make sure you pass a distance metric as a string, or a clustering as a numpy.ndarray." ) else: diff --git a/shap/utils/_exceptions.py b/shap/utils/_exceptions.py index 31f0290dc..7fbb6e660 100644 --- a/shap/utils/_exceptions.py +++ b/shap/utils/_exceptions.py @@ -1,3 +1,6 @@ +from multiprocessing.sharedctypes import Value + + class DimensionError(Exception): """ Used for instances where dimensions are either @@ -6,11 +9,23 @@ class DimensionError(Exception): pass -class InvalidMaskerError(Exception): +class InvalidMaskerError(ValueError): pass class ExplainerError(Exception): """ Generic errors related to Explainers """ + pass + +class InvalidAlgorithmError(ValueError): + pass + +class InvalidFeaturePerturbationError(ValueError): + pass + +class InvalidModelError(ValueError): + pass + +class InvalidClusteringError(ValueError): pass \ No newline at end of file From ad2e347a632f182f526d701d1774cca34a8fa693 Mon Sep 17 00:00:00 2001 From: Alexis Drakopoulos <33128441+alexisdrakopoulos@users.noreply.github.com> Date: Wed, 15 Jun 2022 08:50:17 +0100 Subject: [PATCH 3/7] accidental import removed --- shap/utils/_exceptions.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/shap/utils/_exceptions.py b/shap/utils/_exceptions.py index 7fbb6e660..baf73425a 100644 --- a/shap/utils/_exceptions.py +++ b/shap/utils/_exceptions.py @@ -1,6 +1,3 @@ -from multiprocessing.sharedctypes import Value - - class DimensionError(Exception): """ Used for instances where dimensions are either From 2055ff55e98adbdae0dd92a5f1f41ad848a5e73f Mon Sep 17 00:00:00 2001 From: Alexis Drakopoulos <33128441+alexisdrakopoulos@users.noreply.github.com> Date: Wed, 15 Jun 2022 08:52:54 +0100 Subject: [PATCH 4/7] updating tests --- shap/actions/_optimizer.py | 9 +-------- shap/utils/_exceptions.py | 7 +++++++ tests/actions/_optimizer.py | 6 +++--- tests/explainers/test_linear.py | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/shap/actions/_optimizer.py b/shap/actions/_optimizer.py index d1e911a3d..845e1dda3 100644 --- a/shap/actions/_optimizer.py +++ b/shap/actions/_optimizer.py @@ -3,14 +3,7 @@ import warnings import copy from ._action import Action - - -class InvalidAction(Exception): - pass - - -class ConvergenceError(Exception): - pass +from ..utils._exceptions import InvalidAction, ConvergenceError class ActionOptimizer: diff --git a/shap/utils/_exceptions.py b/shap/utils/_exceptions.py index baf73425a..762ef2ac9 100644 --- a/shap/utils/_exceptions.py +++ 
b/shap/utils/_exceptions.py @@ -6,6 +6,13 @@ class DimensionError(Exception): pass +class InvalidAction(Exception): + pass + + +class ConvergenceError(Exception): + pass + class InvalidMaskerError(ValueError): pass diff --git a/tests/actions/_optimizer.py b/tests/actions/_optimizer.py index 870d17a29..b0ab5c2f1 100644 --- a/tests/actions/_optimizer.py +++ b/tests/actions/_optimizer.py @@ -6,7 +6,7 @@ import pandas as pd import pytest import shap - +from shap.utils._exceptions import ConvergenceError, InvalidAction def create_basic_scenario(): X = pd.DataFrame({"feature1": np.ones(5), "feature2": np.ones(5), "feature3": np.ones(5)}) @@ -80,7 +80,7 @@ def test_too_few_evals(): [IncreaseFeature3(i) for i in range(1,20)] ] optimizer = shap.ActionOptimizer(passed, possible_actions) - with pytest.raises(Exception): + with pytest.raises(ConvergenceError): optimizer(X.iloc[0], max_evals=3) @@ -98,5 +98,5 @@ def test_run_out_of_group(): def test_bad_action(): - with pytest.raises(Exception): + with pytest.raises(InvalidAction): shap.ActionOptimizer(None, [None]) \ No newline at end of file diff --git a/tests/explainers/test_linear.py b/tests/explainers/test_linear.py index 052d38a35..5a06c0dc4 100644 --- a/tests/explainers/test_linear.py +++ b/tests/explainers/test_linear.py @@ -6,7 +6,7 @@ import scipy import pytest import shap - +from shap.utils._exceptions import InvalidMaskerError def test_tied_pair(): np.random.seed(0) @@ -36,7 +36,7 @@ def test_tied_pair_new(): assert np.abs(explainer.shap_values(X) - np.array([0.5, 0.5, 0])).max() < 0.05 def test_wrong_masker(): - with pytest.raises(Exception): + with pytest.raises(InvalidMaskerError): shap.explainers.Linear((0, 0), shap.maskers.Image("blur(10,10)", (10, 10, 3))) def test_tied_triple(): From 1f599845729fdcc5c4aef30aaba7b6cc1bde7230 Mon Sep 17 00:00:00 2001 From: Alexis Drakopoulos <33128441+alexisdrakopoulos@users.noreply.github.com> Date: Wed, 15 Jun 2022 17:03:08 +0100 Subject: [PATCH 5/7] wrong error --- tests/explainers/test_linear.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/explainers/test_linear.py b/tests/explainers/test_linear.py index d47201027..e14ef7300 100644 --- a/tests/explainers/test_linear.py +++ b/tests/explainers/test_linear.py @@ -6,7 +6,7 @@ import scipy import pytest import shap -from shap.utils._exceptions import InvalidMaskerError +from shap.utils._exceptions import NotImplementedError def test_tied_pair(): np.random.seed(0) @@ -36,7 +36,7 @@ def test_tied_pair_new(): assert np.abs(explainer.shap_values(X) - np.array([0.5, 0.5, 0])).max() < 0.05 def test_wrong_masker(): - with pytest.raises(InvalidMaskerError): + with pytest.raises(NotImplementedError): shap.explainers.Linear((0, 0), shap.maskers.Image("blur(10,10)", (10, 10, 3))) def test_tied_triple(): From ecc5af43650149cfd4031820a13600290b9156c9 Mon Sep 17 00:00:00 2001 From: Alexis Drakopoulos <33128441+alexisdrakopoulos@users.noreply.github.com> Date: Wed, 15 Jun 2022 17:30:04 +0100 Subject: [PATCH 6/7] accidental import --- tests/explainers/test_linear.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/explainers/test_linear.py b/tests/explainers/test_linear.py index e14ef7300..509eabcb7 100644 --- a/tests/explainers/test_linear.py +++ b/tests/explainers/test_linear.py @@ -6,7 +6,6 @@ import scipy import pytest import shap -from shap.utils._exceptions import NotImplementedError def test_tied_pair(): np.random.seed(0) From 348dc7d8256b1f1fd874512a5ce3826476e442ab Mon Sep 17 00:00:00 2001 From: Alexis Drakopoulos 
<33128441+alexisdrakopoulos@users.noreply.github.com> Date: Wed, 15 Jun 2022 21:58:17 +0100 Subject: [PATCH 7/7] accidental import --- shap/explainers/_explainer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/shap/explainers/_explainer.py b/shap/explainers/_explainer.py index 6c660d837..f490190d8 100644 --- a/shap/explainers/_explainer.py +++ b/shap/explainers/_explainer.py @@ -1,5 +1,4 @@ import copy -from sqlite3 import NotSupportedError import time import numpy as np import scipy as sp
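
Taken together, the series replaces bare `Exception` raises with either built-in types (`TypeError`, `ValueError`, `NotImplementedError`, `FileNotFoundError`) or the new classes in `shap/utils/_exceptions.py`, several of which subclass `ValueError` so that existing `except ValueError` handlers keep working. Below is a minimal sketch of the downstream usage this enables, assuming `shap/utils/_exceptions.py` as it stands after PATCH 4/7; the model, data, and the invalid `"shuffle"` option are illustrative stand-ins, not part of the patch:

```python
# Sketch only: catching the narrowed exception types introduced by this series.
import numpy as np
import sklearn.linear_model
import shap
from shap.utils._exceptions import InvalidFeaturePerturbationError

X = np.random.randn(100, 3)
y = X @ np.array([1.0, 2.0, 0.0])
model = sklearn.linear_model.LinearRegression().fit(X, y)

try:
    # "shuffle" is a deliberately invalid mode, used only for illustration.
    explainer = shap.explainers.Linear(
        model, shap.maskers.Independent(X), feature_perturbation="shuffle"
    )
except InvalidFeaturePerturbationError as err:
    # InvalidFeaturePerturbationError subclasses ValueError (see PATCH 2/7),
    # so callers that already catch ValueError continue to work unchanged.
    print(f"rejected option: {err}")
```

Tests can likewise assert on the specific class instead of a blanket `pytest.raises(Exception)`, which is what PATCH 4/7 does for `ConvergenceError` and `InvalidAction` in `tests/actions/_optimizer.py`.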