Skip to content

ufunc and binop overrides and behavior suggestions

Pauli Virtanen edited this page Jul 16, 2015 · 31 revisions

Synopsis

This page lists several suggestions on how __numpy_ufunc__, __array_priority__, ndarray subclassing, and Python binary operations should work together in the future.

The discussion is ongoing in gh-5844. Since code sometimes speaks louder than words, and it is clumsy to revise an evolving suggestion in a GitHub discussion thread, the latest suggestions can be kept here.

Let us assign a name and a revision number to each suggestion. Increment the revision number on each edit.

ufunc.2

class MyNdarray(object):
    """Example array-like whose binary operators are thin wrappers around ufuncs.

    The operator methods just call the matching ufunc; the decision whether an
    operand can be handled is made in `__numpy_ufunc__` and its helpers.
    """

    # Classes (besides MyNdarray itself) that this class knows how to handle.
    _known_classes = (np.ndarray,)

    def _can_handle(self, other):
        """Return True if this class is willing to operate on `other`."""
        # `_known_classes` is a class attribute, so it must be looked up through
        # `self`; a bare name is not visible from inside a method.
        if isinstance(other, self._known_classes + (MyNdarray,)):
            return True
        # Unknown objects that define __numpy_ufunc__ are not handled here.
        if hasattr(other, "__numpy_ufunc__"):
            return False
        return True

    def __add__(self, other):
        return np.add(self, other)

    def __radd__(self, other):
        return np.add(other, self)

    def __iadd__(self, other):
        return np.add(self, other, out=self)

    # ... repeat for other binops ...

    def __numpy_ufunc__(self, ufunc, method, args, kwargs):
        """Dispatch on the ufunc `method` name ("__call__" or "reduce").

        `args` is the tuple of positional arguments and `kwargs` the dict of
        keyword arguments of the original ufunc call.  Returns the result of
        the operation, or NotImplemented if an operand cannot be handled.
        Raises NotImplementedError for any other method.
        """
        if method == "__call__":
            return self._numpy_ufunc_call(ufunc, *args, **kwargs)
        elif method == "reduce":
            return self._numpy_ufunc_reduce(ufunc, *args, **kwargs)
        else:
            raise NotImplementedError("it's just an example ok")

    def _numpy_ufunc_call(self, ufunc, *args, out=None, **kwargs):
        """Apply `ufunc` to `args` after converting every operand with np.asarray.

        Returns NotImplemented as soon as an input or output is found that
        `_can_handle` rejects.
        """
        new_args = []
        for arg in args:
            if not self._can_handle(arg):
                return NotImplemented
            new_args.append(np.asarray(arg))
        args = new_args
        if out is not None:
            # A single output is normalized to a 1-tuple so that both forms
            # are treated alike.
            if not isinstance(out, tuple):
                out = (out,)
            new_out = []
            for arg in out:
                if not self._can_handle(arg):
                    return NotImplemented
                new_out.append(np.asarray(arg))
            out = tuple(new_out)
        return ufunc(*args, out=out, **kwargs)

    def _numpy_ufunc_reduce(self, ufunc, a, out=None, **kwargs):
        """Apply `ufunc.reduce` to `a` after converting it with np.asarray.

        `ufunc` has to be a parameter: `__numpy_ufunc__` passes it as the first
        argument and the body calls `ufunc.reduce`.  Returns NotImplemented if
        `a` or `out` is rejected by `_can_handle`.
        """
        if not self._can_handle(a):
            return NotImplemented
        a = np.asarray(a)
        if out is not None:
            if not self._can_handle(out):
                return NotImplemented
            out = np.asarray(out)
        return ufunc.reduce(a, out=out, **kwargs)

opt-out.3

class MyNdarray(object):
    """Example array-like using an explicit opt-out flag for binary operations.

    The binary operators call `__numpy_ufunc__` directly, so the class applies
    its own `_can_handle` rules before any ufunc is invoked.
    """

    # Flag that `_can_handle` looks for on other objects.
    __numpy_binop_override__ = True

    # Classes (besides MyNdarray itself) that this class knows how to handle.
    _known_classes = (np.ndarray,)

    def _can_handle(self, other):
        """Return True if this class is willing to operate on `other`."""
        if isinstance(other, self._known_classes + (MyNdarray,)):
            return True
        if getattr(other, "__numpy_binop_override__", False):
            return False
        # potential addition/alternative: consider presence of
        # __numpy_ufunc__ as an opt-out
        return True

    def __add__(self, other):
        # The ufunc is `np.add`; this class has no `add` attribute of its own.
        return self.__numpy_ufunc__(np.add, "__call__", (self, other), {})

    def __radd__(self, other):
        return self.__numpy_ufunc__(np.add, "__call__", (other, self), {})

    def __iadd__(self, other):
        # IF we want to never give `other` the opportunity to do an out-of-place
        # operation:
        return np.add(self, other, out=self)
        # OTHERWISE (deliberately unreachable; shown as the alternative).
        # Keyword arguments travel in the positional `kwargs` dict:
        return self.__numpy_ufunc__(np.add, "__call__", (self, other), {"out": self})

    # ... repeat for every binop ...

    # Implementation of __numpy_ufunc__ is the same as in the ufunc suggestion above

    def __numpy_ufunc__(self, ufunc, method, args, kwargs):
        """Dispatch on the ufunc `method` name ("__call__" or "reduce").

        `args` is the tuple of positional arguments and `kwargs` the dict of
        keyword arguments of the ufunc call.  Returns the result, or
        NotImplemented if an operand cannot be handled.  Raises
        NotImplementedError for any other method.
        """
        if method == "__call__":
            return self._numpy_ufunc_call(ufunc, *args, **kwargs)
        elif method == "reduce":
            return self._numpy_ufunc_reduce(ufunc, *args, **kwargs)
        else:
            raise NotImplementedError("it's just an example ok")

    def _numpy_ufunc_call(self, ufunc, *args, out=None, **kwargs):
        """Apply `ufunc` to `args` after converting every operand with np.asarray.

        Returns NotImplemented as soon as an input or output is found that
        `_can_handle` rejects.
        """
        new_args = []
        for arg in args:
            if not self._can_handle(arg):
                return NotImplemented
            new_args.append(np.asarray(arg))
        args = new_args
        if out is not None:
            # A single output is normalized to a 1-tuple.
            if not isinstance(out, tuple):
                out = (out,)
            new_out = []
            for arg in out:
                if not self._can_handle(arg):
                    return NotImplemented
                new_out.append(np.asarray(arg))
            out = tuple(new_out)
        return ufunc(*args, out=out, **kwargs)

    def _numpy_ufunc_reduce(self, ufunc, a, out=None, **kwargs):
        """Apply `ufunc.reduce` to `a` after converting it with np.asarray.

        `ufunc` has to be a parameter: `__numpy_ufunc__` passes it as the first
        argument and the body calls `ufunc.reduce`.  Returns NotImplemented if
        `a` or `out` is rejected by `_can_handle`.
        """
        if not self._can_handle(a):
            return NotImplemented
        a = np.asarray(a)
        if out is not None:
            if not self._can_handle(out):
                return NotImplemented
            out = np.asarray(out)
        return ufunc.reduce(a, out=out, **kwargs)

treat-as-ndarray-subclass.4

Here, I first try to define how ndarray itself behaves, and then give an example of a container subclass as well as a different array-like (the latter is nearly identical to opt-out above).

(update 2: correct MaskedArray) (update 3: mention iadd special treatment) (update 4: let output argument handle all inplace stuff)

class ndarray():
    """Sketch of how `ndarray` itself could behave under this suggestion."""

    def _can_handle(self, other):
        """Return True if `other` can simply be converted with np.asarray."""
        # to ensure that ndarray subclasses can handle ndarray by default:
        # we're a subclass (or the same), so no need to convert.
        # (isinstance needs a class as its second argument, hence type(other).)
        if isinstance(self, type(other)):
            return True
        # Strict subclasses of us and objects defining `__numpy_ufunc__` get treated identically.
        if isinstance(other, type(self)) or hasattr(other, '__numpy_ufunc__'):
            return False
        # backwards compatibility
        if getattr(other, '__array_priority__', 0) > 0:
            return False
        return True

    # The third argument is the position of `self` among the ufunc arguments.
    def __add__(self, other):
        return self.__numpy_ufunc__(np.add, "__call__", 0, (self, other))

    def __radd__(self, other):
        return self.__numpy_ufunc__(np.add, "__call__", 1, (other, self))

    def __iadd__(self, other):
        # Here, we explicitly stick with python operator logic, which means that if
        # self doesn't know what to do with other, but other does know what to do
        # with self, this will not happen in-place.
        return self.__numpy_ufunc__(np.add, "__call__", 0, (self, other), out=self)

    # ... repeat for every binop ...
    def __numpy_ufunc__(self, ufunc, method, i, args, **kwargs):
        """Convert all inputs to arrays and apply `method` of `ufunc` to them.

        `i` is the position of `self` among the arguments and `args` the tuple
        of inputs; outputs, if any, arrive as `kwargs['out']`.  Returns
        NotImplemented when an input can neither be handled directly nor be
        converted through its `__array__` method.  Raises TypeError when
        outputs are given that do not include `self` or are not all of the
        same class as `self`.
        """
        new_args = []
        for arg in args:
            if self._can_handle(arg):
                new_args.append(np.asarray(arg))
            else:
                try:
                    new_args.append(arg.__array__())
                except Exception:
                    return NotImplemented
        args = new_args

        # The next check is not strictly necessary, since one should only be able to
        # get here with `self in out`.
        out = kwargs.get('out', None)
        if out is not None:
            if not isinstance(out, tuple):
                out = (out,)
            # Compare by identity: `in` falls back to `==`, which is elementwise
            # for arrays and has no single truth value.
            if (not any(o is self for o in out)
                    or any(type(o) is not type(self) for o in out)):
                raise TypeError("Can only handle in-place to arrays of own class")

        # Note that we are guaranteed to work here, since all arguments are ndarray,
        # so we do not have to worry about catching possible TypeError.
        return getattr(ufunc, method)(*args, **kwargs)


class ufunc():
    """Sketch of ufunc dispatch: try `__numpy_ufunc__` on the arguments in turn."""

    def __init__(self, ndarray_only_routine):
        # the ndarray_only_routine is the basic code that can only handle ndarray, i.e.,
        # does not do any coercing any more.
        self.ndarray_only_routine = ndarray_only_routine

    def __call__(self, *args, **kwargs):
        return self.execute('__call__', *args, **kwargs)

    def execute(self, method, *args, **kwargs):
        """Run `method` of this ufunc on `args`, dispatching to overrides.

        If every input and output is exactly an `ndarray`, the ndarray-only
        routine is called directly.  Otherwise each argument that defines
        `__numpy_ufunc__` is tried until one returns something other than
        NotImplemented.  Raises TypeError if the outputs are of mixed types or
        if no argument could handle the operation.
        """
        #
        # ... some code that ensures that args only contains inputs and
        # kwargs everything else ...
        #
        out = kwargs.get('out', ())
        if out is None:
            # An explicit out=None means that no outputs were given.
            out = ()
        if isinstance(out, tuple):
            if any(type(o) is not type(out[0]) for o in out[1:]):
                raise TypeError("All outputs must be of the same type.")
        else:
            out = (out,)

        if all(type(arg) is ndarray for arg in args + out):
            return self.ndarray_only_routine(method, *args, **kwargs)

        # we may have non-ndarray instances; try __numpy_ufunc__ until something succeeds.
        result = None
        for i, arg in enumerate(args + out):
            if hasattr(arg, '__numpy_ufunc__'):
                # If outputs are given, an argument should only be tried if it is one of
                # them, since inputs cannot be expected to coerce output to the right shape.
                # Membership is tested by identity: `in` falls back to `==`, which
                # is elementwise for arrays.
                if out and not any(arg is o for o in out):
                    continue
                # Could add subclass check here as well to speed things up.
                result = arg.__numpy_ufunc__(self, method, i, args, **kwargs)
                if result is not NotImplemented:
                    return result

        if result is None and not out:
            # None of the arguments were array-like (e.g., np.add(1., 1.)); so nothing has been tried
            # yet.  Use ndarray to see if the arguments can be converted.
            # A throwaway instance supplies `self`, and the inputs are passed as
            # one tuple to match `__numpy_ufunc__(self, ufunc, method, i, args, **kwargs)`.
            result = ndarray().__numpy_ufunc__(self, method, None, args, **kwargs)
            if result is not NotImplemented:
                return result

        raise TypeError("No argument could handle this ufunc operation.")


# Build the `add` ufunc object from the ndarray-only routine.
# `ndarray_only_add` is not defined on this page; it stands for the basic
# addition code that can only handle ndarray.
add = ufunc(ndarray_only_add)


class MaskedArray(ndarray):
    # A container class that defers everything except masks to ndarray
    def __numpy_ufunc__(self, ufunc, method, i, args, **kwargs):
        """Apply the ufunc to the unmasked data and attach the combined mask.

        Arguments that have a `mask` attribute are replaced by their `data`;
        their masks are collected and combined with `self.combine_masks`.
        Returns NotImplemented if the ufunc raises TypeError on the unwrapped
        arguments.  Raises TypeError when outputs are given that do not
        include `self` or are not all of the same class as `self`.
        """
        new_args = []
        masks = []
        for arg in args:
            if hasattr(arg, 'mask'):  # Duck-typing
                masks.append(arg.mask)
                new_args.append(arg.data)
            else:
                masks.append(None)
                new_args.append(arg)
        args = new_args
        out = kwargs.pop('out', None)
        if out is not None:
            if not isinstance(out, tuple):
                out = (out,)

            # Compare by identity: `in` falls back to `==`, which is elementwise
            # for arrays and has no single truth value.
            if (not any(o is self for o in out)
                    or any(type(o) is not type(self) for o in out)):
                raise TypeError("Can only handle in-place to arrays of own class")

            kwargs['out'] = tuple(arg.data for arg in out)

        # All arguments are now guaranteed not to be type(self) any more, so we call
        # the ufunc to deal with possible other types (e.g., if self.data defined
        # __numpy_ufunc__, or one of the other arguments had one). If this fails,
        # we return NotImplemented, since possibly the other argument can handle
        # MaskedArray even though it cannot handle our content (seems unlikely, but
        # let's be proper).
        try:
            result = getattr(ufunc, method)(*args, **kwargs)
        except TypeError:
            return NotImplemented

        # Alternatively (possibly more logical), we just let our content try.
        # This replaces the try/except above; running both would apply the
        # ufunc twice:
        #
        #     result = self.data.__numpy_ufunc__(ufunc, method, i, args, **kwargs)
        #     if result is NotImplemented:
        #         return NotImplemented

        # ignore multiple outputs here for this example's sake.
        if out is not None:
            # The data was written into the given output, so hand back that
            # object itself rather than a new view.
            masked = out[0]
        else:
            masked = result.view(type(self))
        masked.mask = self.combine_masks(masks)
        return masked

class MyNdarray(object):
    """Example non-container array-like that converts every operand to an array."""

    # Classes (besides this class itself) that this class knows how to handle.
    _known_classes = (np.ndarray,)

    def _can_handle(self, other):
        """Return True if this class is willing to operate on `other` directly."""
        if isinstance(other, self._known_classes + (self.__class__,)):
            return True
        if hasattr(other, "__numpy_ufunc__"):
            return False
        return True

    # These methods are defined just like for `ndarray`: the third argument is
    # the position of `self` among the ufunc arguments.
    def __add__(self, other):
        return self.__numpy_ufunc__(np.add, "__call__", 0, (self, other))

    def __radd__(self, other):
        return self.__numpy_ufunc__(np.add, "__call__", 1, (other, self))

    def __iadd__(self, other):
        # Sticks with python operator logic: the result is written to `self`
        # only if this class can deal with `other` itself.
        return self.__numpy_ufunc__(np.add, "__call__", 0, (self, other), out=self)

    # ... repeat for every binop ...

    # Implementation of __numpy_ufunc__ inspired by the ufunc suggestion above.  This
    # non-container class insists it has to know how to deal with other classes rather
    # than change itself into an ndarray and let other classes try.
    def __numpy_ufunc__(self, ufunc, method, i, args, **kwargs):
        """Convert all inputs and outputs to arrays and apply `method` of `ufunc`.

        `i` is the position of `self` among the arguments and `args` the tuple
        of inputs; outputs, if any, arrive as `kwargs['out']`.  Returns
        NotImplemented when an input can neither be handled directly nor be
        converted through its `__array__` method.
        """
        new_args = []
        for arg in args:
            if self._can_handle(arg):
                # this class is rather boring, it just turns itself into an array.
                new_args.append(np.asarray(arg))
            else:
                try:
                    new_args.append(arg.__array__())
                except Exception:
                    return NotImplemented

        args = new_args
        out = kwargs.pop('out', None)
        if out is not None:
            if not isinstance(out, tuple):
                out = (out,)
            # We can only get here if type(out) is type(self).
            out = tuple(np.asarray(arg) for arg in out)

        result = getattr(ufunc, method)(*args, out=out, **kwargs)
        # ... possible stuff to turn result into type(self) ...
        return result

ufunc-wrapper.1

class UfuncWrapper:
    """Wrapper around an array-like that unwraps itself for ufunc calls."""

    def __init__(self, values):
        # values should be an array-like object
        self.values = values

    def __numpy_ufunc__(self, ufunc, method, i, inputs, out=None, **kwargs):
        """Apply `method` of `ufunc` with `self` replaced by `self.values`.

        `out` may be a single output or a tuple of outputs; occurrences of
        `self` among them are unwrapped as well.  The result is wrapped in a
        new instance of this class.
        """
        # replace self with self.values before calling the ufunc again
        inputs = tuple(x.values if x is self else x for x in inputs)
        if out is not None:
            if not isinstance(out, tuple):
                out = (out,)
            # Forward the unwrapped outputs; `out` is only passed on when it
            # was actually given.
            kwargs['out'] = tuple(x.values if x is self else x for x in out)
        # do the computation on unwrapped arrays
        result = getattr(ufunc, method)(*inputs, **kwargs)
        # now wrap the result
        return type(self)(result)

    # binary ops are defined by calling ufuncs
    def __add__(self, other):
        return np.add(self, other)

    def __radd__(self, other):
        return np.add(other, self)

    def __iadd__(self, other):
        return np.add(self, other, out=self)

    # repeat for all binary ops... (could use a standard mixin)

opt-out-wrapper.1

class OptOutWrapper:
    """Wrapper around an array-like whose binary operators use `operator` functions."""

    # __init__ and __numpy_ufunc__ are defined as on UfuncWrapper

    # add any necessary opt-out flags

    def __init__(self, values):
        # values should be an array-like object
        self.values = values

    def __numpy_ufunc__(self, ufunc, method, i, inputs, out=None, **kwargs):
        """Apply `method` of `ufunc` with `self` replaced by `self.values`.

        The result is wrapped in a new instance of this class.
        """
        def unwrap(item):
            # only this very instance is unwrapped; everything else passes through
            return item.values if item is self else item

        inputs = tuple(map(unwrap, inputs))
        if out is not None:
            outputs = out if isinstance(out, tuple) else (out,)
            out = tuple(map(unwrap, outputs))
        # compute on the unwrapped arrays, then wrap the result again
        bound = getattr(ufunc, method)
        return type(self)(bound(*inputs, out=out, **kwargs))

    def _binary_op(self, op, other, reflexive=False):
        """Apply `op` to the wrapped values and `other`, and wrap the result.

        With `reflexive=True` the operands are swapped, i.e. `other` comes first.
        """
        # a separate wrapper is needed for binary operations so that they are
        # handed off to the wrapped object
        operands = (other, self.values) if reflexive else (self.values, other)
        return type(self)(op(*operands))

    # binary ops use special _binary_op wrapper
    def __add__(self, other):
        return self._binary_op(operator.add, other)

    def __iadd__(self, other):
        return self._binary_op(operator.iadd, other)

    def __radd__(self, other):
        return self._binary_op(operator.add, other, reflexive=True)

    # repeat for all binary ops...

interfaces.1

Example restricting numpy-like binop handling within an interface:

class MyNdarray(object):
    """Example array-like that restricts numpy-like binop handling to an interface.

    A binary operation is carried out through the ufunc only if the other
    operand carries the `__numpy_binop__` flag or is one of `_known_classes`;
    otherwise the operator returns NotImplemented.
    """

    # This can be a separate flag [Option 4],
    # or replaced with getattr(other, `__numpy_ufunc__`, True) [Option 2]
    __numpy_binop__ = True

    _known_classes = (SomeNonArrayLikeClass,)

    # The default value of the flag for numpy.ndarray is likely fixed to True:
    _default_value = True
    # However, new classes might want to be better citizens and do instead:
    _default_value = False
    _known_classes += (float, int, np.generic)
    # The argument can be made that automatic cast of buffers etc to ndarrays
    # should not be made in binops --- if you want to use a buffer like an array,
    # cast it to an array.
    # Note that removing edges from a valid dispatch graph cannot break
    # validity of the graph, so changing the default is fine.

    # `_default_value` is a class attribute, so the methods below read it
    # through `self`; a bare name is not visible from inside a method.

    def __add__(self, other):
        if getattr(other, "__numpy_binop__", self._default_value):
            return np.add(self, other)
        elif isinstance(other, self._known_classes):
            return np.add(self, np.asarray(other))   # for example
        else:
            return NotImplemented

    def __radd__(self, other):
        if getattr(other, "__numpy_binop__", self._default_value):
            return np.add(other, self)
        elif isinstance(other, self._known_classes):
            # Keep `other` as the left operand; the order matters once this
            # pattern is repeated for non-commutative binops.
            return np.add(np.asarray(other), self)   # for example
        else:
            return NotImplemented

    def __iadd__(self, other):
        # IF we want to enforce += allows only array-likes
        return np.add(self, other, out=self)
        # OTHERWISE (deliberately unreachable; shown as the alternative):
        if (getattr(other, "__numpy_binop__", self._default_value)
                or isinstance(other, self._known_classes)):
            return np.add(self, other, out=self)
        else:
            return NotImplemented

    # ... repeat for every binop ...

    def _can_handle(self, other):
        """Return True if this class is willing to operate on `other`."""
        if isinstance(other, self._known_classes + (self.__class__,)):
            return True
        if hasattr(other, "__numpy_ufunc__"):
            return False
        return True

    # Implementation of __numpy_ufunc__ is the same as in the ufunc suggestion above

    def __numpy_ufunc__(self, ufunc, method, args, kwargs):
        """Dispatch on the ufunc `method` name ("__call__" or "reduce").

        `args` is the tuple of positional arguments and `kwargs` the dict of
        keyword arguments of the ufunc call.  Returns the result, or
        NotImplemented if an operand cannot be handled.  Raises
        NotImplementedError for any other method.
        """
        if method == "__call__":
            return self._numpy_ufunc_call(ufunc, *args, **kwargs)
        elif method == "reduce":
            return self._numpy_ufunc_reduce(ufunc, *args, **kwargs)
        else:
            raise NotImplementedError("it's just an example ok")

    def _numpy_ufunc_call(self, ufunc, *args, out=None, **kwargs):
        """Apply `ufunc` to `args` after converting every operand with np.asarray.

        Returns NotImplemented as soon as an input or output is found that
        `_can_handle` rejects.
        """
        new_args = []
        for arg in args:
            if not self._can_handle(arg):
                return NotImplemented
            new_args.append(np.asarray(arg))
        args = new_args
        if out is not None:
            # A single output is normalized to a 1-tuple.
            if not isinstance(out, tuple):
                out = (out,)
            new_out = []
            for arg in out:
                if not self._can_handle(arg):
                    return NotImplemented
                new_out.append(np.asarray(arg))
            out = tuple(new_out)
        return ufunc(*args, out=out, **kwargs)

    def _numpy_ufunc_reduce(self, ufunc, a, out=None, **kwargs):
        """Apply `ufunc.reduce` to `a` after converting it with np.asarray.

        `ufunc` has to be a parameter: `__numpy_ufunc__` passes it as the first
        argument and the body calls `ufunc.reduce`.  Returns NotImplemented if
        `a` or `out` is rejected by `_can_handle`.
        """
        if not self._can_handle(a):
            return NotImplemented
        a = np.asarray(a)
        if out is not None:
            if not self._can_handle(out):
                return NotImplemented
            out = np.asarray(out)
        return ufunc.reduce(a, out=out, **kwargs)