Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

【PaddlePaddle Hackathon 3 No.5】为 Paddle 新增 bucketize #44195

Merged
merged 7 commits into from Aug 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions python/paddle/__init__.py
Expand Up @@ -292,6 +292,7 @@
from .tensor.search import argmin # noqa: F401
from .tensor.search import argsort # noqa: F401
from .tensor.search import searchsorted # noqa: F401
from .tensor.search import bucketize # noqa: F401
from .tensor.search import masked_select # noqa: F401
from .tensor.search import topk # noqa: F401
from .tensor.search import where # noqa: F401
Expand Down Expand Up @@ -443,6 +444,7 @@
'flops',
'sort',
'searchsorted',
'bucketize',
'split',
'logical_and',
'full_like',
Expand Down
118 changes: 118 additions & 0 deletions python/paddle/fluid/tests/unittests/test_bucketize_api.py
@@ -0,0 +1,118 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function
from re import X

import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid import Program, program_guard

np.random.seed(10)


class TestBucketizeAPI(unittest.TestCase):
# test paddle.bucketize

def setUp(self):
    """Create the shared fixtures used by every test case.

    Sets ``self.sorted_sequence`` (1-D float64 bucket boundaries),
    ``self.x`` (2-D float64 query values) and ``self.place`` (the list of
    devices to run on).
    """
    boundaries = np.array([2, 4, 8, 16])
    queries = np.array([[0, 8, 4, 16], [-1, 2, 8, 4]])
    self.sorted_sequence = boundaries.astype("float64")
    self.x = queries.astype("float64")

    # CPU is always exercised; CUDA device 0 is added only for CUDA builds.
    devices = [paddle.CPUPlace()]
    if core.is_compiled_with_cuda():
        devices.append(paddle.CUDAPlace(0))
    self.place = devices

def test_api_static(self):
    """Compare static-graph ``paddle.bucketize`` with ``np.searchsorted``.

    Both the default (left) and ``right=True`` variants are checked on
    every device listed in ``self.place``.
    """
    paddle.enable_static()

    def run(place):
        with paddle.static.program_guard(paddle.static.Program()):
            boundaries = paddle.static.data(
                'SortedSequence',
                shape=self.sorted_sequence.shape,
                dtype="float64")
            x = paddle.static.data('x', shape=self.x.shape, dtype="float64")
            left_out = paddle.bucketize(x, boundaries)
            right_out = paddle.bucketize(x, boundaries, right=True)

            feed = {
                'SortedSequence': self.sorted_sequence,
                'x': self.x,
            }
            # No program is passed to run(), so this call stays inside the
            # program_guard block that built the ops.
            executor = paddle.static.Executor(place)
            left_res, right_res = executor.run(
                feed=feed, fetch_list=[left_out, right_out])

        expected_left = np.searchsorted(self.sorted_sequence, self.x)
        expected_right = np.searchsorted(self.sorted_sequence,
                                         self.x,
                                         side='right')
        self.assertTrue(np.allclose(expected_left, left_res))
        self.assertTrue(np.allclose(expected_right, right_res))

    for device in self.place:
        run(device)

def test_api_dygraph(self):
    """Compare dygraph ``paddle.bucketize`` with ``np.searchsorted``.

    Both the default (left) and ``right=True`` variants are checked on
    every device listed in ``self.place``.
    """

    def run(place):
        paddle.disable_static(place)
        boundaries = paddle.to_tensor(self.sorted_sequence)
        queries = paddle.to_tensor(self.x)

        left_out = paddle.bucketize(queries, boundaries)
        right_out = paddle.bucketize(queries, boundaries, right=True)

        expected_left = np.searchsorted(self.sorted_sequence, self.x)
        expected_right = np.searchsorted(self.sorted_sequence,
                                         self.x,
                                         side='right')

        left_matches = np.allclose(expected_left, left_out.numpy())
        self.assertEqual(left_matches, True)
        right_matches = np.allclose(expected_right, right_out.numpy())
        self.assertEqual(right_matches, True)

        # Switch back to static mode once this device has been checked.
        paddle.enable_static()

    for device in self.place:
        run(device)

def test_out_int32(self):
    """Check that ``out_int32=True`` makes ``paddle.bucketize`` return int32."""
    paddle.disable_static()
    sorted_sequence = paddle.to_tensor(self.sorted_sequence)
    x = paddle.to_tensor(self.x)
    out = paddle.bucketize(x, sorted_sequence, out_int32=True)
    # The previous `assertTrue(out.type, 'int32')` passed 'int32' as the
    # failure *message*, so it could never fail. Compare the dtype instead.
    self.assertEqual(out.dtype, paddle.int32)

def test_bucketize_dims_error(self):
    """Check that a non-1-D ``sorted_sequence`` raises ``ValueError``."""
    # `paddle.static.data` is a static-graph API; enable static mode here
    # so the test does not depend on the mode left behind by another test.
    paddle.enable_static()
    with paddle.static.program_guard(paddle.static.Program()):
        sorted_sequence = paddle.static.data('SortedSequence',
                                             shape=[2, 2],
                                             dtype="float64")
        x = paddle.static.data('x', shape=[2, 5], dtype="float64")
        self.assertRaises(ValueError, paddle.bucketize, x, sorted_sequence)

def test_input_error(self):
    """Check that passing a raw numpy array as ``x`` raises ``ValueError``."""
    for device in self.place:
        paddle.disable_static(device)
        boundaries = paddle.to_tensor(self.sorted_sequence)
        # self.x is deliberately left as a numpy array, not a Tensor.
        with self.assertRaises(ValueError):
            paddle.bucketize(self.x, boundaries)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a test case "错误检查:未输入x和sorted_sequence时,能否正确抛出错误" (error check: is an error raised correctly when x and sorted_sequence are not provided?) in the RFC; shall we add this test case?


def test_empty_input_error(self):
    """Check the errors raised when ``x`` or ``sorted_sequence`` is ``None``."""
    for device in self.place:
        paddle.disable_static(device)
        boundaries = paddle.to_tensor(self.sorted_sequence)
        queries = paddle.to_tensor(self.x)

        # Missing `x` is expected to raise ValueError.
        with self.assertRaises(ValueError):
            paddle.bucketize(None, boundaries)

        # Missing `sorted_sequence` is expected to raise AttributeError.
        with self.assertRaises(AttributeError):
            paddle.bucketize(queries, None)


# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
2 changes: 2 additions & 0 deletions python/paddle/tensor/__init__.py
Expand Up @@ -250,6 +250,7 @@
from .search import argmin # noqa: F401
from .search import argsort # noqa: F401
from .search import searchsorted # noqa: F401
from .search import bucketize # noqa: F401
from .search import topk # noqa: F401
from .search import where # noqa: F401
from .search import index_select # noqa: F401
Expand Down Expand Up @@ -503,6 +504,7 @@
'put_along_axis_',
'exponential_',
'heaviside',
'bucketize',
]

#this list used in math_op_patch.py for magic_method bind
Expand Down
55 changes: 55 additions & 0 deletions python/paddle/tensor/search.py
Expand Up @@ -914,6 +914,61 @@ def topk(x, k, axis=None, largest=True, sorted=True, name=None):
return values, indices


def bucketize(x, sorted_sequence, out_int32=False, right=False, name=None):
    """
    For every element of `x`, find the index of the bucket it belongs to, where the bucket boundaries are given by the 1-D tensor `sorted_sequence`.

    This is a thin wrapper around :func:`searchsorted` that requires `sorted_sequence` to be 1-D and takes the values to look up (`x`) as the first argument.

    Args:
        x(Tensor): An input N-D tensor value with type int32, int64, float32, float64.
        sorted_sequence(Tensor): An input 1-D tensor with type int32, int64, float32, float64. The value of the tensor monotonically increases in the innermost dimension.
        out_int32(bool, optional): Data type of the output tensor which can be int32, int64. The default value is False, and it indicates that the output data type is int64.
        right(bool, optional): Find the upper or lower bounds of the sorted_sequence range in the innermost dimension based on the given `x`. If the value of the sorted_sequence is nan or inf, return the size of the innermost dimension.
            The default value is False and it shows the lower bounds.
        name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor (the same shape as `x`). The dtype is int32 if :attr:`out_int32` is True, otherwise int64.

    Raises:
        ValueError: If `sorted_sequence` is not a 1-D tensor.

    Examples:

        .. code-block:: python

            import paddle

            sorted_sequence = paddle.to_tensor([2, 4, 8, 16], dtype='int32')
            x = paddle.to_tensor([[0, 8, 4, 16], [-1, 2, 8, 4]], dtype='int32')
            out1 = paddle.bucketize(x, sorted_sequence)
            print(out1)
            # Tensor(shape=[2, 4], dtype=int64, place=CPUPlace, stop_gradient=True,
            #        [[0, 2, 1, 3],
            #         [0, 0, 2, 1]])
            out2 = paddle.bucketize(x, sorted_sequence, right=True)
            print(out2)
            # Tensor(shape=[2, 4], dtype=int64, place=CPUPlace, stop_gradient=True,
            #        [[0, 3, 2, 4],
            #         [0, 1, 3, 2]])
            out3 = x.bucketize(sorted_sequence)
            print(out3)
            # Tensor(shape=[2, 4], dtype=int64, place=CPUPlace, stop_gradient=True,
            #        [[0, 2, 1, 3],
            #         [0, 0, 2, 1]])
            out4 = x.bucketize(sorted_sequence, right=True)
            print(out4)
            # Tensor(shape=[2, 4], dtype=int64, place=CPUPlace, stop_gradient=True,
            #        [[0, 3, 2, 4],
            #         [0, 1, 3, 2]])

    """
    # Pass this API's own name as the op name so that dtype errors point at
    # `paddle.bucketize` rather than at the `searchsorted` it delegates to.
    check_variable_and_dtype(sorted_sequence, 'SortedSequence',
                             ['float32', 'float64', 'int32', 'int64'],
                             'paddle.bucketize')
    if sorted_sequence.dim() != 1:
        raise ValueError(
            f"sorted_sequence tensor must be 1 dimension, but got dim {sorted_sequence.dim()}"
        )
    # searchsorted takes the sorted boundaries first and the values second.
    return searchsorted(sorted_sequence, x, out_int32, right, name)


def searchsorted(sorted_sequence,
values,
out_int32=False,
Expand Down