Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[new API] add paddle.vision.ops.generate_proposals #43611

Merged
merged 7 commits into from Jul 19, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
71 changes: 14 additions & 57 deletions python/paddle/fluid/layers/detection.py
Expand Up @@ -17,6 +17,8 @@

from __future__ import print_function

import paddle

from .layer_function_generator import generate_layer_fn
from .layer_function_generator import autodoc, templatedoc
from ..layer_helper import LayerHelper
Expand Down Expand Up @@ -3007,63 +3009,18 @@ def generate_proposals(scores,
im_info, anchors, variances)

"""
if _non_static_mode():
assert return_rois_num, "return_rois_num should be True in dygraph mode."
attrs = ('pre_nms_topN', pre_nms_top_n, 'post_nms_topN', post_nms_top_n,
'nms_thresh', nms_thresh, 'min_size', min_size, 'eta', eta)
rpn_rois, rpn_roi_probs, rpn_rois_num = _C_ops.generate_proposals(
scores, bbox_deltas, im_info, anchors, variances, *attrs)
return rpn_rois, rpn_roi_probs, rpn_rois_num

helper = LayerHelper('generate_proposals', **locals())

check_variable_and_dtype(scores, 'scores', ['float32'],
'generate_proposals')
check_variable_and_dtype(bbox_deltas, 'bbox_deltas', ['float32'],
'generate_proposals')
check_variable_and_dtype(im_info, 'im_info', ['float32', 'float64'],
'generate_proposals')
check_variable_and_dtype(anchors, 'anchors', ['float32'],
'generate_proposals')
check_variable_and_dtype(variances, 'variances', ['float32'],
'generate_proposals')

rpn_rois = helper.create_variable_for_type_inference(
dtype=bbox_deltas.dtype)
rpn_roi_probs = helper.create_variable_for_type_inference(
dtype=scores.dtype)
outputs = {
'RpnRois': rpn_rois,
'RpnRoiProbs': rpn_roi_probs,
}
if return_rois_num:
rpn_rois_num = helper.create_variable_for_type_inference(dtype='int32')
rpn_rois_num.stop_gradient = True
outputs['RpnRoisNum'] = rpn_rois_num

helper.append_op(type="generate_proposals",
inputs={
'Scores': scores,
'BboxDeltas': bbox_deltas,
'ImInfo': im_info,
'Anchors': anchors,
'Variances': variances
},
attrs={
'pre_nms_topN': pre_nms_top_n,
'post_nms_topN': post_nms_top_n,
'nms_thresh': nms_thresh,
'min_size': min_size,
'eta': eta
},
outputs=outputs)
rpn_rois.stop_gradient = True
rpn_roi_probs.stop_gradient = True

if return_rois_num:
return rpn_rois, rpn_roi_probs, rpn_rois_num
else:
return rpn_rois, rpn_roi_probs
return paddle.vision.ops.generate_proposals(scores=scores,
bbox_deltas=bbox_deltas,
img_size=im_info[:2],
anchors=anchors,
variances=variances,
pre_nms_top_n=pre_nms_top_n,
post_nms_top_n=post_nms_top_n,
nms_thresh=nms_thresh,
min_size=min_size,
eta=eta,
return_rois_num=return_rois_num,
name=name)


def box_clip(input, im_info, name=None):
Expand Down
Expand Up @@ -254,6 +254,99 @@ def init_test_params(self):
self.pixel_offset = False


class testGenerateProposalsAPI(unittest.TestCase):
    """Compare ``paddle.vision.ops.generate_proposals`` with the NumPy reference.

    The same fixed random inputs are run through the Python reference
    ``generate_proposals_v2_in_python`` (in ``setUp``) and through the public
    API in both dynamic-graph and static-graph mode.
    """

    def setUp(self):
        # Fixed seed so that inputs (and thus the reference outputs) are
        # identical on every run.
        np.random.seed(678)
        self.scores_np = np.random.rand(2, 3, 4, 4).astype('float32')
        self.bbox_deltas_np = np.random.rand(2, 12, 4, 4).astype('float32')
        self.img_size_np = np.array([[8, 8], [6, 6]]).astype('float32')
        self.anchors_np = np.reshape(np.arange(4 * 4 * 3 * 4),
                                     [4, 4, 3, 4]).astype('float32')
        self.variances_np = np.ones((4, 4, 3, 4)).astype('float32')

        # Reference results; the keyword values mirror the arguments (and the
        # API defaults) used by the two test methods below.
        rois, roi_probs, rois_num = generate_proposals_v2_in_python(
            self.scores_np,
            self.bbox_deltas_np,
            self.img_size_np,
            self.anchors_np,
            self.variances_np,
            pre_nms_topN=10,
            post_nms_topN=5,
            nms_thresh=0.5,
            min_size=0.1,
            eta=1.0,
            pixel_offset=False)
        # Drop the size-1 axis 1 of the reference RoIs / scores.
        self.roi_expected = np.array(rois).squeeze(1)
        self.roi_probs_expected = np.array(roi_probs).squeeze(1)
        self.rois_num_expected = np.array(rois_num)

    def _check_outputs(self, rois, roi_probs, rois_num):
        """Assert that the three API outputs match the reference values."""
        self.assertTrue(np.allclose(self.roi_expected, rois))
        self.assertTrue(np.allclose(self.roi_probs_expected, roi_probs))
        self.assertTrue(np.allclose(self.rois_num_expected, rois_num))

    def test_dynamic(self):
        paddle.disable_static()
        scores = paddle.to_tensor(self.scores_np)
        bbox_deltas = paddle.to_tensor(self.bbox_deltas_np)
        img_size = paddle.to_tensor(self.img_size_np)
        anchors = paddle.to_tensor(self.anchors_np)
        variances = paddle.to_tensor(self.variances_np)

        rois, roi_probs, rois_num = paddle.vision.ops.generate_proposals(
            scores,
            bbox_deltas,
            img_size,
            anchors,
            variances,
            pre_nms_top_n=10,
            post_nms_top_n=5,
            return_rois_num=True)
        self._check_outputs(rois.numpy(), roi_probs.numpy(), rois_num.numpy())

    def test_static(self):
        paddle.enable_static()
        # Build the graph in private programs instead of the process-wide
        # default ones, so variables and ops created here cannot leak into
        # (or collide with) other tests sharing the default main program.
        main_program = paddle.static.Program()
        startup_program = paddle.static.Program()
        with paddle.static.program_guard(main_program, startup_program):
            scores = paddle.static.data(name='scores',
                                        shape=[2, 3, 4, 4],
                                        dtype='float32')
            bbox_deltas = paddle.static.data(name='bbox_deltas',
                                             shape=[2, 12, 4, 4],
                                             dtype='float32')
            img_size = paddle.static.data(name='img_size',
                                          shape=[2, 2],
                                          dtype='float32')
            anchors = paddle.static.data(name='anchors',
                                         shape=[4, 4, 3, 4],
                                         dtype='float32')
            variances = paddle.static.data(name='variances',
                                           shape=[4, 4, 3, 4],
                                           dtype='float32')
            rois, roi_probs, rois_num = paddle.vision.ops.generate_proposals(
                scores,
                bbox_deltas,
                img_size,
                anchors,
                variances,
                pre_nms_top_n=10,
                post_nms_top_n=5,
                return_rois_num=True)

        exe = paddle.static.Executor()
        rois, roi_probs, rois_num = exe.run(
            main_program,
            feed={
                'scores': self.scores_np,
                'bbox_deltas': self.bbox_deltas_np,
                'img_size': self.img_size_np,
                'anchors': self.anchors_np,
                'variances': self.variances_np,
            },
            fetch_list=[rois.name, roi_probs.name, rois_num.name],
            return_numpy=False)

        self._check_outputs(np.array(rois), np.array(roi_probs),
                            np.array(rois_num))


# Script entry point: switch Paddle to static-graph mode before handing
# control to the unittest runner.
if __name__ == '__main__':
    paddle.enable_static()
    unittest.main()
159 changes: 146 additions & 13 deletions python/paddle/vision/ops.py
Expand Up @@ -24,19 +24,9 @@
from paddle import _C_ops

__all__ = [ #noqa
'yolo_loss',
'yolo_box',
'deform_conv2d',
'DeformConv2D',
'read_file',
'decode_jpeg',
'roi_pool',
'RoIPool',
'psroi_pool',
'PSRoIPool',
'roi_align',
'RoIAlign',
'nms',
'yolo_loss', 'yolo_box', 'deform_conv2d', 'DeformConv2D', 'read_file',
'decode_jpeg', 'roi_pool', 'RoIPool', 'psroi_pool', 'PSRoIPool',
'roi_align', 'RoIAlign', 'nms', 'generate_proposals'
]


Expand Down Expand Up @@ -1540,3 +1530,146 @@ def _nms(boxes, iou_threshold):
return keep_boxes_idxs[topk_sub_indices]

return keep_boxes_idxs[sorted_sub_indices][:top_k]


def generate_proposals(scores,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we delete the paddle.fluid.layers.generate_proposals function and have paddle.fluid.layers.generate_proposals import this one instead? This can be discussed with @zhiboniu.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.
After discussion with @zhiboniu, we have changed the implementation of the corresponding fluid API to call this paddle.vision.ops.generate_proposals. We still keep the fluid API to avoid potential incompatibilities.

bbox_deltas,
img_size,
anchors,
variances,
pre_nms_top_n=6000,
post_nms_top_n=1000,
nms_thresh=0.5,
min_size=0.1,
eta=1.0,
pixel_offset=False,
return_rois_num=False,
name=None):
"""
This operation proposes RoIs according to the probability of each box
being a foreground object. The proposals
output by the RPN are calculated from anchors, bbox_deltas and scores. The final proposals
can be used to train a detection network.

For generating proposals, this operation performs following steps:

1. Transpose and resize scores and bbox_deltas in size of
(H * W * A, 1) and (H * W * A, 4)
2. Calculate box locations as proposals candidates.
3. Clip boxes to image
4. Remove predicted boxes with small area.
5. Apply non-maximum suppression (NMS) to get final proposals as output.

Args:
scores (Tensor): A 4-D Tensor with shape [N, A, H, W] represents
the probability for each box to be an object.
N is batch size, A is number of anchors, H and W are height and
width of the feature map. The data type must be float32.
bbox_deltas (Tensor): A 4-D Tensor with shape [N, 4*A, H, W]
represents the difference between predicted box location and
anchor location. The data type must be float32.
img_size (Tensor): A 2-D Tensor with shape [N, 2] represents origin
image shape information for N batch, including height and width of the input sizes.
The data type can be float32 or float64.
anchors (Tensor): A 4-D Tensor represents the anchors with a layout
of [H, W, A, 4]. H and W are height and width of the feature map,
A is the number of anchors at each position. Each anchor is
in (xmin, ymin, xmax, ymax) format and unnormalized. The data type must be float32.
variances (Tensor): A 4-D Tensor. The expanded variances of anchors with a layout of
[H, W, num_priors, 4]. Each variance is in
(xcenter, ycenter, w, h) format. The data type must be float32.
pre_nms_top_n (int, optional): Number of total bboxes to be kept per
image before NMS. `6000` by default.
post_nms_top_n (int, optional): Number of total bboxes to be kept per
image after NMS. `1000` by default.
nms_thresh (float, optional): Threshold in NMS. The data type must be float32. `0.5` by default.
min_size (float, optional): Remove predicted boxes with either height or
width less than this value. `0.1` by default.
eta (float, optional): Used in adaptive NMS. It only takes effect when `adaptive_threshold > 0.5`,
in which case `adaptive_threshold = adaptive_threshold * eta` in each iteration. `1.0` by default.
pixel_offset (bool, optional): Whether there is pixel offset. If True, the offset of `img_size` will be 1. 'False' by default.
return_rois_num (bool, optional): Whether to return `rpn_rois_num`. When set to True, a 1-D Tensor with shape [N, ] holding the number of RoIs
of each image in the batch is returned. It must be True in dynamic graph mode. 'False' by default.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.

Returns:
- rpn_rois (Tensor): The generated RoIs. 2-D Tensor with shape ``[N, 4]`` while ``N`` is the number of RoIs. The data type is the same as ``scores``.
- rpn_roi_probs (Tensor): The scores of generated RoIs. 2-D Tensor with shape ``[N, 1]`` while ``N`` is the number of RoIs. The data type is the same as ``scores``.
- rpn_rois_num (Tensor): The number of RoIs of each image in the batch. 1-D Tensor with shape ``[B,]``, where ``B`` is the batch size; its sum equals the total number of RoIs ``N``. It is ``None`` when ``return_rois_num`` is False.

Examples:
.. code-block:: python

import paddle

scores = paddle.rand((2,4,5,5), dtype=paddle.float32)
bbox_deltas = paddle.rand((2, 16, 5, 5), dtype=paddle.float32)
img_size = paddle.to_tensor([[224.0, 224.0], [224.0, 224.0]])
anchors = paddle.rand((2,5,4,4), dtype=paddle.float32)
variances = paddle.rand((2,5,10,4), dtype=paddle.float32)
rois, roi_probs, roi_nums = paddle.vision.ops.generate_proposals(scores, bbox_deltas,
img_size, anchors, variances, return_rois_num=True)
print(rois, roi_probs, roi_nums)
"""

if _non_static_mode():
assert return_rois_num, "return_rois_num should be True in dygraph mode."
attrs = ('pre_nms_topN', pre_nms_top_n, 'post_nms_topN', post_nms_top_n,
'nms_thresh', nms_thresh, 'min_size', min_size, 'eta', eta,
'pixel_offset', pixel_offset)
rpn_rois, rpn_roi_probs, rpn_rois_num = _C_ops.generate_proposals_v2(
scores, bbox_deltas, img_size, anchors, variances, *attrs)

return rpn_rois, rpn_roi_probs, rpn_rois_num

helper = LayerHelper('generate_proposals_v2', **locals())

check_variable_and_dtype(scores, 'scores', ['float32'],
'generate_proposals_v2')
check_variable_and_dtype(bbox_deltas, 'bbox_deltas', ['float32'],
'generate_proposals_v2')
check_variable_and_dtype(img_size, 'img_size', ['float32', 'float64'],
'generate_proposals_v2')
check_variable_and_dtype(anchors, 'anchors', ['float32'],
'generate_proposals_v2')
check_variable_and_dtype(variances, 'variances', ['float32'],
'generate_proposals_v2')

rpn_rois = helper.create_variable_for_type_inference(
dtype=bbox_deltas.dtype)
rpn_roi_probs = helper.create_variable_for_type_inference(
dtype=scores.dtype)
outputs = {
'RpnRois': rpn_rois,
'RpnRoiProbs': rpn_roi_probs,
}
if return_rois_num:
rpn_rois_num = helper.create_variable_for_type_inference(dtype='int32')
rpn_rois_num.stop_gradient = True
outputs['RpnRoisNum'] = rpn_rois_num

helper.append_op(type="generate_proposals_v2",
inputs={
'Scores': scores,
'BboxDeltas': bbox_deltas,
'ImShape': img_size,
'Anchors': anchors,
'Variances': variances
},
attrs={
'pre_nms_topN': pre_nms_top_n,
'post_nms_topN': post_nms_top_n,
'nms_thresh': nms_thresh,
'min_size': min_size,
'eta': eta,
'pixel_offset': pixel_offset
},
outputs=outputs)
rpn_rois.stop_gradient = True
rpn_roi_probs.stop_gradient = True
if not return_rois_num:
rpn_rois_num = None

return rpn_rois, rpn_roi_probs, rpn_rois_num