From 01f891496aae3f02b8a24029a8aa0654e13d2b7a Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Tue, 14 Jun 2022 14:00:15 +0100
Subject: [PATCH] Add new `.. betastatus::` directive and document Beta APIs
 (#6115)

* Add new .. betastatus:: directive to document Beta APIs

* Also add it for the fine-grained video API

* Add directive for all builders and pages of Detection module

* Also segmentation and video models
---
 docs/source/beta_status.py | 20 +++++++++++++++++++
 docs/source/conf.py | 3 +++
 docs/source/io.rst | 2 ++
 docs/source/models.rst | 6 ++++++
 docs/source/models/deeplabv3.rst | 2 ++
 docs/source/models/faster_rcnn.rst | 2 ++
 docs/source/models/fcn.rst | 2 ++
 docs/source/models/fcos.rst | 2 ++
 docs/source/models/keypoint_rcnn.rst | 2 ++
 docs/source/models/lraspp.rst | 2 ++
 docs/source/models/mask_rcnn.rst | 2 ++
 docs/source/models/retinanet.rst | 2 ++
 docs/source/models/ssd.rst | 2 ++
 docs/source/models/ssdlite.rst | 1 +
 docs/source/models/video_resnet.rst | 2 ++
 torchvision/io/image.py | 2 ++
 torchvision/io/video_reader.py | 2 ++
 torchvision/models/detection/faster_rcnn.py | 9 +++++++++
 torchvision/models/detection/fcos.py | 2 ++
 torchvision/models/detection/keypoint_rcnn.py | 2 ++
 torchvision/models/detection/mask_rcnn.py | 4 ++++
 torchvision/models/detection/retinanet.py | 4 ++++
 torchvision/models/detection/ssd.py | 2 ++
 torchvision/models/detection/ssdlite.py | 2 ++
 torchvision/models/segmentation/deeplabv3.py | 4 ++++
 torchvision/models/segmentation/fcn.py | 4 ++++
 torchvision/models/segmentation/lraspp.py | 2 ++
 torchvision/models/video/resnet.py | 6 ++++++
 28 files changed, 97 insertions(+)
 create mode 100644 docs/source/beta_status.py

diff --git a/docs/source/beta_status.py b/docs/source/beta_status.py
new file mode 100644
index 00000000000..925894df5c5
--- /dev/null
+++ b/docs/source/beta_status.py
@@ -0,0 +1,20 @@
+from docutils import nodes
+from docutils.parsers.rst import Directive
+
+
+class BetaStatus(Directive):
+    has_content = True
+
+    def run(self):
+        api_name = " ".join(self.content)
+        text = f"The {api_name} is in Beta stage, and backward compatibility is not guaranteed."
+        return [nodes.warning("", nodes.paragraph("", "", nodes.Text(text)))]
+
+
+def setup(app):
+    app.add_directive("betastatus", BetaStatus)
+    return {
+        "version": "0.1",
+        "parallel_read_safe": True,
+        "parallel_write_safe": True,
+    }
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 014eb3c3ae9..8768dbdb80c 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -21,6 +21,7 @@
 # sys.path.insert(0, os.path.abspath('.'))

 import os
+import sys
 import textwrap
 from copy import copy
 from pathlib import Path
@@ -30,6 +31,7 @@
 import torchvision.models as M
 from tabulate import tabulate

+sys.path.append(os.path.abspath("."))

 # -- General configuration ------------------------------------------------

@@ -50,6 +52,7 @@
     "sphinx.ext.duration",
     "sphinx_gallery.gen_gallery",
     "sphinx_copybutton",
+    "beta_status",
 ]

 sphinx_gallery_conf = {
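With the extension registered in ``conf.py`` above, a documentation page or docstring opts into the
Beta banner by adding ``.. betastatus::`` followed by a short API name, exactly as the hunks below do.
The directive renders as a standard Sphinx ``warning`` admonition whose text interpolates that name.
A minimal sketch of the sentence it produces (illustrative only, not part of the patch; it simply
mirrors the f-string in ``BetaStatus.run()``)::

    # Hypothetical standalone check of the wording used by the directive.
    api_name = "detection module"  # joined from the directive's content
    text = f"The {api_name} is in Beta stage, and backward compatibility is not guaranteed."
    print(text)
    # The detection module is in Beta stage, and backward compatibility is not guaranteed.
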
diff --git a/docs/source/io.rst b/docs/source/io.rst
index c62b519efe6..258a1ee16dc 100644
--- a/docs/source/io.rst
+++ b/docs/source/io.rst
@@ -26,6 +26,8 @@ In addition to the :mod:`read_video` function, we provide a high-performance
 lower-level API for more fine-grained control compared to the :mod:`read_video` function.
 It does all this whilst fully supporting torchscript.

+.. betastatus:: fine-grained video API
+
 .. autosummary::
     :toctree: generated/
     :template: class.rst
diff --git a/docs/source/models.rst b/docs/source/models.rst
index b549c25bf94..9f8babca770 100644
--- a/docs/source/models.rst
+++ b/docs/source/models.rst
@@ -272,6 +272,8 @@ Semantic Segmentation

 .. currentmodule:: torchvision.models.segmentation

+.. betastatus:: segmentation module
+
 The following semantic segmentation models are available, with or without
 pre-trained weights:

@@ -334,6 +336,8 @@ keypoint detection are initialized with the classification models in
 torchvision. The models expect a list of ``Tensor[C, H, W]``. Check the constructor of
 the models for more information.

+.. betastatus:: detection module
+
 Object Detection
 ----------------

@@ -453,6 +457,8 @@ Video Classification

 .. currentmodule:: torchvision.models.video

+.. betastatus:: video module
+
 The following video classification models are available, with or without
 pre-trained weights:

diff --git a/docs/source/models/deeplabv3.rst b/docs/source/models/deeplabv3.rst
index 8b978e18358..e6f21686081 100644
--- a/docs/source/models/deeplabv3.rst
+++ b/docs/source/models/deeplabv3.rst
@@ -6,6 +6,8 @@ DeepLabV3
 The DeepLabV3 model is based on the `Rethinking Atrous Convolution for Semantic
 Image Segmentation `__ paper.

+.. betastatus:: segmentation module
+
 Model builders
 --------------

diff --git a/docs/source/models/faster_rcnn.rst b/docs/source/models/faster_rcnn.rst
index cbd461533ec..19ec9227886 100644
--- a/docs/source/models/faster_rcnn.rst
+++ b/docs/source/models/faster_rcnn.rst
@@ -3,10 +3,12 @@ Faster R-CNN

 .. currentmodule:: torchvision.models.detection

+
 The Faster R-CNN model is based on the `Faster R-CNN: Towards Real-Time Object
 Detection with Region Proposal Networks `__ paper.

+.. betastatus:: detection module

 Model builders
 --------------

diff --git a/docs/source/models/fcn.rst b/docs/source/models/fcn.rst
index ae76c50ff1b..efcdb37c0d5 100644
--- a/docs/source/models/fcn.rst
+++ b/docs/source/models/fcn.rst
@@ -7,6 +7,8 @@
 The FCN model is based on the `Fully Convolutional Networks for Semantic
 Segmentation `__ paper.

+.. betastatus:: segmentation module
+
 Model builders
 --------------

diff --git a/docs/source/models/fcos.rst b/docs/source/models/fcos.rst
index 4096cc30f68..1bcc4267678 100644
--- a/docs/source/models/fcos.rst
+++ b/docs/source/models/fcos.rst
@@ -6,6 +6,8 @@ FCOS
 The RetinaNet model is based on the `FCOS: Fully Convolutional One-Stage
 Object Detection `__ paper.

+.. betastatus:: detection module
+
 Model builders
 --------------

diff --git a/docs/source/models/keypoint_rcnn.rst b/docs/source/models/keypoint_rcnn.rst
index 8ac26b99660..ba677c7f8f3 100644
--- a/docs/source/models/keypoint_rcnn.rst
+++ b/docs/source/models/keypoint_rcnn.rst
@@ -6,6 +6,8 @@ Keypoint R-CNN
 The Keypoint R-CNN model is based on the `Mask R-CNN `__ paper.

+.. betastatus:: detection module
+
 Model builders
 --------------

diff --git a/docs/source/models/lraspp.rst b/docs/source/models/lraspp.rst
index 7d419eb21e8..312249c53e1 100644
--- a/docs/source/models/lraspp.rst
+++ b/docs/source/models/lraspp.rst
@@ -5,6 +5,8 @@ LRASPP
 The LRASPP model is based on the `Searching for MobileNetV3 `_ paper.

+.. betastatus:: segmentation module
+
 Model builders
 --------------

diff --git a/docs/source/models/mask_rcnn.rst b/docs/source/models/mask_rcnn.rst
index 243cef7815c..5887b6c71a6 100644
--- a/docs/source/models/mask_rcnn.rst
+++ b/docs/source/models/mask_rcnn.rst
@@ -6,6 +6,8 @@ Mask R-CNN
 The Mask R-CNN model is based on the `Mask R-CNN `__ paper.

+.. betastatus:: detection module
+
 Model builders
 --------------

diff --git a/docs/source/models/retinanet.rst b/docs/source/models/retinanet.rst
index 3475cc783c3..8613ae9aaab 100644
--- a/docs/source/models/retinanet.rst
+++ b/docs/source/models/retinanet.rst
@@ -6,6 +6,8 @@ RetinaNet
 The RetinaNet model is based on the `Focal Loss for Dense Object Detection `__ paper.

+.. betastatus:: detection module
+
 Model builders
 --------------

diff --git a/docs/source/models/ssd.rst b/docs/source/models/ssd.rst
index e240d34b178..7d73b234a28 100644
--- a/docs/source/models/ssd.rst
+++ b/docs/source/models/ssd.rst
@@ -6,6 +6,8 @@ SSD
 The SSD model is based on the `SSD: Single Shot MultiBox Detector `__ paper.

+.. betastatus:: detection module
+
 Model builders
 --------------

diff --git a/docs/source/models/ssdlite.rst b/docs/source/models/ssdlite.rst
index 1f8437a6ff1..bac1575c966 100644
--- a/docs/source/models/ssdlite.rst
+++ b/docs/source/models/ssdlite.rst
@@ -8,6 +8,7 @@
 The SSDLite model is based on the `SSD: Single Shot MultiBox Detector `__ and
 `MobileNetV2: Inverted Residuals and Linear Bottlenecks `__ papers.

+.. betastatus:: detection module

 Model builders
 --------------

diff --git a/docs/source/models/video_resnet.rst b/docs/source/models/video_resnet.rst
index a3f92b546b9..ecb707b4eeb 100644
--- a/docs/source/models/video_resnet.rst
+++ b/docs/source/models/video_resnet.rst
@@ -6,6 +6,8 @@ Video ResNet
 The VideoResNet model is based on the `A Closer Look at Spatiotemporal
 Convolutions for Action Recognition `__ paper.

+.. betastatus:: video module
+
 Model builders
 --------------

diff --git a/torchvision/io/image.py b/torchvision/io/image.py
index 339fe4318aa..174823752db 100644
--- a/torchvision/io/image.py
+++ b/torchvision/io/image.py
@@ -145,6 +145,8 @@ def decode_jpeg(
         with `nvjpeg `_. This is only supported for CUDA version >= 10.1

+    .. betastatus:: device parameter
+
     .. warning::
         There is a memory leak in the nvjpeg library for CUDA versions < 11.6.
         Make sure to rely on CUDA 11.6 or above before using ``device="cuda"``.
diff --git a/torchvision/io/video_reader.py b/torchvision/io/video_reader.py
index afd7fdf4be6..881b9d75bd4 100644
--- a/torchvision/io/video_reader.py
+++ b/torchvision/io/video_reader.py
@@ -30,6 +30,8 @@ class VideoReader:
     Supports frame-by-frame reading of various streams from a single video
     container.

+    .. betastatus:: VideoReader class
+
     Example:
         The following examples creates a :mod:`VideoReader` object, seeks into 2s
         point, and returns a single frame::
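The fine-grained video API flagged above is the ``VideoReader`` class. Following the example its
docstring describes (create a reader, seek to the 2s point, read a single frame), a rough usage
sketch, not part of the patch, assuming a local ``video.mp4`` and a torchvision build with video
support::

    import torchvision

    reader = torchvision.io.VideoReader("video.mp4", "video")
    reader.seek(2.0)            # jump to the 2 second mark
    frame = next(reader)        # dict with "data" (a [C, H, W] tensor) and "pts"
    print(frame["data"].shape, frame["pts"])
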
diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py
index 191acecf69f..fb98ca86b34 100644
--- a/torchvision/models/detection/faster_rcnn.py
+++ b/torchvision/models/detection/faster_rcnn.py
@@ -469,6 +469,8 @@ def fasterrcnn_resnet50_fpn(
     Detection with Region Proposal Networks `__ paper.

+    .. betastatus:: detection module
+
     The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
     image, and should be in ``0-1`` range. Different images can have different sizes.

@@ -580,6 +582,8 @@ def fasterrcnn_resnet50_fpn_v2(
     Constructs an improved Faster R-CNN model with a ResNet-50-FPN backbone from `Benchmarking Detection
     Transfer Learning with Vision Transformers `__ paper.

+    .. betastatus:: detection module
+
     It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See
     :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn` for more details.

@@ -697,6 +701,8 @@ def fasterrcnn_mobilenet_v3_large_320_fpn(
     """
     Low resolution Faster R-CNN model with a MobileNetV3-Large backbone tunned for mobile use cases.

+    .. betastatus:: detection module
+
     It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See
     :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn` for more details.

@@ -767,6 +773,9 @@ def fasterrcnn_mobilenet_v3_large_fpn(
 ) -> FasterRCNN:
     """
     Constructs a high resolution Faster R-CNN model with a MobileNetV3-Large FPN backbone.
+
+    .. betastatus:: detection module
+
     It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See
     :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn` for more details.

diff --git a/torchvision/models/detection/fcos.py b/torchvision/models/detection/fcos.py
index 63f42259ce6..b19da6637bb 100644
--- a/torchvision/models/detection/fcos.py
+++ b/torchvision/models/detection/fcos.py
@@ -685,6 +685,8 @@ def fcos_resnet50_fpn(
     """
     Constructs a FCOS model with a ResNet-50-FPN backbone.

+    .. betastatus:: detection module
+
     Reference: `FCOS: Fully Convolutional One-Stage Object Detection `_.
     `FCOS: A simple and strong anchor-free object detector `_.

diff --git a/torchvision/models/detection/keypoint_rcnn.py b/torchvision/models/detection/keypoint_rcnn.py
index c14ec2c7244..0052e49409c 100644
--- a/torchvision/models/detection/keypoint_rcnn.py
+++ b/torchvision/models/detection/keypoint_rcnn.py
@@ -375,6 +375,8 @@ def keypointrcnn_resnet50_fpn(
     """
     Constructs a Keypoint R-CNN model with a ResNet-50-FPN backbone.

+    .. betastatus:: detection module
+
     Reference: `Mask R-CNN `__.

     The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
diff --git a/torchvision/models/detection/mask_rcnn.py b/torchvision/models/detection/mask_rcnn.py
index 3988b5b1bcc..66dde13adff 100644
--- a/torchvision/models/detection/mask_rcnn.py
+++ b/torchvision/models/detection/mask_rcnn.py
@@ -412,6 +412,8 @@ def maskrcnn_resnet50_fpn(
     """Mask R-CNN model with a ResNet-50-FPN backbone from the `Mask R-CNN
     `_ paper.

+    .. betastatus:: detection module
+
     The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
     image, and should be in ``0-1`` range. Different images can have different sizes.

@@ -513,6 +515,8 @@ def maskrcnn_resnet50_fpn_v2(
     """Improved Mask R-CNN model with a ResNet-50-FPN backbone from the `Benchmarking Detection Transfer
     Learning with Vision Transformers `_ paper.

+    .. betastatus:: detection module
+
     :func:`~torchvision.models.detection.maskrcnn_resnet50_fpn` for more details.

     Args:
diff --git a/torchvision/models/detection/retinanet.py b/torchvision/models/detection/retinanet.py
index ea463f4c51e..18e6b432a4f 100644
--- a/torchvision/models/detection/retinanet.py
+++ b/torchvision/models/detection/retinanet.py
@@ -733,6 +733,8 @@ def retinanet_resnet50_fpn(
     """
     Constructs a RetinaNet model with a ResNet-50-FPN backbone.

+    .. betastatus:: detection module
+
     Reference: `Focal Loss for Dense Object Detection `_.

     The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
@@ -829,6 +831,8 @@ def retinanet_resnet50_fpn_v2(
     """
     Constructs an improved RetinaNet model with a ResNet-50-FPN backbone.

+    .. betastatus:: detection module
+
     Reference: `Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training
     Sample Selection `_.

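These detection builders all share the input contract their docstrings describe: a list of
``[C, H, W]`` tensors in ``0-1`` range, possibly with different sizes per image. A minimal inference
sketch, not part of the patch, assuming torchvision is installed (``weights=None`` and
``weights_backbone=None`` give random weights, so the outputs are meaningless)::

    import torch
    from torchvision.models.detection import fasterrcnn_resnet50_fpn

    model = fasterrcnn_resnet50_fpn(weights=None, weights_backbone=None)
    model.eval()
    images = [torch.rand(3, 300, 400), torch.rand(3, 480, 640)]  # sizes may differ
    with torch.no_grad():
        outputs = model(images)   # one dict per image: "boxes", "labels", "scores"
    print(outputs[0]["boxes"].shape)
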
diff --git a/torchvision/models/detection/ssd.py b/torchvision/models/detection/ssd.py
index 4ec56f76366..bcbea25d6d7 100644
--- a/torchvision/models/detection/ssd.py
+++ b/torchvision/models/detection/ssd.py
@@ -584,6 +584,8 @@ def ssd300_vgg16(
     """The SSD300 model is based on the `SSD: Single Shot MultiBox Detector
     `_ paper.

+    .. betastatus:: detection module
+
     The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
     image, and should be in 0-1 range. Different images can have different sizes but they will be resized
     to a fixed size before passing it to the backbone.
diff --git a/torchvision/models/detection/ssdlite.py b/torchvision/models/detection/ssdlite.py
index 4d721e87208..3be9b6fb9f2 100644
--- a/torchvision/models/detection/ssdlite.py
+++ b/torchvision/models/detection/ssdlite.py
@@ -222,6 +222,8 @@ def ssdlite320_mobilenet_v3_large(
     described at `Searching for MobileNetV3 `__ and
     `MobileNetV2: Inverted Residuals and Linear Bottlenecks `__.

+    .. betastatus:: detection module
+
     See :func:`~torchvision.models.detection.ssd300_vgg16` for more details.

     Example:
diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py
index 78f54cdc6d7..e232235f0ff 100644
--- a/torchvision/models/segmentation/deeplabv3.py
+++ b/torchvision/models/segmentation/deeplabv3.py
@@ -233,6 +233,8 @@ def deeplabv3_resnet50(
 ) -> DeepLabV3:
     """Constructs a DeepLabV3 model with a ResNet-50 backbone.

+    .. betastatus:: segmentation module
+
     Reference: `Rethinking Atrous Convolution for Semantic Image Segmentation `__.

     Args:
@@ -286,6 +288,8 @@ def deeplabv3_resnet101(
 ) -> DeepLabV3:
     """Constructs a DeepLabV3 model with a ResNet-101 backbone.

+    .. betastatus:: segmentation module
+
     Reference: `Rethinking Atrous Convolution for Semantic Image Segmentation `__.

     Args:
diff --git a/torchvision/models/segmentation/fcn.py b/torchvision/models/segmentation/fcn.py
index a1dd48c2f58..b44d0d7547a 100644
--- a/torchvision/models/segmentation/fcn.py
+++ b/torchvision/models/segmentation/fcn.py
@@ -126,6 +126,8 @@ def fcn_resnet50(
     """Fully-Convolutional Network model with a ResNet-50 backbone from the `Fully Convolutional
     Networks for Semantic Segmentation `_ paper.

+    .. betastatus:: segmentation module
+
     Args:
         weights (:class:`~torchvision.models.segmentation.FCN_ResNet50_Weights`, optional): The
             pretrained weights to use. See
@@ -182,6 +184,8 @@ def fcn_resnet101(
     """Fully-Convolutional Network model with a ResNet-101 backbone from the `Fully Convolutional
     Networks for Semantic Segmentation `_ paper.

+    .. betastatus:: segmentation module
+
     Args:
         weights (:class:`~torchvision.models.segmentation.FCN_ResNet101_Weights`, optional): The
             pretrained weights to use. See
diff --git a/torchvision/models/segmentation/lraspp.py b/torchvision/models/segmentation/lraspp.py
index ec4eba6eefc..385960cbde4 100644
--- a/torchvision/models/segmentation/lraspp.py
+++ b/torchvision/models/segmentation/lraspp.py
@@ -132,6 +132,8 @@ def lraspp_mobilenet_v3_large(
     """Constructs a Lite R-ASPP Network model with a MobileNetV3-Large backbone from
     `Searching for MobileNetV3 `_ paper.

+    .. betastatus:: segmentation module
+
     Args:
         weights (:class:`~torchvision.models.segmentation.LRASPP_MobileNet_V3_Large_Weights`, optional): The
             pretrained weights to use. See
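The segmentation builders annotated above (DeepLabV3, FCN, LR-ASPP) take a regular ``(N, C, H, W)``
batch and return a dict of per-pixel logits. A minimal sketch, not part of the patch, with random
weights, assuming torchvision is installed::

    import torch
    from torchvision.models.segmentation import deeplabv3_resnet50

    model = deeplabv3_resnet50(weights=None, weights_backbone=None, num_classes=21)
    model.eval()
    batch = torch.rand(1, 3, 224, 224)
    with torch.no_grad():
        out = model(batch)["out"]    # (N, num_classes, H, W) logits
    print(out.shape)                 # torch.Size([1, 21, 224, 224])
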
diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py
index 8eb47418365..cd40717bbbd 100644
--- a/torchvision/models/video/resnet.py
+++ b/torchvision/models/video/resnet.py
@@ -374,6 +374,8 @@ class R2Plus1D_18_Weights(WeightsEnum):
 def r3d_18(*, weights: Optional[R3D_18_Weights] = None, progress: bool = True, **kwargs: Any) -> VideoResNet:
     """Construct 18 layer Resnet3D model.

+    .. betastatus:: video module
+
     Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition `__.

     Args:
@@ -408,6 +410,8 @@ def r3d_18(*, weights: Optional[R3D_18_Weights] = None, progress: bool = True, **kwargs: Any) -> VideoResNet:
 def mc3_18(*, weights: Optional[MC3_18_Weights] = None, progress: bool = True, **kwargs: Any) -> VideoResNet:
     """Construct 18 layer Mixed Convolution network as in

+    .. betastatus:: video module
+
     Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition `__.

     Args:
@@ -442,6 +446,8 @@ def mc3_18(*, weights: Optional[MC3_18_Weights] = None, progress: bool = True, **kwargs: Any) -> VideoResNet:
 def r2plus1d_18(*, weights: Optional[R2Plus1D_18_Weights] = None, progress: bool = True, **kwargs: Any) -> VideoResNet:
     """Construct 18 layer deep R(2+1)D network as in

+    .. betastatus:: video module
+
     Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition `__.

     Args:
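Likewise for the video classification builders (``r3d_18``, ``mc3_18``, ``r2plus1d_18``): they take a
``(N, C, T, H, W)`` clip batch. A minimal sketch, not part of the patch, with random weights, assuming
torchvision is installed::

    import torch
    from torchvision.models.video import r3d_18

    model = r3d_18(weights=None)            # random weights, 400 classes by default
    model.eval()
    clip = torch.rand(1, 3, 16, 112, 112)   # (N, C, T, H, W)
    with torch.no_grad():
        logits = model(clip)
    print(logits.shape)                     # torch.Size([1, 400])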