From 01f891496aae3f02b8a24029a8aa0654e13d2b7a Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Tue, 14 Jun 2022 14:00:15 +0100
Subject: [PATCH] Add new `.. betastatus::` directive and document Beta APIs
 (#6115)

* Add new .. betastatus:: directive to document Beta APIs

* Also add it for the fine-grained video API

* Add directive for all builders and pages of Detection module

* Also segmentation and video models
---
 docs/source/beta_status.py | 20 +++++++++++++++++++
 docs/source/conf.py | 3 +++
 docs/source/io.rst | 2 ++
 docs/source/models.rst | 6 ++++++
 docs/source/models/deeplabv3.rst | 2 ++
 docs/source/models/faster_rcnn.rst | 2 ++
 docs/source/models/fcn.rst | 2 ++
 docs/source/models/fcos.rst | 2 ++
 docs/source/models/keypoint_rcnn.rst | 2 ++
 docs/source/models/lraspp.rst | 2 ++
 docs/source/models/mask_rcnn.rst | 2 ++
 docs/source/models/retinanet.rst | 2 ++
 docs/source/models/ssd.rst | 2 ++
 docs/source/models/ssdlite.rst | 1 +
 docs/source/models/video_resnet.rst | 2 ++
 torchvision/io/image.py | 2 ++
 torchvision/io/video_reader.py | 2 ++
 torchvision/models/detection/faster_rcnn.py | 9 +++++++++
 torchvision/models/detection/fcos.py | 2 ++
 torchvision/models/detection/keypoint_rcnn.py | 2 ++
 torchvision/models/detection/mask_rcnn.py | 4 ++++
 torchvision/models/detection/retinanet.py | 4 ++++
 torchvision/models/detection/ssd.py | 2 ++
 torchvision/models/detection/ssdlite.py | 2 ++
 torchvision/models/segmentation/deeplabv3.py | 4 ++++
 torchvision/models/segmentation/fcn.py | 4 ++++
 torchvision/models/segmentation/lraspp.py | 2 ++
 torchvision/models/video/resnet.py | 6 ++++++
 28 files changed, 97 insertions(+)
 create mode 100644 docs/source/beta_status.py

diff --git a/docs/source/beta_status.py b/docs/source/beta_status.py
new file mode 100644
index 00000000000..925894df5c5
--- /dev/null
+++ b/docs/source/beta_status.py
@@ -0,0 +1,20 @@
+from docutils import nodes
+from docutils.parsers.rst import Directive
+
+
+class BetaStatus(Directive):
+    has_content = True
+
+    def run(self):
+        api_name = " ".join(self.content)
+        text = f"The {api_name} is in Beta stage, and backward compatibility is not guaranteed."
+        return [nodes.warning("", nodes.paragraph("", "", nodes.Text(text)))]
+
+
+def setup(app):
+    app.add_directive("betastatus", BetaStatus)
+    return {
+        "version": "0.1",
+        "parallel_read_safe": True,
+        "parallel_write_safe": True,
+    }
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 014eb3c3ae9..8768dbdb80c 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -21,6 +21,7 @@
 # sys.path.insert(0, os.path.abspath('.'))

 import os
+import sys
 import textwrap
 from copy import copy
 from pathlib import Path
@@ -30,6 +31,7 @@
 import torchvision.models as M
 from tabulate import tabulate

+sys.path.append(os.path.abspath("."))

 # -- General configuration ------------------------------------------------

@@ -50,6 +52,7 @@
     "sphinx.ext.duration",
     "sphinx_gallery.gen_gallery",
     "sphinx_copybutton",
+    "beta_status",
 ]

 sphinx_gallery_conf = {
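With the extension registered in ``conf.py`` above, a documentation page or docstring opts into the
Beta banner by adding ``.. betastatus::`` followed by a short API name, exactly as the hunks below do.
The directive renders as a standard Sphinx ``warning`` admonition whose text interpolates that name.
A minimal sketch of the sentence it produces (illustrative only, not part of the patch; it simply
mirrors the f-string in ``BetaStatus.run()``)::

    # Hypothetical standalone check of the wording used by the directive.
    api_name = "detection module"  # joined from the directive's content
    text = f"The {api_name} is in Beta stage, and backward compatibility is not guaranteed."
    print(text)
    # The detection module is in Beta stage, and backward compatibility is not guaranteed.
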
diff --git a/docs/source/io.rst b/docs/source/io.rst
index c62b519efe6..258a1ee16dc 100644
--- a/docs/source/io.rst
+++ b/docs/source/io.rst
@@ -26,6 +26,8 @@ In addition to the :mod:`read_video` function, we provide a high-performance
 lower-level API for more fine-grained control compared to the :mod:`read_video` function.
 It does all this whilst fully supporting torchscript.

+.. betastatus:: fine-grained video API
+
 .. autosummary::
     :toctree: generated/
     :template: class.rst
diff --git a/docs/source/models.rst b/docs/source/models.rst
index b549c25bf94..9f8babca770 100644
--- a/docs/source/models.rst
+++ b/docs/source/models.rst
@@ -272,6 +272,8 @@ Semantic Segmentation

 .. currentmodule:: torchvision.models.segmentation

+.. betastatus:: segmentation module
+
 The following semantic segmentation models are available, with or without
 pre-trained weights:

@@ -334,6 +336,8 @@ keypoint detection are initialized with the classification models in
 torchvision. The models expect a list of ``Tensor[C, H, W]``. Check the constructor of
 the models for more information.

+.. betastatus:: detection module
+
 Object Detection
 ----------------

@@ -453,6 +457,8 @@ Video Classification

 .. currentmodule:: torchvision.models.video

+.. betastatus:: video module
+
 The following video classification models are available, with or without
 pre-trained weights:

diff --git a/docs/source/models/deeplabv3.rst b/docs/source/models/deeplabv3.rst
index 8b978e18358..e6f21686081 100644
--- a/docs/source/models/deeplabv3.rst
+++ b/docs/source/models/deeplabv3.rst
@@ -6,6 +6,8 @@ DeepLabV3
 The DeepLabV3 model is based on the `Rethinking Atrous Convolution for Semantic
 Image Segmentation `__ paper.

+.. betastatus:: segmentation module
+
 Model builders
 --------------

diff --git a/docs/source/models/faster_rcnn.rst b/docs/source/models/faster_rcnn.rst
index cbd461533ec..19ec9227886 100644
--- a/docs/source/models/faster_rcnn.rst
+++ b/docs/source/models/faster_rcnn.rst
@@ -3,10 +3,12 @@ Faster R-CNN

 .. currentmodule:: torchvision.models.detection

+
 The Faster R-CNN model is based on the `Faster R-CNN: Towards Real-Time Object
 Detection with Region Proposal Networks `__ paper.

+.. betastatus:: detection module

 Model builders
 --------------

diff --git a/docs/source/models/fcn.rst b/docs/source/models/fcn.rst
index ae76c50ff1b..efcdb37c0d5 100644
--- a/docs/source/models/fcn.rst
+++ b/docs/source/models/fcn.rst
@@ -7,6 +7,8 @@
 The FCN model is based on the `Fully Convolutional Networks for Semantic
 Segmentation `__ paper.

+.. betastatus:: segmentation module
+
 Model builders
 --------------

diff --git a/docs/source/models/fcos.rst b/docs/source/models/fcos.rst
index 4096cc30f68..1bcc4267678 100644
--- a/docs/source/models/fcos.rst
+++ b/docs/source/models/fcos.rst
@@ -6,6 +6,8 @@ FCOS
 The RetinaNet model is based on the `FCOS: Fully Convolutional One-Stage
 Object Detection `__ paper.

+.. betastatus:: detection module
+
 Model builders
 --------------

diff --git a/docs/source/models/keypoint_rcnn.rst b/docs/source/models/keypoint_rcnn.rst
index 8ac26b99660..ba677c7f8f3 100644
--- a/docs/source/models/keypoint_rcnn.rst
+++ b/docs/source/models/keypoint_rcnn.rst
@@ -6,6 +6,8 @@ Keypoint R-CNN
 The Keypoint R-CNN model is based on the `Mask R-CNN `__ paper.

+.. betastatus:: detection module
+
 Model builders
 --------------

diff --git a/docs/source/models/lraspp.rst b/docs/source/models/lraspp.rst
index 7d419eb21e8..312249c53e1 100644
--- a/docs/source/models/lraspp.rst
+++ b/docs/source/models/lraspp.rst
@@ -5,6 +5,8 @@ LRASPP
 The LRASPP model is based on the `Searching for MobileNetV3 `_ paper.

+.. betastatus:: segmentation module
+
 Model builders
 --------------

diff --git a/docs/source/models/mask_rcnn.rst b/docs/source/models/mask_rcnn.rst
index 243cef7815c..5887b6c71a6 100644
--- a/docs/source/models/mask_rcnn.rst
+++ b/docs/source/models/mask_rcnn.rst
@@ -6,6 +6,8 @@ Mask R-CNN
 The Mask R-CNN model is based on the `Mask R-CNN `__ paper.

+.. betastatus:: detection module
+
 Model builders
 --------------

diff --git a/docs/source/models/retinanet.rst b/docs/source/models/retinanet.rst
index 3475cc783c3..8613ae9aaab 100644
--- a/docs/source/models/retinanet.rst
+++ b/docs/source/models/retinanet.rst
@@ -6,6 +6,8 @@ RetinaNet
 The RetinaNet model is based on the `Focal Loss for Dense Object Detection `__ paper.

+.. betastatus:: detection module
+
 Model builders
 --------------

diff --git a/docs/source/models/ssd.rst b/docs/source/models/ssd.rst
index e240d34b178..7d73b234a28 100644
--- a/docs/source/models/ssd.rst
+++ b/docs/source/models/ssd.rst
@@ -6,6 +6,8 @@ SSD
 The SSD model is based on the `SSD: Single Shot MultiBox Detector `__ paper.

+.. betastatus:: detection module
+
 Model builders
 --------------

diff --git a/docs/source/models/ssdlite.rst b/docs/source/models/ssdlite.rst
index 1f8437a6ff1..bac1575c966 100644
--- a/docs/source/models/ssdlite.rst
+++ b/docs/source/models/ssdlite.rst
@@ -8,6 +8,7 @@
 The SSDLite model is based on the `SSD: Single Shot MultiBox Detector `__ and
 `MobileNetV2: Inverted Residuals and Linear Bottlenecks `__ papers.

+.. betastatus:: detection module

 Model builders
 --------------

diff --git a/docs/source/models/video_resnet.rst b/docs/source/models/video_resnet.rst
index a3f92b546b9..ecb707b4eeb 100644
--- a/docs/source/models/video_resnet.rst
+++ b/docs/source/models/video_resnet.rst
@@ -6,6 +6,8 @@ Video ResNet
 The VideoResNet model is based on the `A Closer Look at Spatiotemporal
 Convolutions for Action Recognition `__ paper.

+.. betastatus:: video module
+
 Model builders
 --------------

diff --git a/torchvision/io/image.py b/torchvision/io/image.py
index 339fe4318aa..174823752db 100644
--- a/torchvision/io/image.py
+++ b/torchvision/io/image.py
@@ -145,6 +145,8 @@ def decode_jpeg(
         with `nvjpeg `_. This is only supported for CUDA version >= 10.1

+    .. betastatus:: device parameter
+
     .. warning::
         There is a memory leak in the nvjpeg library for CUDA versions < 11.6.
         Make sure to rely on CUDA 11.6 or above before using ``device="cuda"``.
diff --git a/torchvision/io/video_reader.py b/torchvision/io/video_reader.py
index afd7fdf4be6..881b9d75bd4 100644
--- a/torchvision/io/video_reader.py
+++ b/torchvision/io/video_reader.py
@@ -30,6 +30,8 @@ class VideoReader:
     Supports frame-by-frame reading of various streams from a single video
     container.

+    .. betastatus:: VideoReader class
+
     Example:
         The following examples creates a :mod:`VideoReader` object, seeks into 2s
         point, and returns a single frame::
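The fine-grained video API flagged above is the ``VideoReader`` class. Following the example its
docstring describes (create a reader, seek to the 2s point, read a single frame), a rough usage
sketch, not part of the patch, assuming a local ``video.mp4`` and a torchvision build with video
support::

    import torchvision

    reader = torchvision.io.VideoReader("video.mp4", "video")
    reader.seek(2.0)            # jump to the 2 second mark
    frame = next(reader)        # dict with "data" (a [C, H, W] tensor) and "pts"
    print(frame["data"].shape, frame["pts"])
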
diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py
index 191acecf69f..fb98ca86b34 100644
--- a/torchvision/models/detection/faster_rcnn.py
+++ b/torchvision/models/detection/faster_rcnn.py
@@ -469,6 +469,8 @@ def fasterrcnn_resnet50_fpn(
     Detection with Region Proposal Networks `__ paper.

+    .. betastatus:: detection module
+
     The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
     image, and should be in ``0-1`` range. Different images can have different sizes.

@@ -580,6 +582,8 @@ def fasterrcnn_resnet50_fpn_v2(
     Constructs an improved Faster R-CNN model with a ResNet-50-FPN backbone from `Benchmarking Detection
     Transfer Learning with Vision Transformers `__ paper.

+    .. betastatus:: detection module
+
     It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See
     :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn` for more details.

@@ -697,6 +701,8 @@ def fasterrcnn_mobilenet_v3_large_320_fpn(
     """
     Low resolution Faster R-CNN model with a MobileNetV3-Large backbone tunned for mobile use cases.

+    .. betastatus:: detection module
+
     It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See
     :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn` for more details.

@@ -767,6 +773,9 @@ def fasterrcnn_mobilenet_v3_large_fpn(
 ) -> FasterRCNN:
     """
     Constructs a high resolution Faster R-CNN model with a MobileNetV3-Large FPN backbone.
+
+    .. betastatus:: detection module
+
     It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See
     :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn` for more details.

diff --git a/torchvision/models/detection/fcos.py b/torchvision/models/detection/fcos.py
index 63f42259ce6..b19da6637bb 100644
--- a/torchvision/models/detection/fcos.py
+++ b/torchvision/models/detection/fcos.py
@@ -685,6 +685,8 @@ def fcos_resnet50_fpn(
     """
     Constructs a FCOS model with a ResNet-50-FPN backbone.

+    .. betastatus:: detection module
+
     Reference: `FCOS: Fully Convolutional One-Stage Object Detection `_.
     `FCOS: A simple and strong anchor-free object detector `_.

diff --git a/torchvision/models/detection/keypoint_rcnn.py b/torchvision/models/detection/keypoint_rcnn.py
index c14ec2c7244..0052e49409c 100644
--- a/torchvision/models/detection/keypoint_rcnn.py
+++ b/torchvision/models/detection/keypoint_rcnn.py
@@ -375,6 +375,8 @@ def keypointrcnn_resnet50_fpn(
     """
     Constructs a Keypoint R-CNN model with a ResNet-50-FPN backbone.

+    .. betastatus:: detection module
+
     Reference: `Mask R-CNN `__.

     The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
diff --git a/torchvision/models/detection/mask_rcnn.py b/torchvision/models/detection/mask_rcnn.py
index 3988b5b1bcc..66dde13adff 100644
--- a/torchvision/models/detection/mask_rcnn.py
+++ b/torchvision/models/detection/mask_rcnn.py
@@ -412,6 +412,8 @@ def maskrcnn_resnet50_fpn(
     """Mask R-CNN model with a ResNet-50-FPN backbone from the `Mask R-CNN
     `_ paper.

+    .. betastatus:: detection module
+
     The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
     image, and should be in ``0-1`` range. Different images can have different sizes.

@@ -513,6 +515,8 @@ def maskrcnn_resnet50_fpn_v2(
     """Improved Mask R-CNN model with a ResNet-50-FPN backbone from the `Benchmarking Detection Transfer
     Learning with Vision Transformers `_ paper.

+    .. betastatus:: detection module
+
     :func:`~torchvision.models.detection.maskrcnn_resnet50_fpn` for more details.

     Args:
diff --git a/torchvision/models/detection/retinanet.py b/torchvision/models/detection/retinanet.py
index ea463f4c51e..18e6b432a4f 100644
--- a/torchvision/models/detection/retinanet.py
+++ b/torchvision/models/detection/retinanet.py
@@ -733,6 +733,8 @@ def retinanet_resnet50_fpn(
     """
     Constructs a RetinaNet model with a ResNet-50-FPN backbone.

+    .. betastatus:: detection module
+
     Reference: `Focal Loss for Dense Object Detection `_.

     The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
@@ -829,6 +831,8 @@ def retinanet_resnet50_fpn_v2(
     """
     Constructs an improved RetinaNet model with a ResNet-50-FPN backbone.

+    .. betastatus:: detection module
+
     Reference: `Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training
     Sample Selection `_.

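These detection builders all share the input contract their docstrings describe: a list of
``[C, H, W]`` tensors in ``0-1`` range, possibly with different sizes per image. A minimal inference
sketch, not part of the patch, assuming torchvision is installed (``weights=None`` and
``weights_backbone=None`` give random weights, so the outputs are meaningless)::

    import torch
    from torchvision.models.detection import fasterrcnn_resnet50_fpn

    model = fasterrcnn_resnet50_fpn(weights=None, weights_backbone=None)
    model.eval()
    images = [torch.rand(3, 300, 400), torch.rand(3, 480, 640)]  # sizes may differ
    with torch.no_grad():
        outputs = model(images)   # one dict per image: "boxes", "labels", "scores"
    print(outputs[0]["boxes"].shape)
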
diff --git a/torchvision/models/detection/ssd.py b/torchvision/models/detection/ssd.py
index 4ec56f76366..bcbea25d6d7 100644
--- a/torchvision/models/detection/ssd.py
+++ b/torchvision/models/detection/ssd.py
@@ -584,6 +584,8 @@ def ssd300_vgg16(
     """The SSD300 model is based on the `SSD: Single Shot MultiBox Detector
     `_ paper.

+    .. betastatus:: detection module
+
     The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
     image, and should be in 0-1 range. Different images can have different sizes but they will be resized
     to a fixed size before passing it to the backbone.
diff --git a/torchvision/models/detection/ssdlite.py b/torchvision/models/detection/ssdlite.py
index 4d721e87208..3be9b6fb9f2 100644
--- a/torchvision/models/detection/ssdlite.py
+++ b/torchvision/models/detection/ssdlite.py
@@ -222,6 +222,8 @@ def ssdlite320_mobilenet_v3_large(
     described at `Searching for MobileNetV3 `__ and
     `MobileNetV2: Inverted Residuals and Linear Bottlenecks `__.

+    .. betastatus:: detection module
+
     See :func:`~torchvision.models.detection.ssd300_vgg16` for more details.

     Example:
diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py
index 78f54cdc6d7..e232235f0ff 100644
--- a/torchvision/models/segmentation/deeplabv3.py
+++ b/torchvision/models/segmentation/deeplabv3.py
@@ -233,6 +233,8 @@ def deeplabv3_resnet50(
 ) -> DeepLabV3:
     """Constructs a DeepLabV3 model with a ResNet-50 backbone.

+    .. betastatus:: segmentation module
+
     Reference: `Rethinking Atrous Convolution for Semantic Image Segmentation `__.

     Args:
@@ -286,6 +288,8 @@ def deeplabv3_resnet101(
 ) -> DeepLabV3:
     """Constructs a DeepLabV3 model with a ResNet-101 backbone.

+    .. betastatus:: segmentation module
+
     Reference: `Rethinking Atrous Convolution for Semantic Image Segmentation `__.

     Args:
diff --git a/torchvision/models/segmentation/fcn.py b/torchvision/models/segmentation/fcn.py
index a1dd48c2f58..b44d0d7547a 100644
--- a/torchvision/models/segmentation/fcn.py
+++ b/torchvision/models/segmentation/fcn.py
@@ -126,6 +126,8 @@ def fcn_resnet50(
     """Fully-Convolutional Network model with a ResNet-50 backbone from the `Fully Convolutional
     Networks for Semantic Segmentation `_ paper.

+    .. betastatus:: segmentation module
+
     Args:
         weights (:class:`~torchvision.models.segmentation.FCN_ResNet50_Weights`, optional): The
             pretrained weights to use. See
@@ -182,6 +184,8 @@ def fcn_resnet101(
     """Fully-Convolutional Network model with a ResNet-101 backbone from the `Fully Convolutional
     Networks for Semantic Segmentation `_ paper.

+    .. betastatus:: segmentation module
+
     Args:
         weights (:class:`~torchvision.models.segmentation.FCN_ResNet101_Weights`, optional): The
             pretrained weights to use. See
diff --git a/torchvision/models/segmentation/lraspp.py b/torchvision/models/segmentation/lraspp.py
index ec4eba6eefc..385960cbde4 100644
--- a/torchvision/models/segmentation/lraspp.py
+++ b/torchvision/models/segmentation/lraspp.py
@@ -132,6 +132,8 @@ def lraspp_mobilenet_v3_large(
     """Constructs a Lite R-ASPP Network model with a MobileNetV3-Large backbone from
     `Searching for MobileNetV3 `_ paper.

+    .. betastatus:: segmentation module
+
     Args:
         weights (:class:`~torchvision.models.segmentation.LRASPP_MobileNet_V3_Large_Weights`, optional): The
             pretrained weights to use. See
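The segmentation builders annotated above (DeepLabV3, FCN, LR-ASPP) take a regular ``(N, C, H, W)``
batch and return a dict of per-pixel logits. A minimal sketch, not part of the patch, with random
weights, assuming torchvision is installed::

    import torch
    from torchvision.models.segmentation import deeplabv3_resnet50

    model = deeplabv3_resnet50(weights=None, weights_backbone=None, num_classes=21)
    model.eval()
    batch = torch.rand(1, 3, 224, 224)
    with torch.no_grad():
        out = model(batch)["out"]    # (N, num_classes, H, W) logits
    print(out.shape)                 # torch.Size([1, 21, 224, 224])
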
diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py
index 8eb47418365..cd40717bbbd 100644
--- a/torchvision/models/video/resnet.py
+++ b/torchvision/models/video/resnet.py
@@ -374,6 +374,8 @@ class R2Plus1D_18_Weights(WeightsEnum):
 def r3d_18(*, weights: Optional[R3D_18_Weights] = None, progress: bool = True, **kwargs: Any) -> VideoResNet:
     """Construct 18 layer Resnet3D model.

+    .. betastatus:: video module
+
     Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition `__.

     Args:
@@ -408,6 +410,8 @@ def r3d_18(*, weights: Optional[R3D_18_Weights] = None, progress: bool = True, **kwargs: Any) -> VideoResNet:
 def mc3_18(*, weights: Optional[MC3_18_Weights] = None, progress: bool = True, **kwargs: Any) -> VideoResNet:
     """Construct 18 layer Mixed Convolution network as in

+    .. betastatus:: video module
+
     Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition `__.

     Args:
@@ -442,6 +446,8 @@ def mc3_18(*, weights: Optional[MC3_18_Weights] = None, progress: bool = True, **kwargs: Any) -> VideoResNet:
 def r2plus1d_18(*, weights: Optional[R2Plus1D_18_Weights] = None, progress: bool = True, **kwargs: Any) -> VideoResNet:
     """Construct 18 layer deep R(2+1)D network as in

+    .. betastatus:: video module
+
     Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition `__.

     Args:
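Likewise for the video classification builders (``r3d_18``, ``mc3_18``, ``r2plus1d_18``): they take a
``(N, C, T, H, W)`` clip batch. A minimal sketch, not part of the patch, with random weights, assuming
torchvision is installed::

    import torch
    from torchvision.models.video import r3d_18

    model = r3d_18(weights=None)            # random weights, 400 classes by default
    model.eval()
    clip = torch.rand(1, 3, 16, 112, 112)   # (N, C, T, H, W)
    with torch.no_grad():
        logits = model(clip)
    print(logits.shape)                     # torch.Size([1, 400])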