diff --git a/nltk/internals.py b/nltk/internals.py
index aca26efd03..f5e8cec86f 100644
--- a/nltk/internals.py
+++ b/nltk/internals.py
@@ -74,9 +74,8 @@ def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None, blocking=Tru
         archives, and ZIP archives to search for class files.
     :type classpath: str
-    :param stdin, stdout, stderr: Specify the executed programs'
-        standard input, standard output and standard error file
-        handles, respectively. Valid values are ``subprocess.PIPE``,
+    :param stdin: Specify the executed program's
+        standard input file handle. Valid values are ``subprocess.PIPE``,
         an existing file descriptor (a positive integer), an existing
         file object, 'pipe', 'stdout', 'devnull' and None. ``subprocess.PIPE`` indicates that a new pipe to the
         child should be created. With None, no
@@ -86,6 +85,13 @@ def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None, blocking=Tru
         from the applications should be captured into the same file
         handle as for stdout.

+    :param stdout: Specify the executed program's standard output file
+        handle. See ``stdin`` for valid values.
+
+    :param stderr: Specify the executed program's standard error file
+        handle. See ``stdin`` for valid values.
+
+
     :param blocking: If ``false``, then return immediately after spawning
         the subprocess. In this case, the return value is the ``Popen`` object,
         and not a ``(stdout, stderr)`` tuple.
diff --git a/nltk/metrics/aline.py b/nltk/metrics/aline.py
index da87ff4452..c29258d1e2 100644
--- a/nltk/metrics/aline.py
+++ b/nltk/metrics/aline.py
@@ -1079,8 +1079,9 @@ def align(str1, str2, epsilon=0):
     """
     Compute the alignment of two phonetic strings.

-    :type str1, str2: str
-    :param str1, str2: Two strings to be aligned
+    :param str str1: First string to be aligned
+    :param str str2: Second string to be aligned
+
     :type epsilon: float (0.0 to 1.0)
     :param epsilon: Adjusts threshold similarity score for near-optimal
         alignments
diff --git a/nltk/test/corpus.doctest b/nltk/test/corpus.doctest
index 0fc6cf7cfa..9b640d431b 100644
--- a/nltk/test/corpus.doctest
+++ b/nltk/test/corpus.doctest
@@ -709,7 +709,7 @@ as XML files. These corpora are returned as ElementTree objects:
     'Prologue', 'Pyramus', 'Thisbe', 'Wall']

 subjectivity
------------
+------------

 The Subjectivity Dataset contains 5000 subjective and 5000 objective processed
 sentences.
diff --git a/nltk/test/featgram.doctest b/nltk/test/featgram.doctest
index 4eaa55ca8d..4274f2a811 100644
--- a/nltk/test/featgram.doctest
+++ b/nltk/test/featgram.doctest
@@ -5,7 +5,15 @@
 Feature Grammar Parsing
 =========================

-.. include:: ../../../nltk_book/definitions.rst
+.. definitions from nltk_book/definitions.rst
+
+.. role:: feat
+    :class: feature
+.. role:: fval
+    :class: fval
+.. |rarr| unicode:: U+2192 .. right arrow
+.. |dot| unicode:: U+2022 .. bullet
+.. |pi| unicode:: U+03C0

 Grammars can be parsed from strings.

@@ -116,7 +124,7 @@ standard CFGs is used to combine an incomplete edge that's expecting a
 nonterminal *B* with a following, complete edge whose left hand side
 matches *B*. In our current setting, rather than checking for a
 complete match, we test whether the expected category *B* will
-`unify`:dt: with the left hand side *B'* of a following complete
+unify with the left hand side *B'* of a following complete
 edge.
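For example, here is a minimal sketch of unification in action (``nltk`` is
already imported earlier in this doctest; the feature structures and variable
names here are purely illustrative):

>>> fs_need = nltk.FeatStruct("[CAT='NP', NUM=?n]")
>>> fs_found = nltk.FeatStruct("[CAT='NP', NUM='sg']")
>>> print(nltk.unify(fs_need, fs_found))
[ CAT = 'NP' ]
[ NUM = 'sg' ]

Note how the variable ``?n`` has been instantiated to ``'sg'`` in the result.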
We will explain in more detail in Section 9.2 how unification works; for the moment, it is enough to know that as a result of unification, any variable values of features in *B* will be @@ -237,10 +245,10 @@ Unification is commutative: >>> nltk.unify(nltk.unify(fs1, fs2), fs3) == nltk.unify(fs1, nltk.unify(fs2, fs3)) True -Unification between `FS`:math:\ :subscript:`0` and `FS`:math:\ -:subscript:`1` will fail if the two feature structures share a path |pi|, -but the value of |pi| in `FS`:math:\ :subscript:`0` is a distinct -atom from the value of |pi| in `FS`:math:\ :subscript:`1`. In NLTK, +Unification between *FS*:math:`_0` and *FS*:math:`_1` will fail if the +two feature structures share a path |pi|, +but the value of |pi| in *FS*:math:`_0` is a distinct +atom from the value of |pi| in *FS*:math:`_1`. In NLTK, this is implemented by setting the result of unification to be ``None``. diff --git a/nltk/test/gluesemantics.doctest b/nltk/test/gluesemantics.doctest index b2a969b7a9..8aaa211174 100644 --- a/nltk/test/gluesemantics.doctest +++ b/nltk/test/gluesemantics.doctest @@ -5,7 +5,6 @@ Glue Semantics ============================================================================== -.. include:: ../../../nltk_book/definitions.rst ====================== diff --git a/nltk/test/relextract.doctest b/nltk/test/relextract.doctest index 24a71f844d..1a66d52f8f 100644 --- a/nltk/test/relextract.doctest +++ b/nltk/test/relextract.doctest @@ -16,8 +16,8 @@ Information Extraction standardly consists of three subtasks: Named Entities ~~~~~~~~~~~~~~ -The IEER corpus is marked up for a variety of Named Entities. A `Named -Entity`:dt: (more strictly, a Named Entity mention) is a name of an +The IEER corpus is marked up for a variety of Named Entities. A Named +Entity (more strictly, a Named Entity mention) is a name of an entity belonging to a specified class. For example, the Named Entity classes in IEER include PERSON, LOCATION, ORGANIZATION, DATE and so on. Within NLTK, Named Entities are represented as subtrees within a diff --git a/nltk/test/tag.doctest b/nltk/test/tag.doctest index 27b96de307..06f81683a2 100644 --- a/nltk/test/tag.doctest +++ b/nltk/test/tag.doctest @@ -457,8 +457,7 @@ Regression Testing for issue #1025 ================================== We want to ensure that a RegexpTagger can be created with more than 100 patterns -and does not fail with: - "AssertionError: sorry, but this version only supports 100 named groups" +and does not fail with: "AssertionError: sorry, but this version only supports 100 named groups" >>> from nltk.tag import RegexpTagger >>> patterns = [(str(i), 'NNP',) for i in range(200)] diff --git a/nltk/test/wordnet.doctest b/nltk/test/wordnet.doctest index 28e07d3f0e..0650eb09eb 100644 --- a/nltk/test/wordnet.doctest +++ b/nltk/test/wordnet.doctest @@ -82,6 +82,7 @@ WordNet, using ISO-639 language codes. The synonyms of a word are returned as a nested list of synonyms of the different senses of the input word in the given language, since these different senses are not mutual synonyms: + >>> wn.synonyms('car') [['auto', 'automobile', 'machine', 'motorcar'], ['railcar', 'railroad_car', 'railway_car'], ['gondola'], ['elevator_car'], ['cable_car']] >>> wn.synonyms('coche', lang='spa') @@ -274,7 +275,7 @@ Wu-Palmer Similarity: Return a score denoting how similar two word senses are, based on the depth of the two senses in the taxonomy and that of their Least Common Subsumer (most specific ancestor node). 
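For instance, a minimal sketch using the classic dog/cat pair (``wn`` as
imported at the top of this doctest; the exact float formatting may vary
between versions):

>>> wn.synset('dog.n.01').wup_similarity(wn.synset('cat.n.01'))
0.8571428571428571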
Note that at this time the
-scores given do _not_ always agree with those given by Pedersen's Perl
+scores given do **not** always agree with those given by Pedersen's Perl
 implementation of Wordnet Similarity.

 The LCS does not necessarily feature in the shortest path connecting the
@@ -640,9 +641,9 @@ Endlessness vs. intractability in relation trees

 1. Endlessness
 --------------

-Until NLTK v. 3.5, the tree() function looped forever on symmetric
+Until NLTK v. 3.5, the ``tree()`` function looped forever on symmetric
 relations (verb_groups, attributes, and most also_sees). But in
-the current version, tree() now detects and discards these cycles:
+the current version, ``tree()`` now detects and discards these cycles:

 >>> from pprint import pprint
 >>> pprint(wn.synset('bound.a.01').tree(lambda s:s.also_sees()))
@@ -682,15 +683,15 @@ are mentioned in the output, together with the level where they occur:

 However, even after discarding the infinite cycles, some trees can remain
 intractable, due to combinatorial explosion in a relation. This happens in
-WordNet, because the also.sees() relation has a big Strongly Connected
+WordNet, because the ``also_sees()`` relation has a big Strongly Connected
 Component (_SCC_) consisting of 758 synsets, where any member node is
 transitively connected by the same relation, to all other members of the
 same SCC. This produces intractable relation trees for each of these 758
 synsets, i. e. trees that are too big to compute or display on any computer.

 For example, the synset 'concrete.a.01' is a member of the largest SCC,
-so its also_sees() tree is intractable, and can normally only be handled
-by limiting the "depth" parameter to display a small number of levels:
+so its ``also_sees()`` tree is intractable, and can normally only be handled
+by limiting the ``depth`` parameter to display a small number of levels:

 >>> from pprint import pprint
 >>> pprint(wn.synset('concrete.a.01').tree(lambda s:s.also_sees(),cut_mark='...',depth=2))
 [Synset('concrete.a.01'),
  [Synset('practical.a.01'),
   "Cycle(Synset('concrete.a.01'),0,...)",
   [Synset('possible.a.01'), '...'],
   [Synset('realistic.a.01'), '...'],
   [Synset('serviceable.a.01'), '...']],
  [Synset('real.a.01'),
   "Cycle(Synset('concrete.a.01'),0,...)",
   [Synset('genuine.a.01'), '...'],
   [Synset('realistic.a.01'), '...'],
   [Synset('sincere.a.01'), '...']],
@@ -708,20 +709,20 @@ by limiting the "depth" parameter to display a small number of levels:
  [Synset('tangible.a.01'), "Cycle(Synset('concrete.a.01'),0,...)"]]


-2.1 First solution: acyclic_tree()
-..................................
+2.1 First solution: ``acyclic_tree()``
+......................................

-On the other hand, the new acyclic_tree() function is able to also handle
-the intractable cases. The also_sees() acyclic tree of 'concrete.a.01' is
+On the other hand, the new ``acyclic_tree()`` function is also able to handle
+the intractable cases. The ``also_sees()`` acyclic tree of 'concrete.a.01' is
 several hundred lines long, so here is a simpler example, concerning a much
 smaller SCC: counting only five members, the SCC that includes 'bound.a.01'
-is tractable with the normal tree() function, as seen above.
+is tractable with the normal ``tree()`` function, as seen above.

-But while tree() only prunes redundancy within local branches, acyclic_tree
+But while ``tree()`` only prunes redundancy within local branches, ``acyclic_tree()``
 prunes the tree globally, thus discarding any additional redundancy, and
-produces a tree that includes all reachable nodes (i. e. a _spanning tree_).
-This tree is _minimal_ because it includes the reachable nodes only once,
-but it is not necessarily a _Minimum Spanning Tree_ (MST), because the
+produces a tree that includes all reachable nodes (i.e., a **spanning tree**).
+This tree is **minimal** because it includes the reachable nodes only once,
+but it is not necessarily a **Minimum Spanning Tree** (MST), because the
 Depth-first search strategy does not guarantee that nodes are reached
 through the lowest number of links (as Breadth-first search would).

@@ -732,7 +733,7 @@ through the lowest number of links (as Breadth-first search would).
    [Synset('restricted.a.01'), [Synset('classified.a.02')]]],
  [Synset('dependent.a.01')]]]

-Again, specifying the "cut_mark" parameter increases verbosity, so that the
+Again, specifying the ``cut_mark`` parameter increases verbosity, so that the
 cycles are mentioned in the output, together with the level where they occur:

 >>> pprint(wn.synset('bound.a.01').acyclic_tree(lambda s:s.also_sees(),cut_mark='...'))

@@ -756,9 +757,9 @@ A Minimum Spanning Tree (MST) spans all the nodes of a relation subgraph once,
 while guaranteeing that each node is reached through the shortest path possible. In
 unweighted relation graphs like WordNet, a MST can be computed very efficiently in
 linear time, using Breadth-First Search (BFS). Like acyclic_tree(), the new
-"unweighted_minimum_spanning_tree()" function (imported in the Wordnet
-module as "mst") handles intractable trees, such as the example discussed above:
-"wn.synset('concrete.a.01').mst(lambda s:s.also_sees())".
+``unweighted_minimum_spanning_tree()`` function (imported in the Wordnet
+module as ``mst``) handles intractable trees, such as the example discussed above:
+``wn.synset('concrete.a.01').mst(lambda s:s.also_sees())``.

 But, while the also_sees() acyclic_tree of 'bound.a.01' reaches 'classified.a.02'
 through four links, using depth-first search as seen above
diff --git a/web/Makefile b/web/Makefile
index 972cb37a98..da274dedd8 100644
--- a/web/Makefile
+++ b/web/Makefile
@@ -18,7 +18,6 @@ I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext

 web: clean_api
-	sphinx-apidoc -o api ../nltk
 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(WEB)
 	@echo
 	@echo "Build finished. The HTML pages are in $(WEB)."
diff --git a/web/_templates/doctest.rst b/web/_templates/doctest.rst
index c0e6408af9..e2f43c3167 100644
--- a/web/_templates/doctest.rst
+++ b/web/_templates/doctest.rst
@@ -1,9 +1,8 @@
 {# The :autogenerated: tag is picked up by breadcrumbs.html to suppress "Edit on Github" link #}
 :autogenerated:
+
 ..
-    This file is autogenerated by `generate_custom_files` in conf.py,
-    and ought to be generated via building the documentation through Sphinx, e.g.
-    with `sphinx-build ./web ./build` from the root directory.
+    This file is autogenerated by `sphinx-apidoc`.

 {% for item in range(17 + module_name|length) -%}#{%- endfor %}
 Sample usage for {{ module_name }}
diff --git a/web/_templates/module.rst b/web/_templates/module.rst
index 87ebe3a1be..5e6434daaf 100644
--- a/web/_templates/module.rst
+++ b/web/_templates/module.rst
@@ -1,9 +1,8 @@
 {# The :autogenerated: tag is picked up by breadcrumbs.html to suppress "Edit on Github" link #}
 :autogenerated:
+
 ..
-    This file is autogenerated by `generate_custom_files` in conf.py,
-    and ought to be generated via building the documentation through Sphinx, e.g.
-    with `sphinx-build ./web ./build` from the root directory.
+    This file is autogenerated by `sphinx-apidoc`.
{{ fullname }} module
{% for item in range(7 + fullname|length) -%}={%- endfor %}
diff --git a/web/_templates/package.rst b/web/_templates/package.rst
index 9cd2e60271..4905e44c6b 100644
--- a/web/_templates/package.rst
+++ b/web/_templates/package.rst
@@ -1,9 +1,8 @@
 {# The :autogenerated: tag is picked up by breadcrumbs.html to suppress "Edit on Github" link #}
 :autogenerated:
+
 ..
-    This file is autogenerated by `generate_custom_files` in conf.py,
-    and ought to be generated via building the documentation through Sphinx, e.g.
-    with `sphinx-build ./web ./build` from the root directory.
+    This file is autogenerated by `sphinx-apidoc`.

 {{ fullname }} package
 {% for item in range(8 + fullname|length) -%}={%- endfor %}
diff --git a/web/conf.py b/web/conf.py
index ec710bc546..c35151ca70 100644
--- a/web/conf.py
+++ b/web/conf.py
@@ -23,7 +23,7 @@
 # -- General configuration -----------------------------------------------------

 # If your documentation needs a minimal Sphinx version, state it here.
-# needs_sphinx = '1.0'
+# needs_sphinx = '2.2'

 # Add any Sphinx extension module names here, as strings. They can be extensions
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
@@ -32,26 +32,14 @@
     "sphinx.ext.coverage",
     "sphinx.ext.imgmath",
     "sphinx.ext.viewcode",
+    "sphinxcontrib.apidoc",
 ]

-
-def run_apidoc(app):
-    """Generage API documentation"""
-    import better_apidoc
-
-    better_apidoc.APP = app
-    better_apidoc.main(
-        [
-            "better-apidoc",
-            "-t",
-            os.path.join(".", "web", "_templates"),
-            "--force",
-            "--separate",
-            "-o",
-            os.path.join(".", "web", "api"),
-            os.path.join(".", "nltk"),
-        ]
-    )
+apidoc_module_dir = "../nltk"
+apidoc_output_dir = "api"
+apidoc_separate_modules = True
+apidoc_extra_args = ["--templatedir=_templates", "--force"]
+apidoc_excluded_paths = ["test"]


 def generate_custom_files():
@@ -184,10 +172,6 @@ def generate_custom_files():

 html_theme = "nltk_theme"


-def setup(app):
-    app.connect("builder-inited", run_apidoc)
-

 # Theme options are theme-specific and customize the look and feel of a theme
 # further. For a list of options available for each theme, see the
 # documentation.
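A note on the ``web/conf.py`` change above: the new ``apidoc_*`` options hand
API-stub generation to the ``sphinxcontrib.apidoc`` extension at build time,
which is why the explicit generation step could be dropped from ``web/Makefile``.
As a rough sketch (a hypothetical invocation, run from the ``web/`` directory),
the configured options correspond to:

    sphinx-apidoc --templatedir=_templates --force --separate -o api ../nltk ../nltk/test

where the trailing exclude path comes from ``apidoc_excluded_paths``, resolved
relative to ``apidoc_module_dir``. The bump of the commented ``needs_sphinx``
hint to ``'2.2'`` presumably tracks this change, since ``sphinx-apidoc`` only
gained its ``--templatedir`` option around Sphinx 2.2.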