Merge pull request #3012 from rmalouf/docs
Fix errors in API docs
stevenbird committed Jun 13, 2022
2 parents 018a858 + bb2d16f commit 6de8254
Showing 13 changed files with 63 additions and 69 deletions.
12 changes: 9 additions & 3 deletions nltk/internals.py
@@ -74,9 +74,8 @@ def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None, blocking=True):
archives, and ZIP archives to search for class files.
:type classpath: str
:param stdin, stdout, stderr: Specify the executed programs'
standard input, standard output and standard error file
handles, respectively. Valid values are ``subprocess.PIPE``,
:param stdin: Specify the executed program's
standard input file handles, respectively. Valid values are ``subprocess.PIPE``,
an existing file descriptor (a positive integer), an existing
file object, 'pipe', 'stdout', 'devnull' and None. ``subprocess.PIPE`` indicates that a
new pipe to the child should be created. With None, no
@@ -86,6 +85,13 @@ def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None, blocking=True):
from the applications should be captured into the same file
handle as for stdout.
:param stdout: Specify the executed program's standard output file
handle. See ``stdin`` for valid values.
:param stderr: Specify the executed program's standard error file
handle. See ``stdin`` for valid values.
:param blocking: If ``false``, then return immediately after
spawning the subprocess. In this case, the return value is
the ``Popen`` object, and not a ``(stdout, stderr)`` tuple.
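
For illustration, a minimal sketch of how the documented parameters are typically combined (not part of this commit; the jar and main class below are hypothetical):

    import subprocess
    from nltk.internals import java

    # Hypothetical main class and classpath, shown only to illustrate the
    # parameters documented above: capture both streams via pipes and wait
    # for the subprocess, so a (stdout, stderr) tuple is returned.
    stdout, stderr = java(
        ["SomeMainClass", "-input", "doc.txt"],
        classpath="some-tool.jar",
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        blocking=True,
    )
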
5 changes: 3 additions & 2 deletions nltk/metrics/aline.py
@@ -1079,8 +1079,9 @@ def align(str1, str2, epsilon=0):
"""
Compute the alignment of two phonetic strings.
:type str1, str2: str
:param str1, str2: Two strings to be aligned
:param str str1: First string to be aligned
:param str str2: Second string to be aligned
:type epsilon: float (0.0 to 1.0)
:param epsilon: Adjusts threshold similarity score for near-optimal alignments
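
For reference, a small usage sketch of the corrected signature (not part of the diff; the alignment output is omitted here):

    from nltk.metrics import aline

    # Align two phonetic strings; the result is a list of alignments, each a
    # list of aligned segment pairs, with '-' marking an unmatched position.
    alignments = aline.align('θin', 'tenwis')  # epsilon defaults to 0
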
2 changes: 1 addition & 1 deletion nltk/test/corpus.doctest
@@ -709,7 +709,7 @@ as XML files. These corpora are returned as ElementTree objects:
'Prologue', 'Pyramus', 'Thisbe', 'Wall']

subjectivity
-----------
------------
The Subjectivity Dataset contains 5000 subjective and 5000 objective processed
sentences.

20 changes: 14 additions & 6 deletions nltk/test/featgram.doctest
@@ -5,7 +5,15 @@
Feature Grammar Parsing
=========================

.. include:: ../../../nltk_book/definitions.rst
.. definitions from nltk_book/definitions.rst

.. role:: feat
:class: feature
.. role:: fval
:class: fval
.. |rarr| unicode:: U+2192 .. right arrow
.. |dot| unicode:: U+2022 .. bullet
.. |pi| unicode:: U+03C0

Grammars can be parsed from strings.

@@ -116,7 +124,7 @@ standard CFGs is used to combine an incomplete edge that's expecting a
nonterminal *B* with a following, complete edge whose left hand side
matches *B*. In our current setting, rather than checking for a
complete match, we test whether the expected category *B* will
`unify`:dt: with the left hand side *B'* of a following complete
unify with the left hand side *B'* of a following complete
edge. We will explain in more detail in Section 9.2 how
unification works; for the moment, it is enough to know that as a
result of unification, any variable values of features in *B* will be
@@ -237,10 +245,10 @@ Unification is commutative:
>>> nltk.unify(nltk.unify(fs1, fs2), fs3) == nltk.unify(fs1, nltk.unify(fs2, fs3))
True

Unification between `FS`:math:\ :subscript:`0` and `FS`:math:\
:subscript:`1` will fail if the two feature structures share a path |pi|,
but the value of |pi| in `FS`:math:\ :subscript:`0` is a distinct
atom from the value of |pi| in `FS`:math:\ :subscript:`1`. In NLTK,
Unification between *FS*:math:`_0` and *FS*:math:`_1` will fail if the
two feature structures share a path |pi|,
but the value of |pi| in *FS*:math:`_0` is a distinct
atom from the value of |pi| in *FS*:math:`_1`. In NLTK,
this is implemented by setting the result of unification to be
``None``.
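
The failure case can be illustrated directly, in the same style as the doctests above (a small sketch, not part of the original file):

>>> fs_sg = nltk.FeatStruct(NUM='sg')
>>> fs_pl = nltk.FeatStruct(NUM='pl')
>>> print(nltk.unify(fs_sg, fs_pl))
None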

1 change: 0 additions & 1 deletion nltk/test/gluesemantics.doctest
@@ -5,7 +5,6 @@
Glue Semantics
==============================================================================

.. include:: ../../../nltk_book/definitions.rst


======================
4 changes: 2 additions & 2 deletions nltk/test/relextract.doctest
@@ -16,8 +16,8 @@ Information Extraction standardly consists of three subtasks:
Named Entities
~~~~~~~~~~~~~~

The IEER corpus is marked up for a variety of Named Entities. A `Named
Entity`:dt: (more strictly, a Named Entity mention) is a name of an
The IEER corpus is marked up for a variety of Named Entities. A Named
Entity (more strictly, a Named Entity mention) is a name of an
entity belonging to a specified class. For example, the Named Entity
classes in IEER include PERSON, LOCATION, ORGANIZATION, DATE and so
on. Within NLTK, Named Entities are represented as subtrees within a
3 changes: 1 addition & 2 deletions nltk/test/tag.doctest
@@ -457,8 +457,7 @@ Regression Testing for issue #1025
==================================

We want to ensure that a RegexpTagger can be created with more than 100 patterns
and does not fail with:
"AssertionError: sorry, but this version only supports 100 named groups"
and does not fail with: "AssertionError: sorry, but this version only supports 100 named groups"

>>> from nltk.tag import RegexpTagger
>>> patterns = [(str(i), 'NNP',) for i in range(200)]
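
The point of the regression is simply that construction succeeds; a minimal sketch of that step (the original test continues below this excerpt):

>>> tagger = RegexpTagger(patterns)
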
39 changes: 20 additions & 19 deletions nltk/test/wordnet.doctest
@@ -82,6 +82,7 @@ WordNet, using ISO-639 language codes.

The synonyms of a word are returned as a nested list of synonyms of the different senses of
the input word in the given language, since these different senses are not mutual synonyms:

>>> wn.synonyms('car')
[['auto', 'automobile', 'machine', 'motorcar'], ['railcar', 'railroad_car', 'railway_car'], ['gondola'], ['elevator_car'], ['cable_car']]
>>> wn.synonyms('coche', lang='spa')
@@ -274,7 +275,7 @@ Wu-Palmer Similarity:
Return a score denoting how similar two word senses are, based on the
depth of the two senses in the taxonomy and that of their Least Common
Subsumer (most specific ancestor node). Note that at this time the
scores given do _not_ always agree with those given by Pedersen's Perl
scores given do **not** always agree with those given by Pedersen's Perl
implementation of Wordnet Similarity.
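
For reference, a minimal sketch of the call being described (not part of the diff; the exact score is omitted, since it can differ across WordNet releases):

>>> dog = wn.synset('dog.n.01')
>>> cat = wn.synset('cat.n.01')
>>> score = dog.wup_similarity(cat)  # a float in (0, 1]; higher means more similar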

The LCS does not necessarily feature in the shortest path connecting the
@@ -640,9 +641,9 @@ Endlessness vs. intractability in relation trees
1. Endlessness
--------------

Until NLTK v. 3.5, the tree() function looped forever on symmetric
Until NLTK v. 3.5, the ``tree()`` function looped forever on symmetric
relations (verb_groups, attributes, and most also_sees). But in
the current version, tree() now detects and discards these cycles:
the current version, ``tree()`` now detects and discards these cycles:

>>> from pprint import pprint
>>> pprint(wn.synset('bound.a.01').tree(lambda s:s.also_sees()))
@@ -682,15 +683,15 @@ are mentioned in the output, together with the level where they occur:

However, even after discarding the infinite cycles, some trees can remain
intractable, due to combinatorial explosion in a relation. This happens in
WordNet, because the also.sees() relation has a big Strongly Connected
WordNet, because the ``also_sees()`` relation has a big Strongly Connected
Component (_SCC_) consisting in 758 synsets, where any member node is
transitively connected by the same relation, to all other members of the
same SCC. This produces intractable relation trees for each of these 758
synsets, i. e. trees that are too big to compute or display on any computer.

For example, the synset 'concrete.a.01' is a member of the largest SCC,
so its also_sees() tree is intractable, and can normally only be handled
by limiting the "depth" parameter to display a small number of levels:
so its ``also_sees()`` tree is intractable, and can normally only be handled
by limiting the ``depth`` parameter to display a small number of levels:

>>> from pprint import pprint
>>> pprint(wn.synset('concrete.a.01').tree(lambda s:s.also_sees(),cut_mark='...',depth=2))
@@ -708,20 +709,20 @@ by limiting the "depth" parameter to display a small number of levels:
[Synset('tangible.a.01'), "Cycle(Synset('concrete.a.01'),0,...)"]]


2.1 First solution: acyclic_tree()
..................................
2.1 First solution: ``acyclic_tree()``
......................................

On the other hand, the new acyclic_tree() function is able to also handle
the intractable cases. The also_sees() acyclic tree of 'concrete.a.01' is
On the other hand, the new ``acyclic_tree()`` function is able to also handle
the intractable cases. The ``also_sees()`` acyclic tree of 'concrete.a.01' is
several hundred lines long, so here is a simpler example, concerning a much
smaller SCC: counting only five members, the SCC that includes 'bound.a.01'
is tractable with the normal tree() function, as seen above.
is tractable with the normal ``tree()`` function, as seen above.

But while tree() only prunes redundancy within local branches, acyclic_tree
But while ``tree()`` only prunes redundancy within local branches, ``acyclic_tree()``
prunes the tree globally, thus discarding any additional redundancy, and
produces a tree that includes all reachable nodes (i. e. a _spanning tree_).
This tree is _minimal_ because it includes the reachable nodes only once,
but it is not necessarily a _Minimum Spanning Tree_ (MST), because the
produces a tree that includes all reachable nodes (i.e., a **spanning tree**).
This tree is **minimal** because it includes the reachable nodes only once,
but it is not necessarily a **Minimum Spanning Tree** (MST), because the
Depth-first search strategy does not guarantee that nodes are reached
through the lowest number of links (as Breadth-first search would).

@@ -732,7 +733,7 @@ through the lowest number of links (as Breadth-first search would).
[Synset('restricted.a.01'), [Synset('classified.a.02')]]],
[Synset('dependent.a.01')]]]

Again, specifying the "cut_mark" parameter increases verbosity, so that the
Again, specifying the ``cut_mark`` parameter increases verbosity, so that the
cycles are mentioned in the output, together with the level where they occur:

>>> pprint(wn.synset('bound.a.01').acyclic_tree(lambda s:s.also_sees(),cut_mark='...'))
@@ -756,9 +757,9 @@ A Minimum Spanning Tree (MST) spans all the nodes of a relation subgraph once,
while guaranteeing that each node is reached through the shortest path possible.
In unweighted relation graphs like WordNet, a MST can be computed very efficiently
in linear time, using Breadth-First Search (BFS). Like acyclic_tree(), the new
"unweighted_minimum_spanning_tree()" function (imported in the Wordnet
module as "mst") handles intractable trees, such as the example discussed above:
"wn.synset('concrete.a.01').mst(lambda s:s.also_sees())".
``unweighted_minimum_spanning_tree()`` function (imported in the Wordnet
module as ``mst``) handles intractable trees, such as the example discussed above:
``wn.synset('concrete.a.01').mst(lambda s:s.also_sees())``.
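
A usage sketch of the same call applied to the smaller SCC around 'bound.a.01' (a sketch only; the result is bound to a variable here rather than printed, since the tree's exact shape depends on traversal order):

>>> bound_mst = wn.synset('bound.a.01').mst(lambda s: s.also_sees())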

But, while the also_sees() acyclic_tree of 'bound.a.01' reaches
'classified.a.02' through four links, using depth-first search as seen above
1 change: 0 additions & 1 deletion web/Makefile
@@ -18,7 +18,6 @@ I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext

web: clean_api
sphinx-apidoc -o api ../nltk
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(WEB)
@echo
@echo "Build finished. The HTML pages are in $(WEB)."
5 changes: 2 additions & 3 deletions web/_templates/doctest.rst
@@ -1,9 +1,8 @@
{# The :autogenerated: tag is picked up by breadcrumbs.html to suppress "Edit on Github" link #}
:autogenerated:

..
This file is autogenerated by `generate_custom_files` in conf.py,
and ought to be generated via building the documentation through Sphinx, e.g.
with `sphinx-build ./web ./build` from the root directory.
This file is autogenerated by `sphinx-api`.
{% for item in range(17 + module_name|length) -%}#{%- endfor %}
Sample usage for {{ module_name }}
5 changes: 2 additions & 3 deletions web/_templates/module.rst
@@ -1,9 +1,8 @@
{# The :autogenerated: tag is picked up by breadcrumbs.html to suppress "Edit on Github" link #}
:autogenerated:

..
This file is autogenerated by `generate_custom_files` in conf.py,
and ought to be generated via building the documentation through Sphinx, e.g.
with `sphinx-build ./web ./build` from the root directory.
This file is autogenerated by `sphinx-api`.
{{ fullname }} module
{% for item in range(7 + fullname|length) -%}={%- endfor %}
5 changes: 2 additions & 3 deletions web/_templates/package.rst
@@ -1,9 +1,8 @@
{# The :autogenerated: tag is picked up by breadcrumbs.html to suppress "Edit on Github" link #}
:autogenerated:

..
This file is autogenerated by `generate_custom_files` in conf.py,
and ought to be generated via building the documentation through Sphinx, e.g.
with `sphinx-build ./web ./build` from the root directory.
This file is autogenerated by `sphinx-api`.
{{ fullname }} package
{% for item in range(8 + fullname|length) -%}={%- endfor %}
30 changes: 7 additions & 23 deletions web/conf.py
@@ -23,7 +23,7 @@
# -- General configuration -----------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
# needs_sphinx = '1.0'
# needs_sphinx = '2.2'

# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
@@ -32,26 +32,14 @@
"sphinx.ext.coverage",
"sphinx.ext.imgmath",
"sphinx.ext.viewcode",
"sphinxcontrib.apidoc",
]


def run_apidoc(app):
"""Generage API documentation"""
import better_apidoc

better_apidoc.APP = app
better_apidoc.main(
[
"better-apidoc",
"-t",
os.path.join(".", "web", "_templates"),
"--force",
"--separate",
"-o",
os.path.join(".", "web", "api"),
os.path.join(".", "nltk"),
]
)
apidoc_module_dir = "../nltk"
apidoc_output_dir = "api"
apidoc_separate_modules = True
apidoc_extra_args = ["--templatedir=_templates", "--force"]
apidoc_excluded_paths = ["test"]


def generate_custom_files():
@@ -184,10 +172,6 @@ def generate_custom_files():
html_theme = "nltk_theme"


def setup(app):
app.connect("builder-inited", run_apidoc)


# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.