From 1c6d45f6b0f1a2821c344dda094a0a1d157ac933 Mon Sep 17 00:00:00 2001 From: Rob Malouf Date: Wed, 8 Jun 2022 16:59:29 -0700 Subject: [PATCH 1/6] Fix "Field list ends without a blank line" warnings; get rid of better-apidoc (templates are now part of standard sphinx-apidoc) --- web/Makefile | 2 +- web/_templates/doctest.rst | 5 ++--- web/_templates/module.rst | 5 ++--- web/_templates/package.rst | 5 ++--- web/conf.py | 26 +------------------------- 5 files changed, 8 insertions(+), 35 deletions(-) diff --git a/web/Makefile b/web/Makefile index 972cb37a98..9ae3031389 100644 --- a/web/Makefile +++ b/web/Makefile @@ -18,7 +18,7 @@ I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext web: clean_api - sphinx-apidoc -o api ../nltk + sphinx-apidoc --templatedir=_templates --force --separate -o api ../nltk $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(WEB) @echo @echo "Build finished. The HTML pages are in $(WEB)." diff --git a/web/_templates/doctest.rst b/web/_templates/doctest.rst index c0e6408af9..e2f43c3167 100644 --- a/web/_templates/doctest.rst +++ b/web/_templates/doctest.rst @@ -1,9 +1,8 @@ {# The :autogenerated: tag is picked up by breadcrumbs.html to suppress "Edit on Github" link #} :autogenerated: + .. - This file is autogenerated by `generate_custom_files` in conf.py, - and ought to be generated via building the documentation through Sphinx, e.g. - with `sphinx-build ./web ./build` from the root directory. + This file is autogenerated by `sphinx-apidoc`. 
{% for item in range(17 + module_name|length) -%}#{%- endfor %} Sample usage for {{ module_name }} diff --git a/web/_templates/module.rst b/web/_templates/module.rst index 87ebe3a1be..5e6434daaf 100644 --- a/web/_templates/module.rst +++ b/web/_templates/module.rst @@ -1,9 +1,8 @@ {# The :autogenerated: tag is picked up by breadcrumbs.html to suppress "Edit on Github" link #} :autogenerated: + .. - This file is autogenerated by `generate_custom_files` in conf.py, - and ought to be generated via building the documentation through Sphinx, e.g. - with `sphinx-build ./web ./build` from the root directory. + This file is autogenerated by `sphinx-apidoc`. {{ fullname }} module {% for item in range(7 + fullname|length) -%}={%- endfor %} diff --git a/web/_templates/package.rst b/web/_templates/package.rst index 9cd2e60271..4905e44c6b 100644 --- a/web/_templates/package.rst +++ b/web/_templates/package.rst @@ -1,9 +1,8 @@ {# The :autogenerated: tag is picked up by breadcrumbs.html to suppress "Edit on Github" link #} :autogenerated: + .. - This file is autogenerated by `generate_custom_files` in conf.py, - and ought to be generated via building the documentation through Sphinx, e.g. - with `sphinx-build ./web ./build` from the root directory. + This file is autogenerated by `sphinx-apidoc`. {{ fullname }} package {% for item in range(8 + fullname|length) -%}={%- endfor %} diff --git a/web/conf.py b/web/conf.py index ec710bc546..abbad9674f 100644 --- a/web/conf.py +++ b/web/conf.py @@ -23,7 +23,7 @@ # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' +# needs_sphinx = '2.2' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
@@ -34,26 +34,6 @@ "sphinx.ext.viewcode", ] - -def run_apidoc(app): - """Generage API documentation""" - import better_apidoc - - better_apidoc.APP = app - better_apidoc.main( - [ - "better-apidoc", - "-t", - os.path.join(".", "web", "_templates"), - "--force", - "--separate", - "-o", - os.path.join(".", "web", "api"), - os.path.join(".", "nltk"), - ] - ) - - def generate_custom_files(): """Generating contents in the ``howto`` folder, based on the ``ntlk/test/*.doctest`` files, as well @@ -184,10 +164,6 @@ def generate_custom_files(): html_theme = "nltk_theme" -def setup(app): - app.connect("builder-inited", run_apidoc) - - # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. From 82a3ea36b4f8486b3b09ae29fa651fdc8e5ff987 Mon Sep 17 00:00:00 2001 From: Rob Malouf Date: Wed, 8 Jun 2022 18:14:40 -0700 Subject: [PATCH 2/6] Correct reST errors --- nltk/test/corpus.doctest | 2 +- nltk/test/featgram.doctest | 20 ++++++++++++----- nltk/test/gluesemantics.doctest | 1 - nltk/test/relextract.doctest | 4 ++-- nltk/test/tag.doctest | 3 +-- nltk/test/wordnet.doctest | 39 +++++++++++++++++---------------- 6 files changed, 38 insertions(+), 31 deletions(-) diff --git a/nltk/test/corpus.doctest b/nltk/test/corpus.doctest index 0fc6cf7cfa..9b640d431b 100644 --- a/nltk/test/corpus.doctest +++ b/nltk/test/corpus.doctest @@ -709,7 +709,7 @@ as XML files. These corpora are returned as ElementTree objects: 'Prologue', 'Pyramus', 'Thisbe', 'Wall'] subjectivity ------------ +------------ The Subjectivity Dataset contains 5000 subjective and 5000 objective processed sentences. diff --git a/nltk/test/featgram.doctest b/nltk/test/featgram.doctest index 4eaa55ca8d..4274f2a811 100644 --- a/nltk/test/featgram.doctest +++ b/nltk/test/featgram.doctest @@ -5,7 +5,15 @@ Feature Grammar Parsing ========================= -.. include:: ../../../nltk_book/definitions.rst +.. 
definitions from nltk_book/definitions.rst + +.. role:: feat + :class: feature +.. role:: fval + :class: fval +.. |rarr| unicode:: U+2192 .. right arrow +.. |dot| unicode:: U+2022 .. bullet +.. |pi| unicode:: U+03C0 Grammars can be parsed from strings. @@ -116,7 +124,7 @@ standard CFGs is used to combine an incomplete edge that's expecting a nonterminal *B* with a following, complete edge whose left hand side matches *B*. In our current setting, rather than checking for a complete match, we test whether the expected category *B* will -`unify`:dt: with the left hand side *B'* of a following complete +unify with the left hand side *B'* of a following complete edge. We will explain in more detail in Section 9.2 how unification works; for the moment, it is enough to know that as a result of unification, any variable values of features in *B* will be @@ -237,10 +245,10 @@ Unification is commutative: >>> nltk.unify(nltk.unify(fs1, fs2), fs3) == nltk.unify(fs1, nltk.unify(fs2, fs3)) True -Unification between `FS`:math:\ :subscript:`0` and `FS`:math:\ -:subscript:`1` will fail if the two feature structures share a path |pi|, -but the value of |pi| in `FS`:math:\ :subscript:`0` is a distinct -atom from the value of |pi| in `FS`:math:\ :subscript:`1`. In NLTK, +Unification between *FS*:math:`_0` and *FS*:math:`_1` will fail if the +two feature structures share a path |pi|, +but the value of |pi| in *FS*:math:`_0` is a distinct +atom from the value of |pi| in *FS*:math:`_1`. In NLTK, this is implemented by setting the result of unification to be ``None``. diff --git a/nltk/test/gluesemantics.doctest b/nltk/test/gluesemantics.doctest index b2a969b7a9..8aaa211174 100644 --- a/nltk/test/gluesemantics.doctest +++ b/nltk/test/gluesemantics.doctest @@ -5,7 +5,6 @@ Glue Semantics ============================================================================== -.. 
include:: ../../../nltk_book/definitions.rst ====================== diff --git a/nltk/test/relextract.doctest b/nltk/test/relextract.doctest index 24a71f844d..1a66d52f8f 100644 --- a/nltk/test/relextract.doctest +++ b/nltk/test/relextract.doctest @@ -16,8 +16,8 @@ Information Extraction standardly consists of three subtasks: Named Entities ~~~~~~~~~~~~~~ -The IEER corpus is marked up for a variety of Named Entities. A `Named -Entity`:dt: (more strictly, a Named Entity mention) is a name of an +The IEER corpus is marked up for a variety of Named Entities. A Named +Entity (more strictly, a Named Entity mention) is a name of an entity belonging to a specified class. For example, the Named Entity classes in IEER include PERSON, LOCATION, ORGANIZATION, DATE and so on. Within NLTK, Named Entities are represented as subtrees within a diff --git a/nltk/test/tag.doctest b/nltk/test/tag.doctest index 27b96de307..06f81683a2 100644 --- a/nltk/test/tag.doctest +++ b/nltk/test/tag.doctest @@ -457,8 +457,7 @@ Regression Testing for issue #1025 ================================== We want to ensure that a RegexpTagger can be created with more than 100 patterns -and does not fail with: - "AssertionError: sorry, but this version only supports 100 named groups" +and does not fail with: "AssertionError: sorry, but this version only supports 100 named groups" >>> from nltk.tag import RegexpTagger >>> patterns = [(str(i), 'NNP',) for i in range(200)] diff --git a/nltk/test/wordnet.doctest b/nltk/test/wordnet.doctest index 28e07d3f0e..0650eb09eb 100644 --- a/nltk/test/wordnet.doctest +++ b/nltk/test/wordnet.doctest @@ -82,6 +82,7 @@ WordNet, using ISO-639 language codes. 
The synonyms of a word are returned as a nested list of synonyms of the different senses of the input word in the given language, since these different senses are not mutual synonyms: + >>> wn.synonyms('car') [['auto', 'automobile', 'machine', 'motorcar'], ['railcar', 'railroad_car', 'railway_car'], ['gondola'], ['elevator_car'], ['cable_car']] >>> wn.synonyms('coche', lang='spa') @@ -274,7 +275,7 @@ Wu-Palmer Similarity: Return a score denoting how similar two word senses are, based on the depth of the two senses in the taxonomy and that of their Least Common Subsumer (most specific ancestor node). Note that at this time the -scores given do _not_ always agree with those given by Pedersen's Perl +scores given do **not** always agree with those given by Pedersen's Perl implementation of Wordnet Similarity. The LCS does not necessarily feature in the shortest path connecting the @@ -640,9 +641,9 @@ Endlessness vs. intractability in relation trees 1. Endlessness -------------- -Until NLTK v. 3.5, the tree() function looped forever on symmetric +Until NLTK v. 3.5, the ``tree()`` function looped forever on symmetric relations (verb_groups, attributes, and most also_sees). But in -the current version, tree() now detects and discards these cycles: +the current version, ``tree()`` now detects and discards these cycles: >>> from pprint import pprint >>> pprint(wn.synset('bound.a.01').tree(lambda s:s.also_sees())) @@ -682,15 +683,15 @@ are mentioned in the output, together with the level where they occur: However, even after discarding the infinite cycles, some trees can remain intractable, due to combinatorial explosion in a relation. This happens in -WordNet, because the also.sees() relation has a big Strongly Connected +WordNet, because the ``also_sees()`` relation has a big Strongly Connected Component (_SCC_) consisting in 758 synsets, where any member node is transitively connected by the same relation, to all other members of the same SCC. 
This produces intractable relation trees for each of these 758 synsets, i. e. trees that are too big to compute or display on any computer. For example, the synset 'concrete.a.01' is a member of the largest SCC, -so its also_sees() tree is intractable, and can normally only be handled -by limiting the "depth" parameter to display a small number of levels: +so its ``also_sees()`` tree is intractable, and can normally only be handled +by limiting the ``depth`` parameter to display a small number of levels: >>> from pprint import pprint >>> pprint(wn.synset('concrete.a.01').tree(lambda s:s.also_sees(),cut_mark='...',depth=2)) @@ -708,20 +709,20 @@ by limiting the "depth" parameter to display a small number of levels: [Synset('tangible.a.01'), "Cycle(Synset('concrete.a.01'),0,...)"]] -2.1 First solution: acyclic_tree() -.................................. +2.1 First solution: ``acyclic_tree()`` +...................................... -On the other hand, the new acyclic_tree() function is able to also handle -the intractable cases. The also_sees() acyclic tree of 'concrete.a.01' is +On the other hand, the new ``acyclic_tree()`` function is able to also handle +the intractable cases. The ``also_sees()`` acyclic tree of 'concrete.a.01' is several hundred lines long, so here is a simpler example, concerning a much smaller SCC: counting only five members, the SCC that includes 'bound.a.01' -is tractable with the normal tree() function, as seen above. +is tractable with the normal ``tree()`` function, as seen above. -But while tree() only prunes redundancy within local branches, acyclic_tree +But while ``tree()`` only prunes redundancy within local branches, ``acyclic_tree()`` prunes the tree globally, thus discarding any additional redundancy, and -produces a tree that includes all reachable nodes (i. e. a _spanning tree_). 
-This tree is _minimal_ because it includes the reachable nodes only once, -but it is not necessarily a _Minimum Spanning Tree_ (MST), because the +produces a tree that includes all reachable nodes (i.e., a **spanning tree**). +This tree is **minimal** because it includes the reachable nodes only once, +but it is not necessarily a **Minimum Spanning Tree** (MST), because the Depth-first search strategy does not guarantee that nodes are reached through the lowest number of links (as Breadth-first search would). @@ -732,7 +733,7 @@ through the lowest number of links (as Breadth-first search would). [Synset('restricted.a.01'), [Synset('classified.a.02')]]], [Synset('dependent.a.01')]]] -Again, specifying the "cut_mark" parameter increases verbosity, so that the +Again, specifying the ``cut_mark`` parameter increases verbosity, so that the cycles are mentioned in the output, together with the level where they occur: >>> pprint(wn.synset('bound.a.01').acyclic_tree(lambda s:s.also_sees(),cut_mark='...')) @@ -756,9 +757,9 @@ A Minimum Spanning Tree (MST) spans all the nodes of a relation subgraph once, while guaranteeing that each node is reached through the shortest path possible. In unweighted relation graphs like WordNet, a MST can be computed very efficiently in linear time, using Breadth-First Search (BFS). Like acyclic_tree(), the new -"unweighted_minimum_spanning_tree()" function (imported in the Wordnet -module as "mst") handles intractable trees, such as the example discussed above: -"wn.synset('concrete.a.01').mst(lambda s:s.also_sees())". +``unweighted_minimum_spanning_tree()`` function (imported in the Wordnet +module as ``mst``) handles intractable trees, such as the example discussed above: +``wn.synset('concrete.a.01').mst(lambda s:s.also_sees())``. 
But, while the also_sees() acyclic_tree of 'bound.a.01' reaches 'classified.a.02' through four links, using depth-first search as seen above From baac07ffd573bf007421512cc804a44637422c8c Mon Sep 17 00:00:00 2001 From: Rob Malouf Date: Wed, 8 Jun 2022 20:10:25 -0700 Subject: [PATCH 3/6] Split up multiple args --- nltk/internals.py | 12 +++++++++--- nltk/metrics/aline.py | 5 +++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/nltk/internals.py b/nltk/internals.py index aca26efd03..f5e8cec86f 100644 --- a/nltk/internals.py +++ b/nltk/internals.py @@ -74,9 +74,8 @@ def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None, blocking=Tru archives, and ZIP archives to search for class files. :type classpath: str - :param stdin, stdout, stderr: Specify the executed programs' - standard input, standard output and standard error file - handles, respectively. Valid values are ``subprocess.PIPE``, + :param stdin: Specify the executed program's + standard input file handle. Valid values are ``subprocess.PIPE``, an existing file descriptor (a positive integer), an existing file object, 'pipe', 'stdout', 'devnull' and None. ``subprocess.PIPE`` indicates that a new pipe to the child should be created. With None, no @@ -86,6 +85,13 @@ def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None, blocking=Tru from the applications should be captured into the same file handle as for stdout. + :param stdout: Specify the executed program's standard output file + handle. See ``stdin`` for valid values. + + :param stderr: Specify the executed program's standard error file + handle. See ``stdin`` for valid values. + + :param blocking: If ``false``, then return immediately after spawning the subprocess. In this case, the return value is the ``Popen`` object, and not a ``(stdout, stderr)`` tuple. 
diff --git a/nltk/metrics/aline.py b/nltk/metrics/aline.py index da87ff4452..c29258d1e2 100644 --- a/nltk/metrics/aline.py +++ b/nltk/metrics/aline.py @@ -1079,8 +1079,9 @@ def align(str1, str2, epsilon=0): """ Compute the alignment of two phonetic strings. - :type str1, str2: str - :param str1, str2: Two strings to be aligned + :param str str1: First string to be aligned + :param str str2: Second string to be aligned + :type epsilon: float (0.0 to 1.0) :param epsilon: Adjusts threshold similarity score for near-optimal alignments From 68f39d02c56da275f98f9772c84dc2e1bfe64376 Mon Sep 17 00:00:00 2001 From: Rob Malouf Date: Fri, 10 Jun 2022 23:32:40 -0700 Subject: [PATCH 4/6] Automatically run sphinx-apidoc on build --- web/Makefile | 1 - web/conf.py | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/web/Makefile b/web/Makefile index 9ae3031389..da274dedd8 100644 --- a/web/Makefile +++ b/web/Makefile @@ -18,7 +18,6 @@ I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext web: clean_api - sphinx-apidoc --templatedir=_templates --force --separate -o api ../nltk $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(WEB) @echo @echo "Build finished. The HTML pages are in $(WEB)." 
diff --git a/web/conf.py b/web/conf.py index abbad9674f..2d8706033b 100644 --- a/web/conf.py +++ b/web/conf.py @@ -32,8 +32,14 @@ "sphinx.ext.coverage", "sphinx.ext.imgmath", "sphinx.ext.viewcode", + "sphinxcontrib.apidoc" ] +apidoc_module_dir = '../nltk' +apidoc_output_dir = 'api' +apidoc_separate_modules = True +apidoc_extra_args = ['--templatedir=_templates','--force'] + def generate_custom_files(): """Generating contents in the ``howto`` folder, based on the ``ntlk/test/*.doctest`` files, as well From 38125ddf7c46301d6dea630ae34ef375d105bcd8 Mon Sep 17 00:00:00 2001 From: Rob Malouf Date: Sat, 11 Jun 2022 11:33:46 -0700 Subject: [PATCH 5/6] exclude tests from api docs --- web/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/web/conf.py b/web/conf.py index 2d8706033b..8346919a13 100644 --- a/web/conf.py +++ b/web/conf.py @@ -39,6 +39,7 @@ apidoc_output_dir = 'api' apidoc_separate_modules = True apidoc_extra_args = ['--templatedir=_templates','--force'] +apidoc_excluded_paths = ['test'] def generate_custom_files(): """Generating contents in the ``howto`` folder, From bb2d16fc1f7a39be36ebcec1dadadfd4a770e3a9 Mon Sep 17 00:00:00 2001 From: Rob Malouf Date: Mon, 13 Jun 2022 11:55:36 -0700 Subject: [PATCH 6/6] Blackified conf.py --- web/conf.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/web/conf.py b/web/conf.py index 8346919a13..c35151ca70 100644 --- a/web/conf.py +++ b/web/conf.py @@ -32,14 +32,15 @@ "sphinx.ext.coverage", "sphinx.ext.imgmath", "sphinx.ext.viewcode", - "sphinxcontrib.apidoc" + "sphinxcontrib.apidoc", ] -apidoc_module_dir = '../nltk' -apidoc_output_dir = 'api' +apidoc_module_dir = "../nltk" +apidoc_output_dir = "api" apidoc_separate_modules = True -apidoc_extra_args = ['--templatedir=_templates','--force'] -apidoc_excluded_paths = ['test'] +apidoc_extra_args = ["--templatedir=_templates", "--force"] +apidoc_excluded_paths = ["test"] + def generate_custom_files(): """Generating contents in the ``howto`` 
folder,