diff --git a/nltk/internals.py b/nltk/internals.py
index aca26efd03..f5e8cec86f 100644
--- a/nltk/internals.py
+++ b/nltk/internals.py
@@ -74,9 +74,8 @@ def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None, blocking=Tru
         archives, and ZIP archives to search for class files.
     :type classpath: str
-    :param stdin, stdout, stderr: Specify the executed programs'
-        standard input, standard output and standard error file
-        handles, respectively. Valid values are ``subprocess.PIPE``,
+    :param stdin: Specify the executed program's
+        standard input file handle. Valid values are ``subprocess.PIPE``,
         an existing file descriptor (a positive integer), an existing
         file object, 'pipe', 'stdout', 'devnull' and None. ``subprocess.PIPE`` indicates that a new pipe to the
         child should be created. With None, no
@@ -86,6 +85,13 @@ def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None, blocking=Tru
         from the applications should be captured into the same file
         handle as for stdout.

+    :param stdout: Specify the executed program's standard output file
+        handle. See ``stdin`` for valid values.
+
+    :param stderr: Specify the executed program's standard error file
+        handle. See ``stdin`` for valid values.
+
+
     :param blocking: If ``false``, then return immediately after spawning
         the subprocess. In this case, the return value is the ``Popen`` object,
         and not a ``(stdout, stderr)`` tuple.
diff --git a/nltk/metrics/aline.py b/nltk/metrics/aline.py
index da87ff4452..c29258d1e2 100644
--- a/nltk/metrics/aline.py
+++ b/nltk/metrics/aline.py
@@ -1079,8 +1079,9 @@ def align(str1, str2, epsilon=0):
     """
     Compute the alignment of two phonetic strings.

-    :type str1, str2: str
-    :param str1, str2: Two strings to be aligned
+    :param str str1: First string to be aligned
+    :param str str2: Second string to be aligned
+
     :type epsilon: float (0.0 to 1.0)
     :param epsilon: Adjusts threshold similarity score for near-optimal
         alignments
diff --git a/nltk/test/corpus.doctest b/nltk/test/corpus.doctest
index 0fc6cf7cfa..9b640d431b 100644
--- a/nltk/test/corpus.doctest
+++ b/nltk/test/corpus.doctest
@@ -709,7 +709,7 @@ as XML files. These corpora are returned as ElementTree objects:
     'Prologue', 'Pyramus', 'Thisbe', 'Wall']

 subjectivity
------------
+------------

 The Subjectivity Dataset contains 5000 subjective and 5000 objective processed
 sentences.
diff --git a/nltk/test/featgram.doctest b/nltk/test/featgram.doctest
index 4eaa55ca8d..4274f2a811 100644
--- a/nltk/test/featgram.doctest
+++ b/nltk/test/featgram.doctest
@@ -5,7 +5,15 @@
 Feature Grammar Parsing
 =========================

-.. include:: ../../../nltk_book/definitions.rst
+.. definitions from nltk_book/definitions.rst
+
+.. role:: feat
+    :class: feature
+.. role:: fval
+    :class: fval
+.. |rarr| unicode:: U+2192 .. right arrow
+.. |dot| unicode:: U+2022 .. bullet
+.. |pi| unicode:: U+03C0

 Grammars can be parsed from strings.

@@ -116,7 +124,7 @@ standard CFGs is used to combine an incomplete edge that's expecting a
 nonterminal *B* with a following, complete edge whose left hand side
 matches *B*. In our current setting, rather than checking for a
 complete match, we test whether the expected category *B* will
-`unify`:dt: with the left hand side *B'* of a following complete
+unify with the left hand side *B'* of a following complete
 edge.
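For example, here is a minimal sketch of unification in action (``nltk`` is
already imported earlier in this doctest; the feature structures and variable
names here are purely illustrative):

>>> fs_need = nltk.FeatStruct("[CAT='NP', NUM=?n]")
>>> fs_found = nltk.FeatStruct("[CAT='NP', NUM='sg']")
>>> print(nltk.unify(fs_need, fs_found))
[ CAT = 'NP' ]
[ NUM = 'sg' ]

Note how the variable ``?n`` has been instantiated to ``'sg'`` in the result.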
We will explain in more detail in Section 9.2 how unification works; for the moment, it is enough to know that as a result of unification, any variable values of features in *B* will be @@ -237,10 +245,10 @@ Unification is commutative: >>> nltk.unify(nltk.unify(fs1, fs2), fs3) == nltk.unify(fs1, nltk.unify(fs2, fs3)) True -Unification between `FS`:math:\ :subscript:`0` and `FS`:math:\ -:subscript:`1` will fail if the two feature structures share a path |pi|, -but the value of |pi| in `FS`:math:\ :subscript:`0` is a distinct -atom from the value of |pi| in `FS`:math:\ :subscript:`1`. In NLTK, +Unification between *FS*:math:`_0` and *FS*:math:`_1` will fail if the +two feature structures share a path |pi|, +but the value of |pi| in *FS*:math:`_0` is a distinct +atom from the value of |pi| in *FS*:math:`_1`. In NLTK, this is implemented by setting the result of unification to be ``None``. diff --git a/nltk/test/gluesemantics.doctest b/nltk/test/gluesemantics.doctest index b2a969b7a9..8aaa211174 100644 --- a/nltk/test/gluesemantics.doctest +++ b/nltk/test/gluesemantics.doctest @@ -5,7 +5,6 @@ Glue Semantics ============================================================================== -.. include:: ../../../nltk_book/definitions.rst ====================== diff --git a/nltk/test/relextract.doctest b/nltk/test/relextract.doctest index 24a71f844d..1a66d52f8f 100644 --- a/nltk/test/relextract.doctest +++ b/nltk/test/relextract.doctest @@ -16,8 +16,8 @@ Information Extraction standardly consists of three subtasks: Named Entities ~~~~~~~~~~~~~~ -The IEER corpus is marked up for a variety of Named Entities. A `Named -Entity`:dt: (more strictly, a Named Entity mention) is a name of an +The IEER corpus is marked up for a variety of Named Entities. A Named +Entity (more strictly, a Named Entity mention) is a name of an entity belonging to a specified class. For example, the Named Entity classes in IEER include PERSON, LOCATION, ORGANIZATION, DATE and so on. Within NLTK, Named Entities are represented as subtrees within a diff --git a/nltk/test/tag.doctest b/nltk/test/tag.doctest index 27b96de307..06f81683a2 100644 --- a/nltk/test/tag.doctest +++ b/nltk/test/tag.doctest @@ -457,8 +457,7 @@ Regression Testing for issue #1025 ================================== We want to ensure that a RegexpTagger can be created with more than 100 patterns -and does not fail with: - "AssertionError: sorry, but this version only supports 100 named groups" +and does not fail with: "AssertionError: sorry, but this version only supports 100 named groups" >>> from nltk.tag import RegexpTagger >>> patterns = [(str(i), 'NNP',) for i in range(200)] diff --git a/nltk/test/wordnet.doctest b/nltk/test/wordnet.doctest index 28e07d3f0e..0650eb09eb 100644 --- a/nltk/test/wordnet.doctest +++ b/nltk/test/wordnet.doctest @@ -82,6 +82,7 @@ WordNet, using ISO-639 language codes. The synonyms of a word are returned as a nested list of synonyms of the different senses of the input word in the given language, since these different senses are not mutual synonyms: + >>> wn.synonyms('car') [['auto', 'automobile', 'machine', 'motorcar'], ['railcar', 'railroad_car', 'railway_car'], ['gondola'], ['elevator_car'], ['cable_car']] >>> wn.synonyms('coche', lang='spa') @@ -274,7 +275,7 @@ Wu-Palmer Similarity: Return a score denoting how similar two word senses are, based on the depth of the two senses in the taxonomy and that of their Least Common Subsumer (most specific ancestor node). 
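For instance, a minimal sketch using the classic dog/cat pair (``wn`` as
imported at the top of this doctest; the exact float formatting may vary
between versions):

>>> wn.synset('dog.n.01').wup_similarity(wn.synset('cat.n.01'))
0.8571428571428571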
Note that at this time the
-scores given do _not_ always agree with those given by Pedersen's Perl
+scores given do **not** always agree with those given by Pedersen's Perl
 implementation of Wordnet Similarity.

 The LCS does not necessarily feature in the shortest path connecting the
@@ -640,9 +641,9 @@ Endlessness vs. intractability in relation trees

 1. Endlessness
 --------------

-Until NLTK v. 3.5, the tree() function looped forever on symmetric
+Until NLTK v. 3.5, the ``tree()`` function looped forever on symmetric
 relations (verb_groups, attributes, and most also_sees). But in
-the current version, tree() now detects and discards these cycles:
+the current version, ``tree()`` now detects and discards these cycles:

 >>> from pprint import pprint
 >>> pprint(wn.synset('bound.a.01').tree(lambda s:s.also_sees()))
@@ -682,15 +683,15 @@ are mentioned in the output, together with the level where they occur:

 However, even after discarding the infinite cycles, some trees can remain
 intractable, due to combinatorial explosion in a relation. This happens in
-WordNet, because the also.sees() relation has a big Strongly Connected
+WordNet, because the ``also_sees()`` relation has a big Strongly Connected
 Component (_SCC_) consisting of 758 synsets, where any member node is
 transitively connected by the same relation, to all other members of the
 same SCC. This produces intractable relation trees for each of these 758
 synsets, i. e. trees that are too big to compute or display on any computer.

 For example, the synset 'concrete.a.01' is a member of the largest SCC,
-so its also_sees() tree is intractable, and can normally only be handled
-by limiting the "depth" parameter to display a small number of levels:
+so its ``also_sees()`` tree is intractable, and can normally only be handled
+by limiting the ``depth`` parameter to display a small number of levels:

 >>> from pprint import pprint
 >>> pprint(wn.synset('concrete.a.01').tree(lambda s:s.also_sees(),cut_mark='...',depth=2))
 [Synset('concrete.a.01'),
  [Synset('practical.a.01'),
   "Cycle(Synset('concrete.a.01'),0,...)",
   [Synset('possible.a.01'), '...'],
   [Synset('realistic.a.01'), '...'],
   [Synset('serviceable.a.01'), '...']],
  [Synset('real.a.01'),
   "Cycle(Synset('concrete.a.01'),0,...)",
   [Synset('genuine.a.01'), '...'],
   [Synset('realistic.a.01'), '...'],
   [Synset('sincere.a.01'), '...']],
@@ -708,20 +709,20 @@ by limiting the "depth" parameter to display a small number of levels:
  [Synset('tangible.a.01'), "Cycle(Synset('concrete.a.01'),0,...)"]]


-2.1 First solution: acyclic_tree()
-..................................
+2.1 First solution: ``acyclic_tree()``
+......................................

-On the other hand, the new acyclic_tree() function is able to also handle
-the intractable cases. The also_sees() acyclic tree of 'concrete.a.01' is
+On the other hand, the new ``acyclic_tree()`` function is also able to handle
+the intractable cases. The ``also_sees()`` acyclic tree of 'concrete.a.01' is
 several hundred lines long, so here is a simpler example, concerning a much
 smaller SCC: counting only five members, the SCC that includes 'bound.a.01'
-is tractable with the normal tree() function, as seen above.
+is tractable with the normal ``tree()`` function, as seen above.

-But while tree() only prunes redundancy within local branches, acyclic_tree
+But while ``tree()`` only prunes redundancy within local branches, ``acyclic_tree()``
 prunes the tree globally, thus discarding any additional redundancy, and
-produces a tree that includes all reachable nodes (i. e. a _spanning tree_).
-This tree is _minimal_ because it includes the reachable nodes only once,
-but it is not necessarily a _Minimum Spanning Tree_ (MST), because the
+produces a tree that includes all reachable nodes (i.e., a **spanning tree**).
+This tree is **minimal** because it includes the reachable nodes only once,
+but it is not necessarily a **Minimum Spanning Tree** (MST), because the
 Depth-first search strategy does not guarantee that nodes are reached
 through the lowest number of links (as Breadth-first search would).

@@ -732,7 +733,7 @@ through the lowest number of links (as Breadth-first search would).
    [Synset('restricted.a.01'), [Synset('classified.a.02')]]],
  [Synset('dependent.a.01')]]]

-Again, specifying the "cut_mark" parameter increases verbosity, so that the
+Again, specifying the ``cut_mark`` parameter increases verbosity, so that the
 cycles are mentioned in the output, together with the level where they occur:

 >>> pprint(wn.synset('bound.a.01').acyclic_tree(lambda s:s.also_sees(),cut_mark='...'))

@@ -756,9 +757,9 @@ A Minimum Spanning Tree (MST) spans all the nodes of a relation subgraph once,
 while guaranteeing that each node is reached through the shortest path possible. In
 unweighted relation graphs like WordNet, a MST can be computed very efficiently in
 linear time, using Breadth-First Search (BFS). Like acyclic_tree(), the new
-"unweighted_minimum_spanning_tree()" function (imported in the Wordnet
-module as "mst") handles intractable trees, such as the example discussed above:
-"wn.synset('concrete.a.01').mst(lambda s:s.also_sees())".
+``unweighted_minimum_spanning_tree()`` function (imported in the Wordnet
+module as ``mst``) handles intractable trees, such as the example discussed above:
+``wn.synset('concrete.a.01').mst(lambda s:s.also_sees())``.

 But, while the also_sees() acyclic_tree of 'bound.a.01' reaches 'classified.a.02'
 through four links, using depth-first search as seen above
diff --git a/web/Makefile b/web/Makefile
index 972cb37a98..da274dedd8 100644
--- a/web/Makefile
+++ b/web/Makefile
@@ -18,7 +18,6 @@ I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext

 web: clean_api
-	sphinx-apidoc -o api ../nltk
 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(WEB)
 	@echo
 	@echo "Build finished. The HTML pages are in $(WEB)."
diff --git a/web/_templates/doctest.rst b/web/_templates/doctest.rst
index c0e6408af9..e2f43c3167 100644
--- a/web/_templates/doctest.rst
+++ b/web/_templates/doctest.rst
@@ -1,9 +1,8 @@
 {# The :autogenerated: tag is picked up by breadcrumbs.html to suppress "Edit on Github" link #}
 :autogenerated:
+
 ..
-    This file is autogenerated by `generate_custom_files` in conf.py,
-    and ought to be generated via building the documentation through Sphinx, e.g.
-    with `sphinx-build ./web ./build` from the root directory.
+    This file is autogenerated by `sphinx-apidoc`.

 {% for item in range(17 + module_name|length) -%}#{%- endfor %}
 Sample usage for {{ module_name }}
diff --git a/web/_templates/module.rst b/web/_templates/module.rst
index 87ebe3a1be..5e6434daaf 100644
--- a/web/_templates/module.rst
+++ b/web/_templates/module.rst
@@ -1,9 +1,8 @@
 {# The :autogenerated: tag is picked up by breadcrumbs.html to suppress "Edit on Github" link #}
 :autogenerated:
+
 ..
-    This file is autogenerated by `generate_custom_files` in conf.py,
-    and ought to be generated via building the documentation through Sphinx, e.g.
-    with `sphinx-build ./web ./build` from the root directory.
+    This file is autogenerated by `sphinx-apidoc`.
{{ fullname }} module
{% for item in range(7 + fullname|length) -%}={%- endfor %}
diff --git a/web/_templates/package.rst b/web/_templates/package.rst
index 9cd2e60271..4905e44c6b 100644
--- a/web/_templates/package.rst
+++ b/web/_templates/package.rst
@@ -1,9 +1,8 @@
 {# The :autogenerated: tag is picked up by breadcrumbs.html to suppress "Edit on Github" link #}
 :autogenerated:
+
 ..
-    This file is autogenerated by `generate_custom_files` in conf.py,
-    and ought to be generated via building the documentation through Sphinx, e.g.
-    with `sphinx-build ./web ./build` from the root directory.
+    This file is autogenerated by `sphinx-apidoc`.

 {{ fullname }} package
 {% for item in range(8 + fullname|length) -%}={%- endfor %}
diff --git a/web/conf.py b/web/conf.py
index ec710bc546..c35151ca70 100644
--- a/web/conf.py
+++ b/web/conf.py
@@ -23,7 +23,7 @@
 # -- General configuration -----------------------------------------------------

 # If your documentation needs a minimal Sphinx version, state it here.
-# needs_sphinx = '1.0'
+# needs_sphinx = '2.2'

 # Add any Sphinx extension module names here, as strings. They can be extensions
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
@@ -32,26 +32,14 @@
     "sphinx.ext.coverage",
     "sphinx.ext.imgmath",
     "sphinx.ext.viewcode",
+    "sphinxcontrib.apidoc",
 ]

-
-def run_apidoc(app):
-    """Generage API documentation"""
-    import better_apidoc
-
-    better_apidoc.APP = app
-    better_apidoc.main(
-        [
-            "better-apidoc",
-            "-t",
-            os.path.join(".", "web", "_templates"),
-            "--force",
-            "--separate",
-            "-o",
-            os.path.join(".", "web", "api"),
-            os.path.join(".", "nltk"),
-        ]
-    )
+apidoc_module_dir = "../nltk"
+apidoc_output_dir = "api"
+apidoc_separate_modules = True
+apidoc_extra_args = ["--templatedir=_templates", "--force"]
+apidoc_excluded_paths = ["test"]


 def generate_custom_files():
@@ -184,10 +172,6 @@ def generate_custom_files():

 html_theme = "nltk_theme"


-def setup(app):
-    app.connect("builder-inited", run_apidoc)
-

 # Theme options are theme-specific and customize the look and feel of a theme
 # further. For a list of options available for each theme, see the
 # documentation.
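A note on the ``web/conf.py`` change above: the new ``apidoc_*`` options hand
API-stub generation to the ``sphinxcontrib.apidoc`` extension at build time,
which is why the explicit generation step could be dropped from ``web/Makefile``.
As a rough sketch (a hypothetical invocation, run from the ``web/`` directory),
the configured options correspond to:

    sphinx-apidoc --templatedir=_templates --force --separate -o api ../nltk ../nltk/test

where the trailing exclude path comes from ``apidoc_excluded_paths``, resolved
relative to ``apidoc_module_dir``. The bump of the commented ``needs_sphinx``
hint to ``'2.2'`` presumably tracks this change, since ``sphinx-apidoc`` only
gained its ``--templatedir`` option around Sphinx 2.2.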