Removed hundreds of formatting warnings for nltk.org (#2859)
* Removed 500+ warnings when building website documentation

* Improved formatting for website news titles

* Updated some of the IBM documentation to include description lists
tomaarsen committed Oct 19, 2021
1 parent bec8910 commit 4a130f1
Showing 89 changed files with 879 additions and 776 deletions.
6 changes: 4 additions & 2 deletions nltk/chunk/regexp.py
@@ -78,12 +78,14 @@ def __init__(self, chunk_struct, debug_level=1):
:param debug_level: The level of debugging which should be
applied to transformations on the ``ChunkString``. The
valid levels are:
- 0: no checks
- 1: full check on to_chunkstruct
- 2: full check on to_chunkstruct and cursory check after
  each transformation.
- 3: full check on to_chunkstruct and full check after
  each transformation.
We recommend you use at least level 1. You should
probably use level 3 if you use any non-standard
subclasses of ``RegexpChunkRule``.
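A minimal usage sketch (not part of this diff; the tree and tags are illustrative, and ``ChunkString`` is normally driven by ``RegexpChunkParser`` rather than used directly)::

    from nltk import Tree
    from nltk.chunk.regexp import ChunkString

    # A flat tree of (word, tag) leaves, as produced by a tagger.
    tagged = Tree('S', [('the', 'DT'), ('little', 'JJ'), ('cat', 'NN')])

    # debug_level=3: full check on to_chunkstruct and after each transformation.
    cs = ChunkString(tagged, debug_level=3)
    print(cs.to_chunkstruct())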
15 changes: 8 additions & 7 deletions nltk/classify/senna.py
@@ -21,19 +21,20 @@
misalignment errors.
The input is:
- path to the directory that contains SENNA executables. If the path is incorrect,
  Senna will automatically search for the executable file specified in the SENNA environment variable
- List of the operations needed to be performed.
- (optionally) the encoding of the input data (default: utf-8)
Note: Unit tests for this module can be found in test/unit/test_senna.py
>>> from nltk.classify import Senna
>>> pipeline = Senna('/usr/share/senna-v3.0', ['pos', 'chk', 'ner'])
>>> sent = 'Dusseldorf is an international business center'.split()
>>> [(token['word'], token['chk'], token['ner'], token['pos']) for token in pipeline.tag(sent)] # doctest: +SKIP
[('Dusseldorf', 'B-NP', 'B-LOC', 'NNP'), ('is', 'B-VP', 'O', 'VBZ'), ('an', 'B-NP', 'O', 'DT'),
('international', 'I-NP', 'O', 'JJ'), ('business', 'I-NP', 'O', 'NN'), ('center', 'I-NP', 'O', 'NN')]
"""

from os import environ, path, sep
10 changes: 6 additions & 4 deletions nltk/cluster/__init__.py
@@ -47,10 +47,11 @@
not significantly increase.
They all extend the ClusterI interface which defines common operations
-available with each clusterer. These operations include.
+available with each clusterer. These operations include:

- cluster: clusters a sequence of vectors
- classify: assign a vector to a cluster
- classification_probdist: give the probability distribution over cluster memberships
The current existing classifiers also extend cluster.VectorSpace, an
abstract class which allows for singular value decomposition (SVD) and vector
@@ -61,6 +62,7 @@
hypersphere.
Usage example (see also demo())::
from nltk import cluster
from nltk.cluster import euclidean_distance
from numpy import array
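The example above is truncated here; a runnable sketch along the same lines (the sample vectors and cluster count are illustrative)::

    from numpy import array
    from nltk import cluster
    from nltk.cluster import euclidean_distance

    vectors = [array(f) for f in [[3, 3], [1, 2], [4, 2], [4, 0]]]

    # cluster: fit two means and assign each vector to one of them
    clusterer = cluster.KMeansClusterer(2, euclidean_distance)
    assignments = clusterer.cluster(vectors, assign_clusters=True)

    # classify: place a new vector into an existing cluster
    print(assignments, clusterer.classify(array([3, 3])))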
3 changes: 2 additions & 1 deletion nltk/cluster/util.py
@@ -125,7 +125,7 @@ def euclidean_distance(u, v):
def cosine_distance(u, v):
"""
Returns 1 minus the cosine of the angle between vectors v and u. This is
-    equal to 1 - (u.v / |u||v|).
+    equal to ``1 - (u.v / |u||v|)``.
"""
return 1 - (numpy.dot(u, v) / (sqrt(numpy.dot(u, u)) * sqrt(numpy.dot(v, v))))
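For instance, identical vectors are at distance 0 and orthogonal vectors at distance 1::

    from numpy import array
    from nltk.cluster.util import cosine_distance

    print(cosine_distance(array([1, 0]), array([1, 0])))  # 0.0
    print(cosine_distance(array([1, 0]), array([0, 1])))  # 1.0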

@@ -221,6 +221,7 @@ def groups(self, n):
def show(self, leaf_labels=[]):
"""
Print the dendrogram in ASCII art to standard out.
:param leaf_labels: an optional list of strings to use for labeling the
leaves
:type leaf_labels: list
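A sketch of how a dendrogram typically gets printed, via the group-average agglomerative clusterer (data and labels are illustrative)::

    from numpy import array
    from nltk.cluster import GAAClusterer

    vectors = [array(f) for f in [[3, 3], [1, 2], [4, 2], [4, 0]]]
    clusterer = GAAClusterer(2)
    clusterer.cluster(vectors, True)

    # Label the four leaves of the dendrogram in input order.
    clusterer.dendrogram().show(leaf_labels=['a', 'b', 'c', 'd'])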
3 changes: 2 additions & 1 deletion nltk/corpus/reader/api.py
@@ -52,6 +52,7 @@ def __init__(self, root, fileids, encoding="utf8", tagset=None):
:param encoding: The default unicode encoding for the files
that make up the corpus. The value of ``encoding`` can be any
of the following:
- A string: ``encoding`` is the encoding name for all files.
- A dictionary: ``encoding[file_id]`` is the encoding
name for the file whose identifier is ``file_id``. If
@@ -67,7 +68,7 @@ def __init__(self, root, fileids, encoding="utf8", tagset=None):
processed using non-unicode byte strings.
:param tagset: The name of the tagset used by this corpus, to be used
for normalizing or converting the POS tags returned by the
-    tagged_...() methods.
+    ``tagged_...()`` methods.
"""
# Convert the root to a path pointer, if necessary.
if isinstance(root, str) and not isinstance(root, PathPointer):
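For example, with a plaintext reader (the corpus path and file pattern are hypothetical)::

    from nltk.corpus.reader import PlaintextCorpusReader

    # A single string applies one encoding to every file in the corpus.
    reader = PlaintextCorpusReader('/path/to/corpus', r'.*\.txt', encoding='latin-1')
    print(reader.words())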
16 changes: 8 additions & 8 deletions nltk/corpus/reader/bracket_parse.py
@@ -46,12 +46,12 @@ def __init__(
:param comment_char: The character which can appear at the start of
a line to indicate that the rest of the line is a comment.
:param detect_blocks: The method that is used to find blocks
    in the corpus; can be 'unindented_paren' (every unindented
    parenthesis starts a new parse) or 'sexpr' (brackets are
    matched).
:param tagset: The name of the tagset used by this corpus, to be used
    for normalizing or converting the POS tags returned by the
-    tagged_...() methods.
+    ``tagged_...()`` methods.
"""
# FIXME: Why is it inheriting from SyntaxCorpusReader but initializing
# from CorpusReader?
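A sketch of constructing the reader directly (root and file pattern are hypothetical)::

    from nltk.corpus.reader import BracketParseCorpusReader

    reader = BracketParseCorpusReader(
        '/path/to/treebank', r'.*\.mrg',
        detect_blocks='unindented_paren',  # every unindented '(' starts a parse
    )
    print(reader.parsed_sents()[0])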
@@ -167,10 +167,10 @@ def parsed_paras(self, fileids=None, categories=None):
class AlpinoCorpusReader(BracketParseCorpusReader):
"""
Reader for the Alpino Dutch Treebank.
-This corpus has a lexical breakdown structure embedded, as read by _parse
+This corpus has a lexical breakdown structure embedded, as read by `_parse`
Unfortunately this puts punctuation and some other words out of the sentence
-order in the xml element tree. This is no good for tag_ and word_
-_tag and _word will be overridden to use a non-default new parameter 'ordered'
+order in the xml element tree. This is no good for `tag_` and `word_`
+`_tag` and `_word` will be overridden to use a non-default new parameter 'ordered'
to the overridden _normalize function. The _parse function can then remain
untouched.
"""
4 changes: 2 additions & 2 deletions nltk/corpus/reader/cmudict.py
@@ -54,7 +54,7 @@ class CMUDictCorpusReader(CorpusReader):
def entries(self):
"""
:return: the cmudict lexicon as a list of entries
    containing (word, transcriptions) tuples.
"""
return concat(
[
@@ -72,7 +72,7 @@ def words(self):
def dict(self):
"""
:return: the cmudict lexicon as a dictionary, whose keys are
    lowercase words and whose values are lists of pronunciations.
"""
return dict(Index(self.entries()))
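Both views are available through the bundled loader (assumes the cmudict data is installed)::

    from nltk.corpus import cmudict

    pron = cmudict.dict()          # word -> list of transcriptions
    print(pron['fire'])            # both pronunciations of 'fire'
    print(cmudict.entries()[:2])   # (word, transcription) pairs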

21 changes: 13 additions & 8 deletions nltk/corpus/reader/framenet.py
@@ -772,7 +772,7 @@ class AttrDict(dict):

"""A class that wraps a dict and allows accessing the keys of the
dict as if they were attributes. Taken from here:
    https://stackoverflow.com/a/14620633/8879
>>> foo = {'a':1, 'b':2, 'c':3}
>>> bar = AttrDict(foo)
@@ -1350,8 +1350,7 @@ def doc(self, fn_docid):
- 'frameID' : (only if status is 'MANUAL')
- 'frameName': (only if status is 'MANUAL')
- 'layer' : a list of labels for the layer
-    - Each item in the layer is a dict containing the
-      following keys:
+    - Each item in the layer is a dict containing the following keys:
- '_type': 'layer'
- 'rank'
- 'name'
@@ -1533,6 +1532,7 @@ def frame(self, fn_fid_or_fname, ignorekeys=[]):
- 'FE' : a dict containing the Frame Elements that are part of this frame
The keys in this dict are the names of the FEs (e.g. 'Body_system')
and the values are dicts containing the following keys
- 'definition' : The definition of the FE
- 'name' : The name of the FE e.g. 'Body_system'
- 'ID' : The id number
@@ -1706,19 +1706,24 @@ def lu(self, fn_luid, ignorekeys=[], luName=None, frameID=None, frameName=None):
- 'lexemes' : a list of dicts describing the lemma of this LU.
Each dict in the list contains these keys:
- 'POS' : part of speech e.g. 'N'
- 'name' : either single-lexeme e.g. 'merger' or
multi-lexeme e.g. 'a little'
- 'order': the order of the lexeme in the lemma (starting from 1)
- 'headword': a boolean ('true' or 'false')
- 'breakBefore': Can this lexeme be separated from the previous lexeme?
Consider: "take over.v" as in:
Consider: "take over.v" as in::
Germany took over the Netherlands in 2 days.
Germany took the Netherlands over in 2 days.
In this case, 'breakBefore' would be "true" for the lexeme
"over". Contrast this with "take after.v" as in:
"over". Contrast this with "take after.v" as in::
Mary takes after her grandmother.
*Mary takes her grandmother after.
In this case, 'breakBefore' would be "false" for the lexeme "after"
- 'lemmaID' : Can be used to connect lemmas in different LUs
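A sketch of inspecting these lexeme keys (assumes the framenet_v15 data is installed; LU 256 is 'foresee.v' in FrameNet 1.5)::

    from nltk.corpus import framenet as fn

    lu = fn.lu(256)
    for lexeme in lu.lexemes:
        print(lexeme.name, lexeme.POS, lexeme.breakBefore, lexeme.headword)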
@@ -2518,11 +2523,11 @@ def frame_relation_types(self):
def frame_relations(self, frame=None, frame2=None, type=None):
"""
:param frame: (optional) frame object, name, or ID; only relations involving
    this frame will be returned
:param frame2: (optional; 'frame' must be a different frame) only show relations
    between the two specified frames, in either direction
:param type: (optional) frame relation type (name or object); show only relations
    of this type
:type frame: int or str or AttrDict
:return: A list of all of the frame relations in framenet
:rtype: list(dict)
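For instance (the frame and relation-type names are illustrative; assumes the FrameNet data is installed)::

    from nltk.corpus import framenet as fn

    # All Inheritance relations involving the 'Causation' frame.
    for rel in fn.frame_relations(frame='Causation', type='Inheritance'):
        print(rel)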
2 changes: 1 addition & 1 deletion nltk/corpus/reader/ieer.py
@@ -14,7 +14,7 @@
This corpus contains the NEWSWIRE development test data for the
NIST 1999 IE-ER Evaluation. The files were taken from the
-subdirectory: /ie_er_99/english/devtest/newswire/*.ref.nwt
+subdirectory: ``/ie_er_99/english/devtest/newswire/*.ref.nwt``
and filenames were shortened.
The corpus contains the following files: APW_19980314, APW_19980424,
1 change: 0 additions & 1 deletion nltk/corpus/reader/knbc.py
@@ -38,7 +38,6 @@ class KNBCorpusReader(SyntaxCorpusReader):
tags = (surface, reading, lemma, pos1, posid1, pos2, posid2, pos3, posid3, others ...)
Usage example
-------------
>>> from nltk.corpus.util import LazyCorpusLoader
>>> knbc = LazyCorpusLoader(
6 changes: 3 additions & 3 deletions nltk/corpus/reader/nombank.py
@@ -70,7 +70,7 @@ def __init__(
def instances(self, baseform=None):
"""
:return: a corpus view that acts as a list of
    ``NombankInstance`` objects, one for each noun in the corpus.
"""
kwargs = {}
if baseform is not None:
@@ -84,7 +84,7 @@ def instances(self, baseform=None):
def lines(self):
"""
:return: a corpus view that acts as a list of strings, one for
    each line in the predicate-argument annotation file.
"""
return StreamBackedCorpusView(
self.abspath(self._nomfile),
@@ -138,7 +138,7 @@ def rolesets(self, baseform=None):
def nouns(self):
"""
:return: a corpus view that acts as a list of all noun lemmas
    in this corpus (from the nombank.1.0.words file).
"""
return StreamBackedCorpusView(
self.abspath(self._nounsfile),
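Typical access goes through the bundled loader (assumes the NomBank data is installed)::

    from nltk.corpus import nombank

    inst = nombank.instances()[0]   # a NombankInstance
    print(inst.fileid, inst.predicate, inst.roleset)
    print(nombank.nouns()[:5])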
7 changes: 5 additions & 2 deletions nltk/corpus/reader/opinion_lexicon.py
@@ -8,9 +8,11 @@
"""
CorpusReader for the Opinion Lexicon.
-- Opinion Lexicon information -
+Opinion Lexicon information
+===========================

Authors: Minqing Hu and Bing Liu, 2004.
-Department of Computer Sicence
+Department of Computer Science
University of Illinois at Chicago
Contact: Bing Liu, liub@cs.uic.edu
@@ -19,6 +21,7 @@
Distributed with permission.
Related papers:
- Minqing Hu and Bing Liu. "Mining and summarizing customer reviews".
Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery
& Data Mining (KDD-04), Aug 22-25, 2004, Seattle, Washington, USA.
4 changes: 2 additions & 2 deletions nltk/corpus/reader/panlex_lite.py
@@ -108,14 +108,14 @@ def meanings(self, expr_uid, expr_tt):
def translations(self, from_uid, from_tt, to_uid):
"""
Return a list of translations for an expression into a single language
    variety.
:param from_uid: the source expression's language variety, as a
seven-character uniform identifier.
:param from_tt: the source expression's text.
:param to_uid: the target language variety, as a seven-character
uniform identifier.
-:return a list of translation tuples. The first element is the expression
+:return: a list of translation tuples. The first element is the expression
text and the second element is the translation quality.
:rtype: list(tuple)
"""
19 changes: 10 additions & 9 deletions nltk/corpus/reader/plaintext.py
@@ -177,15 +177,16 @@ class EuroparlCorpusReader(PlaintextCorpusReader):
for regular plaintext documents. Chapters are separated using blank
lines. Everything is inherited from ``PlaintextCorpusReader`` except
that:
- Since the corpus is pre-processed and pre-tokenized, the
  word tokenizer should just split the line at whitespaces.
- For the same reason, the sentence tokenizer should just
  split the paragraph at line breaks.
- There is a new 'chapters()' method that returns chapters
  instead of paragraphs.
- The 'paras()' method inherited from PlaintextCorpusReader is
  made non-functional to remove any confusion between chapters
  and paragraphs for Europarl.
"""

def _read_word_block(self, stream):
6 changes: 3 additions & 3 deletions nltk/corpus/reader/propbank.py
@@ -70,7 +70,7 @@ def __init__(
def instances(self, baseform=None):
"""
:return: a corpus view that acts as a list of
    ``PropBankInstance`` objects, one for each verb in the corpus.
"""
kwargs = {}
if baseform is not None:
@@ -84,7 +84,7 @@ def instances(self, baseform=None):
def lines(self):
"""
:return: a corpus view that acts as a list of strings, one for
    each line in the predicate-argument annotation file.
"""
return StreamBackedCorpusView(
self.abspath(self._propfile),
@@ -134,7 +134,7 @@ def rolesets(self, baseform=None):
def verbs(self):
"""
:return: a corpus view that acts as a list of all verb lemmas
    in this corpus (from the verbs.txt file).
"""
return StreamBackedCorpusView(
self.abspath(self._verbsfile),
