diff --git a/AUTHORS.md b/AUTHORS.md
index 4634423d42..30fa80fe93 100644
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -94,6 +94,7 @@
 - Eric Kafe
 - Piotr Kasprzyk
 - Angelos Katharopoulos
+- Stefan Kaufmann
 - Sudharshan Kaushik
 - Chris Koenig
 - Mikhail Korobov
diff --git a/nltk/grammar.py b/nltk/grammar.py
index f7cfe0d85f..d53aec9d40 100644
--- a/nltk/grammar.py
+++ b/nltk/grammar.py
@@ -68,6 +68,7 @@
 with the right hand side (*rhs*) in a tree (*tree*) is known as
 "expanding" *lhs* to *rhs* in *tree*.
 """
+import itertools
 import re
 from collections import deque
 from functools import total_ordering
@@ -674,8 +675,8 @@ def _calculate_grammar_forms(self):
         prods = self._productions
         self._is_lexical = all(p.is_lexical() for p in prods)
         self._is_nonlexical = all(p.is_nonlexical() for p in prods if len(p) != 1)
-        self._min_len = min(len(p) for p in prods)
-        self._max_len = max(len(p) for p in prods)
+        self._min_len = min((len(p) for p in prods), default=None)
+        self._max_len = max((len(p) for p in prods), default=None)
         self._all_unary_are_lexical = all(p.is_lexical() for p in prods if len(p) == 1)
 
     def is_lexical(self):
@@ -713,14 +714,14 @@ def is_nonempty(self):
         """
         Return True if there are no empty productions.
         """
-        return self._min_len > 0
+        return all(len(prod) > 0 for prod in self.productions())
 
     def is_binarised(self):
         """
         Return True if all productions are at most binary.
         Note that there can still be empty and unary productions.
         """
-        return self._max_len <= 2
+        return all(len(prod) <= 2 for prod in self.productions())
 
     def is_flexible_chomsky_normal_form(self):
         """
@@ -736,114 +737,377 @@ def is_chomsky_normal_form(self):
         """
         return self.is_flexible_chomsky_normal_form() and self._all_unary_are_lexical
 
-    def chomsky_normal_form(self, new_token_padding="@$@", flexible=False):
+    def chomsky_normal_form(self, flexible=False, simplify=True):
+        """
+        Return an equivalent grammar in Chomsky Normal Form.
+
+        Keyword Parameters:
+        flexible -- bool: if True, result may contain unit productions.
+        simplify -- bool: if True, remove non-producing and
+                                        unreachable symbols.
+
+        The class methods invoked are named following the usage in
+        M. Lange and H. Leiß (2009), "To CNF or not to CNF? An efficient
+        yet presentable version of the CYK algorithm", Informatica
+        Didactica 8:2008-2010.
+        """
+        if (
+            flexible and self.is_flexible_chomsky_normal_form()
+        ) or self.is_chomsky_normal_form():
+            result = self
+            if simplify:
+                # remove non-terminals that don't generate output
+                result = CFG.PROD(result)
+                # remove symbols that are not reachable from the start
+                result = CFG.REACH(result)
+            return result
+
+        # add a new start symbol if the present one
+        # occurs on a right-hand side
+        result = CFG.START(self)
+
+        if not result.is_nonempty():
+            # remove empty productions (except for startsymbol -> epsilon)
+            result = CFG.DEL(result)
+
+        if not flexible:
+            # remove unit productions (except with a terminal on the rhs)
+            result = CFG.UNIT(result)
+
+        if simplify:
+            # remove non-terminals that don't generate output
+            result = CFG.PROD(result)
+            # remove symbols that are not reachable from the start
+            result = CFG.REACH(result)
+
+        if not result.is_binarised():
+            # reduce right-hand sides to at most two
+            result = CFG.BIN(result)
+
+        if not result.is_nonlexical():
+            # replace non-single terminals with non-terminals
+            result = CFG.TERM(result)
+
+        # Sort the productions for nice output
+        result._productions.sort(
+            key=lambda prod: (
+                # non-lexical before lexical
+                prod.is_lexical(),
+                # start symbol lhs before other lhs
+                prod.lhs() != result.start(),
+                # lhs alphabetically
+                str(prod.lhs()),
+                # rhs item-wise
+                *(
+                    (  # nonterminal before terminal
+                        is_terminal(child),
+                        # alphabetically
+                        str(child),
+                    )
+                    for child in prod.rhs()
+                ),
+            )
+        )
+
+        return result
+
+    @classmethod
+    def _new_nonterminal(
+        cls, grammar, nonterminals_in_use=None, ctr=itertools.count(), stem="X"
+    ):
         """
-        Returns a new Grammar that is in chomsky normal
+        Create a new non-terminal symbol that is not yet
+        used in the grammar.
+
+        Params:
+        grammar -- CFG
+
+        Keyword params:
+        nonterminals_in_use -- set of non-terminals currently in the grammar.
+        Note: CFG objects have an attribute "_categories", the set of left-hand
+        sides in the productions. If the grammar contains no useless symbols,
+        categories coincides with the set of non-terminals. Therefore after
+        the elimination of useless symbols (see "PROD" below), _categories
+        can be passed to _new_nonterminals as the value of nonterminals_in_use.
+        But _new_nonterminal is also called prior to the elimination of useless
+        symbols (see "START" below). In that case _categories should not be used
+        and nonterminals_in_use must be calculated.
+
+        ctr     -- itertools.count
+        stem    -- str
+
+        Return: Nonterminal of the form 'stem+n' for some n.
+        """
+        if nonterminals_in_use == None:
+            nonterminals_in_use = grammar._categories.union(
+                {
+                    child
+                    for prod in grammar.productions()
+                    for child in prod.rhs()
+                    if is_nonterminal(child)
+                }
+            )
+        while True:
+            new_nt = Nonterminal(f"{stem}{next(ctr)}")
+            if new_nt not in nonterminals_in_use:
+                grammar._categories.add(new_nt)
+                return new_nt
 
-        :param: new_token_padding
-            Customise new rule formation during binarisation
+    @classmethod
+    def START(cls, grammar):
         """
-        if self.is_chomsky_normal_form():
-            return self
-        if self.productions(empty=True):
-            raise ValueError(
-                "Grammar has Empty rules. " "Cannot deal with them at the moment"
+        Add a new start symbol if the current start symbol
+        appears on the right-hand side of a production.
+
+        Params:
+        grammar -- CFG
+
+        Return: CFG whose start symbol does not occur on the
+        right-hand side of any production.
+        """
+        if any(grammar.start() in prod.rhs() for prod in grammar.productions()):
+            ctr = itertools.count()
+            new_start = cls._new_nonterminal(grammar, ctr=ctr, stem="S")
+            return cls(
+                new_start,
+                grammar.productions() + [Production(new_start, (grammar.start(),))],
             )
+        else:
+            return grammar
 
-        # check for mixed rules
-        for rule in self.productions():
-            if rule.is_lexical() and len(rule.rhs()) > 1:
-                raise ValueError(
-                    f"Cannot handled mixed rule {rule.lhs()} => {rule.rhs()}"
-                )
+    @classmethod
+    def _DEL_multiply(cls, prod, symbol):
+        """
+        Given a production prod and a (nullable) symbol, generate all
+        variants of prod whose right-hand sides contain a (possibly empty)
+        subset of the occurrences of the symbol.
+
+        Thus for instance if prod is X -> A B A C A
+        then _DEL_multiply( prod, A) generates the productions
+        X -> BC, X -> ABC, X -> BAC, X -> BCA,
+        X -> ABAC, X -> ABCA, X -> BACA, X -> ABACA
+
+        Parameters:
+        prod    -- Production
+        symbol  -- terminal or Nonterminal
+
+        Yield: Production with some subset of the occurrences of symbol
+
+        Helper function for DEL_multiply.
+        """
+        for subset in range(2 ** prod.rhs().count(symbol)):
+            new_rhs = []
+            nth_occurrence = 0
+            for child in prod.rhs():
+                if child == symbol:
+                    if subset & (1 << nth_occurrence):
+                        new_rhs.append(child)
+                        nth_occurrence += 1
+                else:
+                    new_rhs.append(child)
+            yield Production(prod.lhs(), new_rhs)
+
+    @classmethod
+    def DEL(cls, grammar):
+        """
+        Return an equivalent grammar without empty productions.
+        (Startsymbol -> epsilon is not removed, and may in fact
+        be created in the process.)
+
+        Parameter:
+        grammar -- CFG
+        """
+        productions = dict.fromkeys(grammar.productions())
+        while True:
+            empty_productions = dict.fromkeys(
+                prod
+                for prod in productions
+                if len(prod) == 0
+                if prod.lhs() != grammar.start()
+            )
+            if not empty_productions:
+                break
+            for prod in empty_productions:
+                del productions[prod]
+            new_productions = dict()
+            for prod in productions:
+                for empty_prod in empty_productions:
+                    if empty_prod.lhs() in prod.rhs():
+                        new_productions.update(
+                            (prod, None)
+                            for prod in cls._DEL_multiply(prod, empty_prod.lhs())
+                        )
+            productions.update(new_productions)
+
+        return cls(grammar.start(), list(productions))
+
+    @classmethod
+    def _UNIT_paths(cls, grammar, path):
+        """
+        Given a list 'path' of production rules, generate all
+        unit paths containing 'path' as an intial segment.
 
-        step1 = CFG.eliminate_start(self)
-        step2 = CFG.binarize(step1, new_token_padding)
-        if flexible:
-            return step2
-        step3 = CFG.remove_unitary_rules(step2)
-        step4 = CFG(step3.start(), list(set(step3.productions())))
-        return step4
+        A unit path is a list [N1 -> N2, N2 -> N3, ..., Nn -> alpha],
+        where all Ni are non-terminals and alpha is not a non-terminal
+        (i.e., either a list of length != 1, or a terminal).
+
+        Parameters:
+        grammar -- CFG
+        path    -- list of productions
+        """
+        if len(path[-1]) != 1 or path[-1].is_lexical():
+            yield path
+
+        else:
+            for next_prod in grammar.productions():
+                if next_prod.lhs() == path[-1].rhs()[0] and next_prod not in path:
+                    yield from cls._UNIT_paths(grammar, path + [next_prod])
 
     @classmethod
-    def remove_unitary_rules(cls, grammar):
+    def UNIT(cls, grammar):
         """
-        Remove nonlexical unitary rules and convert them to
-        lexical
+        Return an equivalent grammar without unit productions.
+
+        Parameters:
+        grammar -- CFG
         """
-        result = []
-        unitary = deque([])
-        for rule in grammar.productions():
-            if len(rule) == 1 and rule.is_nonlexical():
-                unitary.append(rule)
+        productions = dict.fromkeys(grammar.productions())
+        while True:
+            new_productions = dict.fromkeys(
+                Production(path[0].lhs(), path[-1].rhs())
+                for prod in productions
+                for path in cls._UNIT_paths(grammar, [prod])
+            )
+            if all(prod in new_productions for prod in productions):
+                break
             else:
-                result.append(rule)
-
-        while unitary:
-            rule = unitary.popleft()
-            for item in grammar.productions(lhs=rule.rhs()[0]):
-                new_rule = Production(rule.lhs(), item.rhs())
-                if len(new_rule) != 1 or new_rule.is_lexical():
-                    result.append(new_rule)
-                else:
-                    unitary.append(new_rule)
+                productions = new_productions
+
+        return cls(grammar.start(), list(productions))
+
+    @classmethod
+    def PROD(cls, grammar):
+        """
+        Return an equivalent grammar without non-terminals
+        that don't generate any output.
 
-        n_grammar = CFG(grammar.start(), result)
-        return n_grammar
+        Parameters:
+        grammar -- CFG
+        """
+        producing = {grammar.start()}.union(
+            child
+            for prod in grammar.productions()
+            for child in prod.rhs()
+            if is_terminal(child)
+        )
+        productions_used = dict()
+        while True:
+            new_producing = set()
+            for prod in grammar.productions():
+                if prod not in productions_used and all(
+                    child in producing for child in prod.rhs()
+                ):
+                    new_producing.add(prod.lhs())
+                    productions_used[prod] = None
+            if new_producing <= producing:
+                break
+            else:
+                producing.update(new_producing)
+
+        return cls(grammar.start(), list(productions_used))
 
     @classmethod
-    def binarize(cls, grammar, padding="@$@"):
-        """
-        Convert all non-binary rules into binary by introducing
-        new tokens.
-        Example::
-
-            Original:
-                A => B C D
-            After Conversion:
-                A => B A@$@B
-                A@$@B => C D
-        """
-        result = []
-
-        for rule in grammar.productions():
-            if len(rule.rhs()) > 2:
-                # this rule needs to be broken down
-                left_side = rule.lhs()
-                for k in range(0, len(rule.rhs()) - 2):
-                    tsym = rule.rhs()[k]
-                    new_sym = Nonterminal(left_side.symbol() + padding + tsym.symbol())
-                    new_production = Production(left_side, (tsym, new_sym))
-                    left_side = new_sym
-                    result.append(new_production)
-                last_prd = Production(left_side, rule.rhs()[-2:])
-                result.append(last_prd)
+    def REACH(cls, grammar):
+        """
+        Returnan equivalent grammar without symbols
+        that are not reachable from the start symbol.
+
+        Parameters:
+        grammar -- CFG
+        """
+        reachable = {grammar.start()}
+        productions_used = dict()
+        while True:
+            new_reachable = set()
+            for prod in grammar.productions():
+                if prod not in productions_used and prod.lhs() in reachable:
+                    new_reachable.update(set(prod.rhs()))
+                    productions_used[prod] = None
+            if new_reachable <= reachable:
+                break
             else:
-                result.append(rule)
+                reachable.update(new_reachable)
 
-        n_grammar = CFG(grammar.start(), result)
-        return n_grammar
+        return cls(grammar.start(), list(productions_used))
 
     @classmethod
-    def eliminate_start(cls, grammar):
-        """
-        Eliminate start rule in case it appears on RHS
-        Example: S -> S0 S1 and S0 -> S1 S
-        Then another rule S0_Sigma -> S is added
-        """
-        start = grammar.start()
-        result = []
-        need_to_add = None
-        for rule in grammar.productions():
-            if start in rule.rhs():
-                need_to_add = True
-            result.append(rule)
-        if need_to_add:
-            start = Nonterminal("S0_SIGMA")
-            result.append(Production(start, [grammar.start()]))
-            n_grammar = CFG(start, result)
-            return n_grammar
-        return grammar
+    def BIN(cls, grammar):
+        """
+        Return an equivalent grammar none of whose right-hand sides
+        are longer than 2.
+
+        Parameters:
+        grammar -- CFG
+        """
+        new_productions = []
+        ctr = itertools.count()
+        for prod in grammar.productions():
+            if len(prod) <= 2:
+                new_productions.append(prod)
+            else:
+                current_lhs, current_rhs = prod.lhs(), list(prod.rhs())
+                while len(current_rhs) > 2:
+                    left_child = current_rhs.pop(0)
+                    new_parent = cls._new_nonterminal(
+                        grammar,
+                        nonterminals_in_use=grammar._categories,
+                        ctr=ctr,
+                        stem="B",
+                    )
+                    new_productions.append(
+                        Production(current_lhs, (left_child, new_parent))
+                    )
+                    current_lhs = new_parent
+                new_productions.append(Production(current_lhs, current_rhs))
+
+        return cls(grammar.start(), new_productions)
+
+    @classmethod
+    def TERM(cls, grammar):
+        """
+        Return an equivalent grammar in which terminals do not have
+        siblings (terminals or non-terminals) on right-hand sides.
+
+        Parameters:
+        grammar -- CFG
+        """
+        new_parents = dict()
+        new_productions = []
+        ctr = itertools.count()
+        for prod in grammar.productions():
+            if len(prod) > 1 and prod.is_lexical():
+                for child in prod.rhs():
+                    if is_terminal(child) and child not in new_parents:
+                        new_parents[child] = cls._new_nonterminal(
+                            grammar,
+                            nonterminals_in_use=grammar._categories,
+                            ctr=ctr,
+                            stem="T",
+                        )
+                new_productions.append(
+                    Production(
+                        prod.lhs(),
+                        tuple(new_parents.get(child, child) for child in prod.rhs()),
+                    )
+                )
+            else:
+                new_productions.append(prod)
+
+        new_productions.extend(
+            Production(new_parents[child], (child,)) for child in new_parents
+        )
+
+        return cls(grammar.start(), new_productions)
 
     def __repr__(self):
         return "<Grammar with %d productions>" % len(self._productions)
diff --git a/nltk/test/__init__.py b/nltk/test/__init__.py
index fa9d96b782..c924b21f8e 100644
--- a/nltk/test/__init__.py
+++ b/nltk/test/__init__.py
@@ -13,6 +13,4 @@
 ../../web/dev/local_testing.rst
 
 https://github.com/nltk/nltk/blob/develop/web/dev/local_testing.rst
-
-
 """
diff --git a/nltk/test/grammar.doctest b/nltk/test/grammar.doctest
index ffe427bcd3..60525194ca 100644
--- a/nltk/test/grammar.doctest
+++ b/nltk/test/grammar.doctest
@@ -47,6 +47,53 @@ Chomsky Normal Form grammar (Test for bug 474)
     >>> g.productions()[0].lhs()
     VP^<TOP>
 
+Conversion to Chomsky Normal Form
+
+    >>> g = CFG.fromstring("S -> 'a' S 'b' | ")
+    >>> print(g)
+    Grammar with 2 productions (start state = S)
+    S -> 'a' S 'b'
+    S ->
+    >>> print(g.chomsky_normal_form())
+    Grammar with 9 productions (start state = S0)
+        S0 ->
+        S0 -> T0 B0
+        S0 -> T0 T1
+        B0 -> S T1
+        B1 -> S T1
+        S -> T0 B1
+        S -> T0 T1
+        T0 -> 'a'
+        T1 -> 'b'
+
+    >>> g = CFG.fromstring("S -> S")
+    >>> print(g)
+    Grammar with 1 productions (start state = S)
+        S -> S
+    >>> print(g.chomsky_normal_form())
+    Grammar with 0 productions (start state = S0)
+
+
+Removal of useless and unreachable symbols is done by default
+even if the grammar is already in CNF.
+
+    >>> g = CFG.fromstring("""
+    ... S -> X Y
+    ... A -> 'a'
+    ... """)
+    >>> print(g)
+    Grammar with 2 productions (start state = S)
+        S -> X Y
+	A -> 'a'
+    >>> g.is_chomsky_normal_form()
+    True
+    >>> print(g.chomsky_normal_form())
+    Grammar with 0 productions (start state = S)
+    >>> print(g.chomsky_normal_form(simplify=False))
+    Grammar with 2 productions (start state = S)
+        S -> X Y
+    	A -> 'a'
+
 Grammars can contain both empty strings and empty productions:
 
     >>> from nltk.grammar import CFG