From 6b78e285efbf6da4f5ecd95a44fdf7264c244fd4 Mon Sep 17 00:00:00 2001 From: adamjhawley Date: Wed, 10 Nov 2021 21:01:50 +0200 Subject: [PATCH] Deprecated 'return_str' parameter in NLTKWordTokenizer and TreebankWordTokenizer --- AUTHORS.md | 1 + nltk/tokenize/destructive.py | 12 +++++++++++- nltk/tokenize/treebank.py | 12 +++++++++++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/AUTHORS.md b/AUTHORS.md index 5b7546d43c..04592e11cc 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -285,6 +285,7 @@ - Saibo Geng - Ahmet Yildirim - Yuta Nakamura +- Adam Hawley ## Others whose work we've taken and included in NLTK, but who didn't directly contribute it: diff --git a/nltk/tokenize/destructive.py b/nltk/tokenize/destructive.py index 32eb64fd8f..0e780ab4e6 100644 --- a/nltk/tokenize/destructive.py +++ b/nltk/tokenize/destructive.py @@ -8,6 +8,7 @@ import re +import warnings from nltk.tokenize.api import TokenizerI from nltk.tokenize.util import align_tokens @@ -113,6 +114,15 @@ class NLTKWordTokenizer(TokenizerI): CONTRACTIONS3 = list(map(re.compile, _contractions.CONTRACTIONS3)) def tokenize(self, text, convert_parentheses=False, return_str=False): + + if return_str is not False: + warnings.warn( + "Parameter 'return_str' has been deprecated and should no " + "longer be used.", + category=DeprecationWarning, + stacklevel=2, + ) + for regexp, substitution in self.STARTING_QUOTES: text = regexp.sub(substitution, text) @@ -147,7 +157,7 @@ def tokenize(self, text, convert_parentheses=False, return_str=False): # for regexp in self._contractions.CONTRACTIONS4: # text = regexp.sub(r' \1 \2 \3 ', text) - return text if return_str else text.split() + return text.split() def span_tokenize(self, text): r""" diff --git a/nltk/tokenize/treebank.py b/nltk/tokenize/treebank.py index 51c2020130..9cd11c749c 100644 --- a/nltk/tokenize/treebank.py +++ b/nltk/tokenize/treebank.py @@ -18,6 +18,7 @@ """ import re +import warnings from nltk.tokenize.api import TokenizerI from 
nltk.tokenize.destructive import MacIntyreContractions @@ -99,6 +100,15 @@ class TreebankWordTokenizer(TokenizerI): CONTRACTIONS3 = list(map(re.compile, _contractions.CONTRACTIONS3)) def tokenize(self, text, convert_parentheses=False, return_str=False): + + if return_str is not False: + warnings.warn( + "Parameter 'return_str' has been deprecated and should no " + "longer be used.", + category=DeprecationWarning, + stacklevel=2, + ) + for regexp, substitution in self.STARTING_QUOTES: text = regexp.sub(substitution, text) @@ -133,7 +143,7 @@ def tokenize(self, text, convert_parentheses=False, return_str=False): # for regexp in self._contractions.CONTRACTIONS4: # text = regexp.sub(r' \1 \2 \3 ', text) - return text if return_str else text.split() + return text.split() def span_tokenize(self, text): r"""