From 7e401fb9952cad9af0528712384bd20de329991c Mon Sep 17 00:00:00 2001
From: Sylvain Gugger <Sylvain.gugger@gmail.com>
Date: Wed, 27 Jul 2022 08:21:58 -0400
Subject: [PATCH 1/2] Remove all uses of six

---
 .../research_projects/tapex/wikisql_utils.py  |  4 +---
 .../deberta_v2/tokenization_deberta_v2.py     | 19 +++++-----------
 .../models/flaubert/tokenization_flaubert.py  | 22 +++++++++++++------
 3 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/examples/research_projects/tapex/wikisql_utils.py b/examples/research_projects/tapex/wikisql_utils.py
index 9147fdc882e4b..3028e81ad481f 100644
--- a/examples/research_projects/tapex/wikisql_utils.py
+++ b/examples/research_projects/tapex/wikisql_utils.py
@@ -23,8 +23,6 @@
 # Original: https://github.com/google-research/tapas/master/wikisql_utils.py
 from typing import Any, List, Text
 
-import six
-
 
 EMPTY_ANSWER = "none"
 EMPTY_ANSWER_AGG = "none"
@@ -49,7 +47,7 @@ def convert_to_float(value):
         return value
     if isinstance(value, int):
         return float(value)
-    if not isinstance(value, six.string_types):
+    if not isinstance(value, str):
         raise ValueError("Argument value is not a string. Can't parse it as float")
     sanitized = value
 
diff --git a/src/transformers/models/deberta_v2/tokenization_deberta_v2.py b/src/transformers/models/deberta_v2/tokenization_deberta_v2.py
index 123afacf822ca..9ac28c82cd614 100644
--- a/src/transformers/models/deberta_v2/tokenization_deberta_v2.py
+++ b/src/transformers/models/deberta_v2/tokenization_deberta_v2.py
@@ -19,7 +19,6 @@
 from typing import Any, Dict, List, Optional, Tuple
 
 import sentencepiece as sp
-import six
 
 from ...tokenization_utils import PreTrainedTokenizer
 
@@ -523,17 +522,9 @@ def _is_punctuation(char):
 
 def convert_to_unicode(text):
     """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
-    if six.PY3:
-        if isinstance(text, str):
-            return text
-        elif isinstance(text, bytes):
-            return text.decode("utf-8", "ignore")
-        else:
-            raise ValueError(f"Unsupported string type: {type(text)}")
-    elif six.PY2:
-        if isinstance(text, str):
-            return text.decode("utf-8", "ignore")
-        else:
-            raise ValueError(f"Unsupported string type: {type(text)}")
+    if isinstance(text, str):
+        return text
+    elif isinstance(text, bytes):
+        return text.decode("utf-8", "ignore")
     else:
-        raise ValueError("Not running on Python2 or Python 3?")
+        raise ValueError(f"Unsupported string type: {type(text)}")
diff --git a/src/transformers/models/flaubert/tokenization_flaubert.py b/src/transformers/models/flaubert/tokenization_flaubert.py
index 4fbb3783d8a38..7fad11e20e74a 100644
--- a/src/transformers/models/flaubert/tokenization_flaubert.py
+++ b/src/transformers/models/flaubert/tokenization_flaubert.py
@@ -17,8 +17,6 @@
 
 import unicodedata
 
-import six
-
 from ...utils import logging
 from ..xlm.tokenization_xlm import XLMTokenizer
 
@@ -72,20 +70,30 @@
 }
 
 
+def convert_to_unicode(text):
+    """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
+    if isinstance(text, str):
+        return text
+    elif isinstance(text, bytes):
+        return text.decode("utf-8", "ignore")
+    else:
+        raise ValueError(f"Unsupported string type: {type(text)}")
+
+
 def convert_to_unicode(text):
     """
     Converts `text` to Unicode (if it's not already), assuming UTF-8 input.
     """
-    # six_ensure_text is copied from https://github.com/benjaminp/six
-    def six_ensure_text(s, encoding="utf-8", errors="strict"):
-        if isinstance(s, six.binary_type):
+
+    def ensure_text(s, encoding="utf-8", errors="strict"):
+        if isinstance(s, bytes):
             return s.decode(encoding, errors)
-        elif isinstance(s, six.text_type):
+        elif isinstance(s, str):
             return s
         else:
             raise TypeError(f"not expecting type '{type(s)}'")
 
-    return six_ensure_text(text, encoding="utf-8", errors="ignore")
+    return ensure_text(text, encoding="utf-8", errors="ignore")
 
 
 class FlaubertTokenizer(XLMTokenizer):

From ded2daf750b65e28b79202e1f0849d0f24bb8121 Mon Sep 17 00:00:00 2001
From: Sylvain Gugger <Sylvain.gugger@gmail.com>
Date: Wed, 27 Jul 2022 08:27:38 -0400
Subject: [PATCH 2/2] Fix quality: remove duplicate convert_to_unicode in Flaubert tokenizer

---
 .../models/flaubert/tokenization_flaubert.py           | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/src/transformers/models/flaubert/tokenization_flaubert.py b/src/transformers/models/flaubert/tokenization_flaubert.py
index 7fad11e20e74a..5d5ad2a657d1b 100644
--- a/src/transformers/models/flaubert/tokenization_flaubert.py
+++ b/src/transformers/models/flaubert/tokenization_flaubert.py
@@ -70,16 +70,6 @@
 }
 
 
-def convert_to_unicode(text):
-    """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
-    if isinstance(text, str):
-        return text
-    elif isinstance(text, bytes):
-        return text.decode("utf-8", "ignore")
-    else:
-        raise ValueError(f"Unsupported string type: {type(text)}")
-
-
 def convert_to_unicode(text):
     """
     Converts `text` to Unicode (if it's not already), assuming UTF-8 input.