Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove all uses of six #18318

Merged
Merged 2 commits on Jul 27, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 1 addition & 3 deletions examples/research_projects/tapex/wikisql_utils.py
Expand Up @@ -23,8 +23,6 @@
# Original: https://github.com/google-research/tapas/master/wikisql_utils.py
from typing import Any, List, Text

import six


EMPTY_ANSWER = "none"
EMPTY_ANSWER_AGG = "none"
Expand All @@ -49,7 +47,7 @@ def convert_to_float(value):
return value
if isinstance(value, int):
return float(value)
if not isinstance(value, six.string_types):
if not isinstance(value, str):
raise ValueError("Argument value is not a string. Can't parse it as float")
sanitized = value

Expand Down
19 changes: 5 additions & 14 deletions src/transformers/models/deberta_v2/tokenization_deberta_v2.py
Expand Up @@ -19,7 +19,6 @@
from typing import Any, Dict, List, Optional, Tuple

import sentencepiece as sp
import six

from ...tokenization_utils import PreTrainedTokenizer

Expand Down Expand Up @@ -523,17 +522,9 @@ def _is_punctuation(char):

def convert_to_unicode(text):
"""Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
if six.PY3:
if isinstance(text, str):
return text
elif isinstance(text, bytes):
return text.decode("utf-8", "ignore")
else:
raise ValueError(f"Unsupported string type: {type(text)}")
elif six.PY2:
if isinstance(text, str):
return text.decode("utf-8", "ignore")
else:
raise ValueError(f"Unsupported string type: {type(text)}")
if isinstance(text, str):
return text
elif isinstance(text, bytes):
return text.decode("utf-8", "ignore")
else:
raise ValueError("Not running on Python2 or Python 3?")
raise ValueError(f"Unsupported string type: {type(text)}")
22 changes: 15 additions & 7 deletions src/transformers/models/flaubert/tokenization_flaubert.py
Expand Up @@ -17,8 +17,6 @@

import unicodedata

import six

from ...utils import logging
from ..xlm.tokenization_xlm import XLMTokenizer

Expand Down Expand Up @@ -72,20 +70,30 @@
}


def convert_to_unicode(text):
"""Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
if isinstance(text, str):
return text
elif isinstance(text, bytes):
return text.decode("utf-8", "ignore")
else:
raise ValueError(f"Unsupported string type: {type(text)}")


def convert_to_unicode(text):
"""
Converts `text` to Unicode (if it's not already), assuming UTF-8 input.
"""
# six_ensure_text is copied from https://github.com/benjaminp/six
def six_ensure_text(s, encoding="utf-8", errors="strict"):
if isinstance(s, six.binary_type):

def ensure_text(s, encoding="utf-8", errors="strict"):
if isinstance(s, bytes):
return s.decode(encoding, errors)
elif isinstance(s, six.text_type):
elif isinstance(s, str):
return s
else:
raise TypeError(f"not expecting type '{type(s)}'")

return six_ensure_text(text, encoding="utf-8", errors="ignore")
return ensure_text(text, encoding="utf-8", errors="ignore")


class FlaubertTokenizer(XLMTokenizer):
Expand Down