Skip to content

Commit

Permalink
ENH Improves error message for mixed types for feature names (#25018)
Browse files (browse the repository at this point in the history)
Co-authored-by: jeremie du boisberranger <jeremiedbb@yahoo.fr>
  • Loading branch information
thomasjpfan and jeremiedbb committed Nov 25, 2022
1 parent 1912ae5 commit f196344
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 6 deletions.
8 changes: 6 additions & 2 deletions sklearn/tests/test_base.py
Expand Up @@ -651,8 +651,12 @@ def transform(self, X):
df_mixed = pd.DataFrame(X_np, columns=["a", "b", 1, 2])
trans = NoOpTransformer()
msg = re.escape(
- "Feature names only support names that are all strings. "
- "Got feature names with dtypes: ['int', 'str']"
+ "Feature names are only supported if all input features have string names, "
+ "but your input has ['int', 'str'] as feature name / column name types. "
+ "If you want feature names to be stored and validated, you must convert "
+ "them all to strings, by using X.columns = X.columns.astype(str) for "
+ "example. Otherwise you can remove feature / column names from your input "
+ "data, or convert them all to a non-string data type."
)
with pytest.raises(TypeError, match=msg):
trans.fit(df_mixed)
Expand Down
8 changes: 6 additions & 2 deletions sklearn/utils/tests/test_validation.py
Expand Up @@ -1675,8 +1675,12 @@ def test_get_feature_names_invalid_dtypes(names, dtypes):
X = pd.DataFrame([[1, 2], [4, 5], [5, 6]], columns=names)

msg = re.escape(
- "Feature names only support names that are all strings. "
- f"Got feature names with dtypes: {dtypes}."
+ "Feature names are only supported if all input features have string names, "
+ f"but your input has {dtypes} as feature name / column name types. "
+ "If you want feature names to be stored and validated, you must convert "
+ "them all to strings, by using X.columns = X.columns.astype(str) for "
+ "example. Otherwise you can remove feature / column names from your input "
+ "data, or convert them all to a non-string data type."
)
with pytest.raises(TypeError, match=msg):
names = _get_feature_names(X)
Expand Down
8 changes: 6 additions & 2 deletions sklearn/utils/validation.py
Expand Up @@ -1884,8 +1884,12 @@ def _get_feature_names(X):
# mixed type of string and non-string is not supported
if len(types) > 1 and "str" in types:
raise TypeError(
- "Feature names only support names that are all strings. "
- f"Got feature names with dtypes: {types}."
+ "Feature names are only supported if all input features have string names, "
+ f"but your input has {types} as feature name / column name types. "
+ "If you want feature names to be stored and validated, you must convert "
+ "them all to strings, by using X.columns = X.columns.astype(str) for "
+ "example. Otherwise you can remove feature / column names from your input "
+ "data, or convert them all to a non-string data type."
)

# Only feature names of all strings are supported
Expand Down

0 comments on commit f196344

Please sign in to comment.