diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index 934bf7719163c..a0e2f6fd1f273 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -651,8 +651,12 @@ def transform(self, X): df_mixed = pd.DataFrame(X_np, columns=["a", "b", 1, 2]) trans = NoOpTransformer() msg = re.escape( - "Feature names only support names that are all strings. " - "Got feature names with dtypes: ['int', 'str']" + "Feature names are only supported if all input features have string names, " + "but your input has ['int', 'str'] as feature name / column name types. " + "If you want feature names to be stored and validated, you must convert " + "them all to strings, by using X.columns = X.columns.astype(str) for " + "example. Otherwise you can remove feature / column names from your input " + "data, or convert them all to a non-string data type." ) with pytest.raises(TypeError, match=msg): trans.fit(df_mixed) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index cc1ac47a42615..78f26a988dc60 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -1675,8 +1675,12 @@ def test_get_feature_names_invalid_dtypes(names, dtypes): X = pd.DataFrame([[1, 2], [4, 5], [5, 6]], columns=names) msg = re.escape( - "Feature names only support names that are all strings. " - f"Got feature names with dtypes: {dtypes}." + "Feature names are only supported if all input features have string names, " + f"but your input has {dtypes} as feature name / column name types. " + "If you want feature names to be stored and validated, you must convert " + "them all to strings, by using X.columns = X.columns.astype(str) for " + "example. Otherwise you can remove feature / column names from your input " + "data, or convert them all to a non-string data type." ) with pytest.raises(TypeError, match=msg): names = _get_feature_names(X) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index aeb3a8814be22..7de0fe200607b 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -1884,8 +1884,12 @@ def _get_feature_names(X): # mixed type of string and non-string is not supported if len(types) > 1 and "str" in types: raise TypeError( - "Feature names only support names that are all strings. " - f"Got feature names with dtypes: {types}." + "Feature names are only supported if all input features have string names, " + f"but your input has {types} as feature name / column name types. " + "If you want feature names to be stored and validated, you must convert " + "them all to strings, by using X.columns = X.columns.astype(str) for " + "example. Otherwise you can remove feature / column names from your input " + "data, or convert them all to a non-string data type." ) # Only feature names of all strings are supported