From 5f6577ade5eaa12e5cc0e9f8cadfdd9f775c32aa Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Thu, 14 Oct 2021 16:28:26 +0800
Subject: [PATCH 1/2] Support categorical pandas DataFrame in inplace_predict.

---
 python-package/xgboost/core.py            | 16 ++++++++--------
 python-package/xgboost/data.py            | 13 ++++++-------
 python-package/xgboost/sklearn.py         |  6 +-----
 tests/python-gpu/test_gpu_with_sklearn.py | 16 +++++++++++++++-
 4 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py
index 5218630e7196..05b354e21930 100644
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -1973,13 +1973,6 @@ def inplace_predict(
         preds = ctypes.POINTER(ctypes.c_float)()
 
         # once caching is supported, we can pass id(data) as cache id.
-        try:
-            import pandas as pd
-
-            if isinstance(data, pd.DataFrame):
-                data = data.values
-        except ImportError:
-            pass
         args = {
             "type": 0,
             "training": False,
@@ -2014,7 +2007,15 @@ def inplace_predict(
                     f"got {data.shape[1]}"
                 )
 
+        from .data import _is_pandas_df, _transform_pandas_df
         from .data import _array_interface
+        if _is_pandas_df(data):
+            ft = self.feature_types
+            if ft is None:
+                enable_categorical = False
+            else:
+                enable_categorical = any(f == "c" for f in ft)
+            data, _, _ = _transform_pandas_df(data, enable_categorical)
         if isinstance(data, np.ndarray):
             from .data import _ensure_np_dtype
             data, _ = _ensure_np_dtype(data, data.dtype)
@@ -2068,7 +2069,6 @@ def inplace_predict(
             return _prediction_output(shape, dims, preds, True)
         if lazy_isinstance(data, "cudf.core.dataframe", "DataFrame"):
             from .data import _cudf_array_interfaces
-
             _, interfaces_str = _cudf_array_interfaces(data)
             _check_call(
                 _LIB.XGBoosterPredictFromCudaColumnar(
diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py
index e4b5a690359a..8908a3a58b62 100644
--- a/python-package/xgboost/data.py
+++ b/python-package/xgboost/data.py
@@ -289,16 +289,15 @@ def _transform_pandas_df(
 def _from_pandas_df(
     data,
     enable_categorical: bool,
-    missing,
-    nthread,
+    missing: float,
+    nthread: int,
     feature_names: Optional[List[str]],
     feature_types: Optional[List[str]],
-):
+) -> Tuple[ctypes.c_void_p, Optional[List[str]], Optional[List[str]]]:
     data, feature_names, feature_types = _transform_pandas_df(
-        data, enable_categorical, feature_names, feature_types)
-    return _from_numpy_array(data, missing, nthread, feature_names,
-                             feature_types)
-
+        data, enable_categorical, feature_names, feature_types
+    )
+    return _from_numpy_array(data, missing, nthread, feature_names, feature_types)
 
 def _is_pandas_series(data):
     try:
diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index 172ea20a6ece..e4c4b9928fb2 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -809,11 +809,7 @@ def _can_use_inplace_predict(self) -> bool:
         # Inplace predict doesn't handle as many data types as DMatrix, but it's
         # sufficient for dask interface where input is simpiler.
         predictor = self.get_params().get("predictor", None)
-        if (
-            not self.enable_categorical
-            and predictor in ("auto", None)
-            and self.booster != "gblinear"
-        ):
+        if predictor in ("auto", None) and self.booster != "gblinear":
             return True
         return False
 
diff --git a/tests/python-gpu/test_gpu_with_sklearn.py b/tests/python-gpu/test_gpu_with_sklearn.py
index 58e64886d9d5..6664004f517b 100644
--- a/tests/python-gpu/test_gpu_with_sklearn.py
+++ b/tests/python-gpu/test_gpu_with_sklearn.py
@@ -59,7 +59,6 @@ def test_categorical():
     )
     X = pd.DataFrame(X.todense()).astype("category")
     clf.fit(X, y)
-    assert not clf._can_use_inplace_predict()
 
     with tempfile.TemporaryDirectory() as tempdir:
         model = os.path.join(tempdir, "categorial.json")
@@ -74,3 +73,18 @@ def test_categorical():
             )
             assert categories_sizes.shape[0] != 0
             np.testing.assert_allclose(categories_sizes, 1)
+
+    X = pd.DataFrame({"f0": ["a", "b", "c"]})
+    X["f0"] = X["f0"].astype("category")
+
+    y = [1, 2, 3]
+    reg = xgb.XGBRegressor(
+        tree_method="gpu_hist", enable_categorical=True, n_estimators=64
+    )
+    reg.fit(X, y)
+    predts = reg.predict(X)
+    booster = reg.get_booster()
+    assert "c" in booster.feature_types
+    assert len(booster.feature_types) == 1
+    inp_predts = booster.inplace_predict(X)
+    np.testing.assert_allclose(predts, inp_predts)

From 5fd7dbafe1c7559e1d4d80dab7b74b5c749efb93 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Thu, 14 Oct 2021 16:42:09 +0800
Subject: [PATCH 2/2] Test categorical inplace_predict with cuDF input.

---
 tests/python-gpu/test_gpu_with_sklearn.py | 32 +++++++++++++++--------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/tests/python-gpu/test_gpu_with_sklearn.py b/tests/python-gpu/test_gpu_with_sklearn.py
index 6664004f517b..f8d510753944 100644
--- a/tests/python-gpu/test_gpu_with_sklearn.py
+++ b/tests/python-gpu/test_gpu_with_sklearn.py
@@ -44,9 +44,12 @@ def test_num_parallel_tree():
 
 
 @pytest.mark.skipif(**tm.no_pandas())
+@pytest.mark.skipif(**tm.no_cudf())
 @pytest.mark.skipif(**tm.no_sklearn())
 def test_categorical():
     import pandas as pd
+    import cudf
+    import cupy as cp
     from sklearn.datasets import load_svmlight_file
 
     data_dir = os.path.join(tm.PROJECT_ROOT, "demo", "data")
@@ -74,17 +77,24 @@ def test_categorical():
             assert categories_sizes.shape[0] != 0
             np.testing.assert_allclose(categories_sizes, 1)
 
+    def check_predt(X, y):
+        reg = xgb.XGBRegressor(
+            tree_method="gpu_hist", enable_categorical=True, n_estimators=64
+        )
+        reg.fit(X, y)
+        predts = reg.predict(X)
+        booster = reg.get_booster()
+        assert "c" in booster.feature_types
+        assert len(booster.feature_types) == 1
+        inp_predts = booster.inplace_predict(X)
+        if isinstance(inp_predts, cp.ndarray):
+            inp_predts = cp.asnumpy(inp_predts)
+        np.testing.assert_allclose(predts, inp_predts)
+
+    y = [1, 2, 3]
     X = pd.DataFrame({"f0": ["a", "b", "c"]})
     X["f0"] = X["f0"].astype("category")
+    check_predt(X, y)
 
-    y = [1, 2, 3]
-    reg = xgb.XGBRegressor(
-        tree_method="gpu_hist", enable_categorical=True, n_estimators=64
-    )
-    reg.fit(X, y)
-    predts = reg.predict(X)
-    booster = reg.get_booster()
-    assert "c" in booster.feature_types
-    assert len(booster.feature_types) == 1
-    inp_predts = booster.inplace_predict(X)
-    np.testing.assert_allclose(predts, inp_predts)
+    X = cudf.DataFrame(X)
+    check_predt(X, y)