From 5a23a850fd061df0c51b8b7917f8589133917ffe Mon Sep 17 00:00:00 2001
From: Jordan Silke <51223540+jsilke@users.noreply.github.com>
Date: Fri, 8 Apr 2022 12:27:07 -0400
Subject: [PATCH] DOC modify plot_scalable_poly_kernels.py format (#23009)

Co-authored-by: Guillaume Lemaitre
---
 .../plot_scalable_poly_kernels.py             | 55 +++++++++++++------
 1 file changed, 38 insertions(+), 17 deletions(-)

diff --git a/examples/kernel_approximation/plot_scalable_poly_kernels.py b/examples/kernel_approximation/plot_scalable_poly_kernels.py
index e1ad883dd6517..ade27e16e349a 100644
--- a/examples/kernel_approximation/plot_scalable_poly_kernels.py
+++ b/examples/kernel_approximation/plot_scalable_poly_kernels.py
@@ -24,16 +24,10 @@
 # Author: Daniel Lopez-Sanchez
 # License: BSD 3 clause
 
-import matplotlib.pyplot as plt
-from sklearn.datasets import fetch_covtype
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import MinMaxScaler, Normalizer
-from sklearn.svm import LinearSVC
-from sklearn.kernel_approximation import PolynomialCountSketch
-from sklearn.pipeline import Pipeline, make_pipeline
-import time
-
 # %%
+# Preparing the data
+# ------------------
+#
 # Load the Covtype dataset, which contains 581,012 samples
 # with 54 features each, distributed among 6 classes. The goal of this dataset
 # is to predict forest cover type from cartographic variables only
@@ -41,34 +35,53 @@
 # classification problem to match the version of the dataset in the
 # LIBSVM webpage [2], which was the one used in [1].
 
+from sklearn.datasets import fetch_covtype
+
 X, y = fetch_covtype(return_X_y=True)
 
 y[y != 2] = 0
 y[y == 2] = 1  # We will try to separate class 2 from the other 6 classes.
 
 # %%
+# Partitioning the data
+# ---------------------
+#
 # Here we select 5,000 samples for training and 10,000 for testing.
 # To actually reproduce the results in the original Tensor Sketch paper,
 # select 100,000 for training.
 
+from sklearn.model_selection import train_test_split
+
 X_train, X_test, y_train, y_test = train_test_split(
     X, y, train_size=5_000, test_size=10_000, random_state=42
 )
 
 # %%
+# Feature normalization
+# ---------------------
+#
 # Now scale features to the range [0, 1] to match the format of the dataset in
 # the LIBSVM webpage, and then normalize to unit length as done in the
 # original Tensor Sketch paper [1].
 
+from sklearn.preprocessing import MinMaxScaler, Normalizer
+from sklearn.pipeline import make_pipeline
+
 mm = make_pipeline(MinMaxScaler(), Normalizer())
 X_train = mm.fit_transform(X_train)
 X_test = mm.transform(X_test)
 
 # %%
+# Establishing a baseline model
+# -----------------------------
+#
 # As a baseline, train a linear SVM on the original features and print the
 # accuracy. We also measure and store accuracies and training times to
 # plot them later.
 
+import time
+from sklearn.svm import LinearSVC
+
 results = {}
 
 lsvm = LinearSVC()
@@ -81,6 +94,9 @@
 print(f"Linear SVM score on raw features: {lsvm_score:.2f}%")
 
 # %%
+# Establishing the kernel approximation model
+# -------------------------------------------
+#
 # Then we train linear SVMs on the features generated by
 # :class:`PolynomialCountSketch` with different values for `n_components`,
 # showing that these kernel feature approximations improve the accuracy
@@ -98,6 +114,8 @@
 # (`n_runs` = 1) in this example, in practice one should repeat the experiment several
 # times to compensate for the stochastic nature of :class:`PolynomialCountSketch`.
 
+from sklearn.kernel_approximation import PolynomialCountSketch
+
 n_runs = 1
 N_COMPONENTS = [250, 500, 1000, 2000]
 
@@ -107,14 +125,9 @@
     ps_lsvm_score = 0
     for _ in range(n_runs):
 
-        pipeline = Pipeline(
-            steps=[
-                (
-                    "kernel_approximator",
-                    PolynomialCountSketch(n_components=n_components, degree=4),
-                ),
-                ("linear_classifier", LinearSVC()),
-            ]
+        pipeline = make_pipeline(
+            PolynomialCountSketch(n_components=n_components, degree=4),
+            LinearSVC(),
         )
 
         start = time.time()
@@ -135,6 +148,9 @@
 )
 
 # %%
+# Establishing the kernelized SVM model
+# -------------------------------------
+#
 # Train a kernelized SVM to see how well :class:`PolynomialCountSketch`
 # is approximating the performance of the kernel. This, of course, may take
 # some time, as the SVC class has a relatively poor scalability. This is the
@@ -153,11 +169,16 @@
 print(f"Kernel-SVM score on raw features: {ksvm_score:.2f}%")
 
 # %%
+# Comparing the results
+# ---------------------
+#
 # Finally, plot the results of the different methods against their training
 # times. As we can see, the kernelized SVM achieves a higher accuracy,
 # but its training time is much larger and, most importantly, will grow
 # much faster if the number of training samples increases.
 
+import matplotlib.pyplot as plt
+
 fig, ax = plt.subplots(figsize=(7, 7))
 ax.scatter(
     [
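
A minimal sketch (not part of the patch) of the Pipeline/make_pipeline
equivalence the refactored hunk relies on, assuming scikit-learn is installed;
the estimator settings mirror the example but n_components=250 here is just one
of the values the script sweeps over:

    # make_pipeline() builds the same two-step estimator as Pipeline(steps=[...]),
    # but derives the step names from the lowercased class names, so the manual
    # labels "kernel_approximator" and "linear_classifier" can be dropped.
    from sklearn.kernel_approximation import PolynomialCountSketch
    from sklearn.pipeline import Pipeline, make_pipeline
    from sklearn.svm import LinearSVC

    explicit = Pipeline(
        steps=[
            ("kernel_approximator", PolynomialCountSketch(n_components=250, degree=4)),
            ("linear_classifier", LinearSVC()),
        ]
    )
    concise = make_pipeline(
        PolynomialCountSketch(n_components=250, degree=4),
        LinearSVC(),
    )

    # The step names are auto-generated from the class names:
    print(list(concise.named_steps))  # ['polynomialcountsketch', 'linearsvc']

Both objects fit and predict identically; only the step names differ, which is
why the hunk can drop the explicit labels without changing the example's output.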