Merge pull request #2 from ogrisel/robertlayton-kmeans_transform2

Robertlayton kmeans transform2
scikit-learn · Aug 24, 2011 · e9ae6f1 · e9ae6f1
2 parents 1bc5e57 + c21d8d0
commit e9ae6f1
Show file tree

Hide file tree

Showing 77 changed files with 1,390 additions and 687 deletions.
diff --git a/benchmarks/bench_plot_omp_lars.py b/benchmarks/bench_plot_omp_lars.py
@@ -98,16 +98,17 @@ def compute_bench(samples_range, features_range):
 
 
 if __name__ == '__main__':
-    samples_range = np.linspace(1000, 3000, 5).astype(np.int)
-    features_range = np.linspace(1000, 3000, 5).astype(np.int)
+    samples_range = np.linspace(1000, 5000, 5).astype(np.int)
+    features_range = np.linspace(1000, 5000, 5).astype(np.int)
     results = compute_bench(samples_range, features_range)
     max_time = max(np.max(t) for t in results.itervalues())
 
     import pylab as pl
     fig = pl.figure()
     for i, (label, timings) in enumerate(sorted(results.iteritems())):
         ax = fig.add_subplot(1, 2, i)
-        pl.matshow(timings, fignum=False, cmap='OrRd')
+        vmax = max(1 - timings.min(), -1 + timings.max())
+        pl.matshow(timings, fignum=False, vmin=1-vmax, vmax=1+vmax)
         ax.set_xticklabels([''] + map(str, samples_range))
         ax.set_yticklabels([''] + map(str, features_range))
         pl.xlabel('n_samples')

diff --git a/doc/datasets/index.rst b/doc/datasets/index.rst
@@ -47,6 +47,31 @@ These datasets are useful to quickly illustrate the behavior of the
 various algorithms implemented in the scikit. They are however often too
 small to be representative of real world machine learning tasks.
 
+Sample images
+=============
+
+The scikit also embeds a couple of sample JPEG images published under Creative
+Commons license by their authors. Those images can be useful to test algorithms
+and pipelines on 2D data.
+
+.. autosummary::
+
+   load_sample_images
+   load_sample_image
+
+.. note::
+
+  The default coding of images is based on the ``uint8`` dtype to
+  spare memory.  Often machine learning algorithms work best if the
+  input is converted to a floating point representation first.  Also,
+  if you plan to use ``pylab.imshow`` don't forget to scale to the range
+  0 - 1 as done in the following example.
+
+.. topic:: Examples:
+
+    * :ref:`example_cluster_plot_vq_china.py`
+
+
 Sample generators
 =================
 

diff --git a/doc/developers/index.rst b/doc/developers/index.rst
@@ -328,6 +328,11 @@ classifier or a regressor. All estimators implement the fit method::
 
     estimator.fit(X, y)
 
+All built-in estimators also have a ``set_params`` method, which sets
+data-independent parameters (overriding previous parameter values passed
+to ``__init__``). This method is not required for an object to be an
+estimator.
+
 
 Instantiation
 ^^^^^^^^^^^^^

diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
@@ -561,6 +561,7 @@ Manifold learning
     :template: class.rst
 
     manifold.LocallyLinearEmbedding
+    manifold.Isomap
 
 
 .. autosummary::

diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst
@@ -406,7 +406,7 @@ homogeneous but not complete::
 
 
 Mathematical formulation
-------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~
 
 Homogeneity and completeness scores are formally given by:
 

diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst
@@ -466,7 +466,7 @@ By default :math:`\alpha_1 = \alpha_2 =  \lambda_1 = \lambda_2 = 1.e^{-6}`, *i.e
     >>> clf = linear_model.BayesianRidge()
     >>> clf.fit (X, Y)
     BayesianRidge(n_iter=300, verbose=False, lambda_1=1e-06, lambda_2=1e-06,
-           fit_intercept=True, eps=0.001, alpha_2=1e-06, alpha_1=1e-06,
+           fit_intercept=True, alpha_2=1e-06, tol=0.001, alpha_1=1e-06,
            compute_score=False)
 
 After being fitted, the model can then be used to predict new values::