more on LARS (we're getting there) #2

Merged
9 commits merged on Sep 2, 2010
33 changes: 12 additions & 21 deletions doc/modules/classes.rst
@@ -18,6 +18,18 @@ Support Vector Machines
svm.NuSVR
svm.OneClass

.. _sparse_svm_class_reference:

sparse.svm.SVC
sparse.svm.LinearSVC
sparse.svm.NuSVC
sparse.svm.SVR
sparse.svm.NuSVR
sparse.svm.OneClass

For sparse data
-----------------

Generalized Linear Models
=========================

@@ -29,24 +41,3 @@ Generalized Linear Models
glm.Ridge
glm.Lasso



For sparse data
===============

Support Vector Machines
-----------------------

.. currentmodule:: scikits.learn.sparse

.. autosummary::
:toctree: generated/
:template: class.rst

svm.SVC
svm.LinearSVC
svm.NuSVC
svm.SVR
svm.NuSVR
svm.OneClassSVM

104 changes: 84 additions & 20 deletions doc/modules/svm.rst
@@ -125,10 +125,6 @@ implement this is called :class:`OneClassSVM`
In this case, as it is a type of unsupervised learning, the fit method
will only take as input an array X, as there are no class labels.
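
For instance (a minimal sketch on toy data; it is assumed here that the
default constructor arguments are acceptable)::

>>> import numpy as np
>>> from scikits.learn import svm
>>> X = np.array([[0., 0.], [0.1, 0.2], [-0.1, 0.1]])
>>> clf = svm.OneClassSVM()
>>> clf = clf.fit(X)  # no y: the estimator only sees the samples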

.. note::

For a complete example on one class SVM see
:ref:`example_svm_plot_oneclass.py` example.

.. figure:: ../auto_examples/svm/images/plot_oneclass.png
:target: ../auto_examples/svm/plot_oneclass.html
@@ -140,8 +136,6 @@ Examples
--------
:ref:`example_svm_plot_oneclass.py`

See :ref:`svm_examples` for a complete list of examples.




@@ -151,15 +145,23 @@ Support Vector machines for sparse data
=======================================

There is support for sparse data given in any matrix in a format
supported by scipy.sparse. See module scikits.learn.sparse.svm.
supported by scipy.sparse. Classes have the same name, just prefixed
by the `sparse` namespace, and take the same arguments, with the
exception of training and test data, which is expected to be in a
matrix format defined in scipy.sparse.

For maximum efficiency, use the CSR matrix format as defined in
`scipy.sparse.csr_matrix
<http://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csr_matrix.html>`_.

:class:`SVC`
See the complete listing of classes in
:ref:`sparse_svm_class_reference`.
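
As a minimal sketch (the two-sample toy data is purely illustrative, and
it is assumed here that a dense array converted with scipy.sparse is
accepted directly)::

>>> import numpy as np
>>> from scipy import sparse
>>> from scikits.learn.sparse import svm
>>> X = sparse.csr_matrix(np.array([[0., 0.], [1., 1.]]))
>>> y = [0, 1]
>>> clf = svm.SVC(kernel='linear')
>>> clf = clf.fit(X, y)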


Tips on Practical Use
=====================

* Support Vector Machine algorithms are not scale-invariant, so it
* Support Vector Machine algorithms are not scale invariant, so it
is highly recommended to scale your data. For example, scale each
attribute on the input vector X to [0,1] or [-1,+1], or standardize
it to have mean 0 and variance 1. Note that the *same* scaling
@@ -168,8 +170,8 @@ Tips on Practical Use
<https://sourceforge.net/apps/trac/scikit-learn/wiki/CookBook>`_
for some examples on scaling; a minimal sketch also follows this list.

* nu in NuSVC/OneClassSVM/NuSVR approximates the fraction of
training errors and support vectors.
* Parameter nu in NuSVC/OneClassSVM/NuSVR approximates the fraction
of training errors and support vectors.

* If data for classification are unbalanced (e.g. many positive and
few negative), try different penalty parameters C.
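
A minimal sketch of standardizing with plain NumPy, reusing the training
statistics on the test set (variable names are illustrative)::

>>> import numpy as np
>>> X_train = np.array([[0., 10.], [1., 20.], [2., 30.]])
>>> X_test = np.array([[1.5, 25.]])
>>> mean, std = X_train.mean(axis=0), X_train.std(axis=0)
>>> X_train_scaled = (X_train - mean) / std
>>> X_test_scaled = (X_test - mean) / std  # same transform as for training
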
@@ -183,13 +185,26 @@ Kernel functions
================

The *kernel function* can be any of the following:

* linear: :math:`<x, x'>`.

* polynomial: :math:`(\gamma <x, x'> + r)^d`. d is specified by
keyword `degree`.

* rbf: :math:`\exp(-\gamma |x-x'|^2), \gamma > 0`. :math:`\gamma` is
specified by keyword gamma.

* sigmoid: :math:`\tanh(<x, x'> + r)`.

Different kernels are specified by the keyword kernel at initialization::

>>> linear_svc = svm.SVC(kernel='linear')
>>> linear_svc.kernel
'linear'
>>> rbf_svc = svm.SVC(kernel='rbf')
>>> rbf_svc.kernel
'rbf'


Custom Kernels
--------------
@@ -230,7 +245,7 @@ instance that will use that kernel::
Passing the gram matrix
~~~~~~~~~~~~~~~~~~~~~~~

set kernel='precomputed' and pass the gram matrix instead of X in the
Set kernel='precomputed' and pass the gram matrix instead of X in the
fit method.
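
For example, a sketch with a linear kernel computed by hand (it is
assumed here that prediction then expects the kernel values between test
and training vectors)::

>>> import numpy as np
>>> from scikits.learn import svm
>>> X = np.array([[0., 0.], [1., 1.]])
>>> y = [0, 1]
>>> gram = np.dot(X, X.T)  # linear kernel on the training data
>>> clf = svm.SVC(kernel='precomputed')
>>> clf = clf.fit(gram, y)
>>> pred = clf.predict(np.dot(X, X.T))  # K(X_test, X_train); here X_test = X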


@@ -258,21 +273,70 @@ generalization error of the classifier.
:align: center
:scale: 50



SVC
---

Given training vectors :math:`x_i \in R^n`, i=1,..., l, in two
classes, and a vector :math:`y \in R^l`
classes, and a vector :math:`y \in R^l` such that :math:`y_i \in \{1, -1\}`,
SVC solves the following primal problem:


.. math::

\min_{w, b, \zeta} \frac{1}{2} w^T w + C \sum_{i=1}^{l} \zeta_i

\textrm{subject to } & y_i (w^T \phi(x_i) + b) \geq 1 - \zeta_i,\\
& \zeta_i \geq 0, i=1, ..., l

Its dual is

.. math::

\min_{\alpha} \frac{1}{2} \alpha^T Q \alpha - e^T \alpha

\textrm{subject to } & y^T \alpha = 0\\
& 0 \leq \alpha_i \leq C, i=1, ..., l

where :math:`e` is the vector of all ones, C > 0 is the upper bound,
Q is an l by l positive semidefinite matrix,
:math:`Q_{ij} \equiv y_i y_j K(x_i, x_j)`, where
:math:`K(x_i, x_j) = \phi(x_i)^T \phi(x_j)` is the kernel. Here training
vectors are implicitly mapped into a higher (maybe infinite) dimensional
space by the function :math:`\phi`.


The decision function is:

.. math:: sgn(\sum_{i=1}^l y_i \alpha_i K(x_i, x) + \rho)


.. TODO multiclass case ?/

These parameters can be accessed through the members support\_ and intercept\_ (a small numeric sketch follows this list):

- Member support\_ holds the product :math:`y^T \alpha`

- Member intercept\_ of the classifier holds :math:`-\rho`
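
As a small numeric sketch of the decision function above (the helper
below only spells out the formula; it does not use the estimator
attributes directly)::

import numpy as np

def svc_decision(x, support_vectors, y_alpha, rho, kernel=np.dot):
    # sgn( sum_i y_i alpha_i K(x_i, x) + rho ), with y_alpha[i] = y_i * alpha_i
    value = sum(ya * kernel(sv, x) for sv, ya in zip(support_vectors, y_alpha))
    return np.sign(value + rho)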

References
~~~~~~~~~~

This algorithm is implemented as described in `Automatic Capacity
Tuning of Very Large VC-dimension Classifiers
<http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.17.7215>`_
and `Support-vector networks
<http://www.springerlink.com/content/k238jx04hm87j80g/>`_.

In SVC the decision function in this case will be:

.. math:: sgn(\sum_{i=1}^l \alpha_i K(x_i, x) + \rho)

where :math:`\alpha, \rho` can be accessed through fields support\_ and
intercept\_ of the classifier instance, respectively.

NuSVC
-----

- *penalty*. C > 0 is the penalty parameter of the error term.
We introduce a new parameter :math:`\nu` which controls the number of
support vectors and training errors. The parameter :math:`\nu \in (0,
1]` is an upper bound on the fraction of training errors and a lower
bound on the fraction of support vectors.
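
For instance (a sketch; apart from nu the constructor is assumed here to
mirror :class:`SVC`)::

>>> from scikits.learn import svm
>>> clf = svm.NuSVC(nu=0.1)  # roughly: at most 10% training errors, at least 10% support vectors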


Implementation details
5 changes: 2 additions & 3 deletions scikits/learn/base.py
@@ -9,7 +9,7 @@

import numpy as np

from .metrics import zero_one, mean_square_error
from .metrics import explained_variance

################################################################################
class BaseEstimator(object):
@@ -141,5 +141,4 @@ def score(self, X, y):
-------
z : float
"""
return - mean_square_error(self.predict(X), y)

return explained_variance(y, self.predict(X))
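
For reference, a sketch of the conventional explained variance score that
the new return value targets (whether scikits.learn.metrics.explained_variance
uses exactly this formula is an assumption here)::

import numpy as np

def explained_variance_sketch(y_true, y_pred):
    # 1.0 means the predictions account for all of the variance in y_true;
    # lower (possibly negative) values mean a worse fit.
    return 1.0 - np.var(y_true - y_pred) / np.var(y_true)
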
1 change: 1 addition & 0 deletions scikits/learn/glm/coordinate_descent.py
@@ -4,6 +4,7 @@
#
# License: BSD Style.

import warnings
import numpy as np

from .base import LinearModel
18 changes: 11 additions & 7 deletions scikits/learn/glm/lars.py
@@ -99,11 +99,12 @@ def lars_path(X, y, max_iter=None, alpha_min=0, method="lar", precompute=True):
res = y - np.dot (X, beta[n_iter]) # there are better ways
Cov = np.ma.dot (Xna, res)

imax = np.ma.argmax (np.ma.abs(Cov), fill_value=0.) #rename
Cov_max = (Cov [imax])
imax = np.ma.argmax (np.ma.abs(Cov)) #rename
Cov_max = Cov.data [imax]

alpha = np.abs(Cov_max) #sum (np.abs(beta[n_iter]))
alphas [n_iter] = np.max(np.abs(np.dot(Xt, res))) #sum (np.abs(beta[n_iter]))
alphas [n_iter] = alpha

if (n_iter >= max_iter or n_pred >= max_pred ):
break

@@ -185,7 +186,7 @@ def lars_path(X, y, max_iter=None, alpha_min=0, method="lar", precompute=True):
n_pred -= 1
drop_idx = active.pop (idx)
# please please please remove this masked arrays pain from me
Xna[drop_idx] = Xna.data[drop_idx].copy()
Xna[drop_idx] = Xna.data[drop_idx]
print 'dropped ', idx, ' at ', n_iter, ' iteration'
Xa = Xt[active] # duplicate
L[:n_pred, :n_pred] = linalg.cholesky(np.dot(Xa, Xa.T), lower=True)
@@ -305,7 +306,7 @@ class LassoLARS (LinearModel):
>>> from scikits.learn import glm
>>> clf = glm.LassoLARS(alpha=0.1)
>>> clf.fit([[-1,1], [0, 0], [1, 1]], [-1, 0, -1])
LassoLARS(normalize=True, alpha=0.1)
LassoLARS(normalize=True, alpha=0.1, max_iter=None)
>>> print clf.coef_
[ 0. -0.51649658]
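
A sketch of capping the number of LARS iterations with the new max_iter
parameter (hypothetical value; the fitted coefficients will generally
differ from the unconstrained run):

>>> clf = glm.LassoLARS(alpha=0.1, max_iter=3)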

@@ -315,13 +316,14 @@ class LassoLARS (LinearModel):
an alternative optimization strategy called 'coordinate descent.'
"""

def __init__(self, alpha=1.0, normalize=True):
def __init__(self, alpha=1.0, max_iter=None, normalize=True):
""" XXX : add doc
# will only normalize non-zero columns
"""
self.alpha = alpha
self.normalize = normalize
self.coef_ = None
self.max_iter = max_iter

def fit (self, X, y, **params):
""" XXX : add doc
@@ -345,7 +347,9 @@ def fit (self, X, y, **params):

method = 'lasso'
alphas_, active, coef_path_ = lars_path(X, y,
alpha_min=alpha, method=method)
alpha_min=alpha, method=method,
max_iter=self.max_iter)

self.coef_ = coef_path_[:,-1]
return self
