Skip to content

Commit

Permalink
Fixes in GMM, TSNE, MDS, LSHForest; exclude SpectralEmbedding from the pipeline-consistency check
Browse files Browse the repository at this point in the history
Authored and committed by amueller on Feb 10, 2015
1 parent e3e0827 commit 8ef0b9a
Show file tree
Hide file tree
Showing 7 changed files with 13 additions and 11 deletions.
4 changes: 2 additions & 2 deletions sklearn/manifold/mds.py
Expand Up @@ -357,7 +357,7 @@ def __init__(self, n_components=2, metric=True, n_init=4,
def _pairwise(self):
return self.kernel == "precomputed"

def fit(self, X, init=None, y=None):
def fit(self, X, y=None, init=None):
"""
Computes the position of the points in the embedding space
Expand All @@ -374,7 +374,7 @@ def fit(self, X, init=None, y=None):
self.fit_transform(X, init=init)
return self

def fit_transform(self, X, init=None, y=None):
def fit_transform(self, X, y=None, init=None):
"""
Fit the data from X, and returns the embedded coordinates
Expand Down
2 changes: 1 addition & 1 deletion sklearn/manifold/t_sne.py
Expand Up @@ -507,7 +507,7 @@ def _tsne(self, P, alpha, n_samples, random_state, X_embedded=None):

return X_embedded

def fit_transform(self, X):
def fit_transform(self, X, y=None):
"""Transform X to the embedded space.
Parameters
Expand Down
8 changes: 3 additions & 5 deletions sklearn/mixture/gmm.py
Expand Up @@ -321,7 +321,7 @@ def score_samples(self, X):
responsibilities = np.exp(lpr - logprob[:, np.newaxis])
return logprob, responsibilities

def score(self, X):
def score(self, X, y=None):
"""Compute the log probability under the model.
Parameters
Expand Down Expand Up @@ -411,7 +411,7 @@ def sample(self, n_samples=1, random_state=None):
num_comp_in_X, random_state=random_state).T
return X

def fit(self, X):
def fit(self, X, y=None):
"""Estimate model parameters with the expectation-maximization
algorithm.
Expand All @@ -428,9 +428,7 @@ def fit(self, X):
corresponds to a single data point.
"""
# initialization step
X = np.asarray(X, dtype=np.float64)
if X.ndim == 1:
X = X[:, np.newaxis]
X = check_array(X, dtype=np.float64)
if X.shape[0] < self.n_components:
raise ValueError(
'GMM estimation with %s components, but got only %s samples' %
Expand Down
2 changes: 1 addition & 1 deletion sklearn/neighbors/approximate.py
Expand Up @@ -483,7 +483,7 @@ def radius_neighbors(self, X, radius=None, return_distance=True):
else:
return _array_of_arrays(neighbors)

def partial_fit(self, X):
def partial_fit(self, X, y=None):
"""
Inserts new data into the already fitted LSH Forest.
Cost is proportional to new total size, so additions
Expand Down
2 changes: 1 addition & 1 deletion sklearn/svm/classes.py
Expand Up @@ -910,7 +910,7 @@ def __init__(self, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, tol=1e-3,
shrinking, False, cache_size, None, verbose, max_iter,
random_state)

def fit(self, X, sample_weight=None, **params):
def fit(self, X, y=None, sample_weight=None, **params):
"""
Detects the soft boundary of the set of samples X.
Expand Down
4 changes: 4 additions & 0 deletions sklearn/tests/test_common.py
Expand Up @@ -96,6 +96,10 @@ def test_non_meta_estimators():
if name not in CROSS_DECOMPOSITION:
yield check_estimators_dtypes, name, Estimator
yield check_fit_score_takes_y, name, Estimator

if name not in CROSS_DECOMPOSITION + ['SpectralEmbedding']:
# SpectralEmbedding is non-deterministic,
# see issue #4236
yield check_pipeline_consistency, name, Estimator

if name not in CROSS_DECOMPOSITION + ['Imputer']:
Expand Down
2 changes: 1 addition & 1 deletion sklearn/utils/estimator_checks.py
Expand Up @@ -256,9 +256,9 @@ def check_pipeline_consistency(name, Estimator):
X -= X.min()
y = multioutput_estimator_convert_y_2d(name, y)
estimator = Estimator()
pipeline = make_pipeline(estimator)
set_fast_parameters(estimator)
set_random_state(estimator)
pipeline = make_pipeline(estimator)
estimator.fit(X, y)
pipeline.fit(X, y)
funcs = ["score", "fit_transform"]
Expand Down

0 comments on commit 8ef0b9a

Please sign in to comment.