Don't use global np.random.seed in tests (scikit-learn#13356)
* initial commit

* used RandomState class

* fixed failing test cases, reverted __init__.py

* fixed failing test cases #2
  - passed rng as parameter to ParameterSampler class
  - changed seed from 0 to 42 (as in the original)

* fixed failing test cases #3
  - passed rng as parameter to SparseRandomProjection class

* fixed failing test cases #4
  - passed rng as parameter to GaussianRandomProjection class

* fixed failing test case flagged by flake8
praths007 authored and koenvandevelde committed Jul 12, 2019
1 parent 4eceac7 commit 7271875
Showing 13 changed files with 43 additions and 42 deletions.
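
Every file below applies the same fix: instead of seeding NumPy's global RNG with np.random.seed, the examples construct a local np.random.RandomState and draw from it, so determinism is scoped to one object and no global state is mutated. A minimal sketch of the before/after pattern (variable names here are illustrative, not taken from the diff):

    import numpy as np

    # Before: seeding the global RNG silently affects every
    # subsequent np.random call anywhere in the process
    np.random.seed(0)
    a = np.random.randn(10)

    # After: a local RandomState keeps the seed scoped to this object
    rng = np.random.RandomState(0)
    b = rng.randn(10)

    # Both draw from the same seeded stream, so the values agree,
    # but only the first version touched global state
    assert np.allclose(a, b)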
4 changes: 2 additions & 2 deletions sklearn/covariance/empirical_covariance_.py
@@ -120,8 +120,8 @@ class EmpiricalCovariance(BaseEstimator):
 >>> from sklearn.datasets import make_gaussian_quantiles
 >>> real_cov = np.array([[.8, .3],
 ...                      [.3, .4]])
->>> np.random.seed(0)
->>> X = np.random.multivariate_normal(mean=[0, 0],
+>>> rng = np.random.RandomState(0)
+>>> X = rng.multivariate_normal(mean=[0, 0],
 ...                             cov=real_cov,
 ...                             size=500)
 >>> cov = EmpiricalCovariance().fit(X)
4 changes: 2 additions & 2 deletions sklearn/covariance/robust_covariance.py
@@ -586,8 +586,8 @@ class MinCovDet(EmpiricalCovariance):
 >>> from sklearn.datasets import make_gaussian_quantiles
 >>> real_cov = np.array([[.8, .3],
 ...                      [.3, .4]])
->>> np.random.seed(0)
->>> X = np.random.multivariate_normal(mean=[0, 0],
+>>> rng = np.random.RandomState(0)
+>>> X = rng.multivariate_normal(mean=[0, 0],
 ...                             cov=real_cov,
 ...                             size=500)
 >>> cov = MinCovDet(random_state=0).fit(X)
4 changes: 2 additions & 2 deletions sklearn/covariance/shrunk_covariance_.py
@@ -103,8 +103,8 @@ class ShrunkCovariance(EmpiricalCovariance):
 >>> from sklearn.datasets import make_gaussian_quantiles
 >>> real_cov = np.array([[.8, .3],
 ...                      [.3, .4]])
->>> np.random.seed(0)
->>> X = np.random.multivariate_normal(mean=[0, 0],
+>>> rng = np.random.RandomState(0)
+>>> X = rng.multivariate_normal(mean=[0, 0],
 ...                             cov=real_cov,
 ...                             size=500)
 >>> cov = ShrunkCovariance().fit(X)
1 change: 0 additions & 1 deletion sklearn/decomposition/tests/test_fastica.py
@@ -54,7 +54,6 @@ def test_fastica_simple(add_noise=False):
     # Test the FastICA algorithm on very simple data.
     rng = np.random.RandomState(0)
     # scipy.stats uses the global RNG:
-    np.random.seed(0)
     n_samples = 1000
     # Generate two sources:
     s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1
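
The global seed removed here existed because, as the remaining comment notes, scipy.stats distributions draw from NumPy's global RNG by default. scipy's rvs also accepts a random_state argument, so a local generator can be threaded through instead; a hedged sketch of that alternative (not part of this commit):

    import numpy as np
    from scipy.stats import expon

    rng = np.random.RandomState(0)
    # Without random_state, expon().rvs would consume the global RNG;
    # passing rng keeps the draw reproducible and locally scoped
    sample = expon().rvs(size=5, random_state=rng)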
6 changes: 3 additions & 3 deletions sklearn/linear_model/huber.py
@@ -196,11 +196,11 @@ class HuberRegressor(LinearModel, RegressorMixin, BaseEstimator):
 >>> import numpy as np
 >>> from sklearn.linear_model import HuberRegressor, LinearRegression
 >>> from sklearn.datasets import make_regression
->>> np.random.seed(0)
+>>> rng = np.random.RandomState(0)
 >>> X, y, coef = make_regression(
 ...     n_samples=200, n_features=2, noise=4.0, coef=True, random_state=0)
->>> X[:4] = np.random.uniform(10, 20, (4, 2))
->>> y[:4] = np.random.uniform(10, 20, 4)
+>>> X[:4] = rng.uniform(10, 20, (4, 2))
+>>> y[:4] = rng.uniform(10, 20, 4)
 >>> huber = HuberRegressor().fit(X, y)
 >>> huber.score(X, y) # doctest: +ELLIPSIS
 -7.284608623514573
6 changes: 3 additions & 3 deletions sklearn/linear_model/ridge.py
@@ -697,9 +697,9 @@ class Ridge(_BaseRidge, RegressorMixin):
 >>> from sklearn.linear_model import Ridge
 >>> import numpy as np
 >>> n_samples, n_features = 10, 5
->>> np.random.seed(0)
->>> y = np.random.randn(n_samples)
->>> X = np.random.randn(n_samples, n_features)
+>>> rng = np.random.RandomState(0)
+>>> y = rng.randn(n_samples)
+>>> X = rng.randn(n_samples, n_features)
 >>> clf = Ridge(alpha=1.0)
 >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE
 Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
6 changes: 3 additions & 3 deletions sklearn/linear_model/sag.py
@@ -201,9 +201,9 @@ def sag_solver(X, y, sample_weight=None, loss='log', alpha=1., beta=0.,
 >>> import numpy as np
 >>> from sklearn import linear_model
 >>> n_samples, n_features = 10, 5
->>> np.random.seed(0)
->>> X = np.random.randn(n_samples, n_features)
->>> y = np.random.randn(n_samples)
+>>> rng = np.random.RandomState(0)
+>>> X = rng.randn(n_samples, n_features)
+>>> y = rng.randn(n_samples)
 >>> clf = linear_model.Ridge(solver='sag')
 >>> clf.fit(X, y)
 ... #doctest: +NORMALIZE_WHITESPACE
6 changes: 3 additions & 3 deletions sklearn/linear_model/stochastic_gradient.py
@@ -1559,9 +1559,9 @@ class SGDRegressor(BaseSGDRegressor):
 >>> import numpy as np
 >>> from sklearn import linear_model
 >>> n_samples, n_features = 10, 5
->>> np.random.seed(0)
->>> y = np.random.randn(n_samples)
->>> X = np.random.randn(n_samples, n_features)
+>>> rng = np.random.RandomState(0)
+>>> y = rng.randn(n_samples)
+>>> X = rng.randn(n_samples, n_features)
 >>> clf = linear_model.SGDRegressor(max_iter=1000, tol=1e-3)
 >>> clf.fit(X, y)
 ... #doctest: +NORMALIZE_WHITESPACE
5 changes: 3 additions & 2 deletions sklearn/model_selection/_search.py
@@ -228,9 +228,10 @@ class ParameterSampler:
 >>> from sklearn.model_selection import ParameterSampler
 >>> from scipy.stats.distributions import expon
 >>> import numpy as np
->>> np.random.seed(0)
+>>> rng = np.random.RandomState(0)
 >>> param_grid = {'a':[1, 2], 'b': expon()}
->>> param_list = list(ParameterSampler(param_grid, n_iter=4))
+>>> param_list = list(ParameterSampler(param_grid, n_iter=4,
+...                                    random_state=rng))
 >>> rounded_list = [dict((k, round(v, 6)) for (k, v) in d.items())
 ...                 for d in param_list]
 >>> rounded_list == [{'b': 0.89856, 'a': 1},
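
Passing the RandomState instance works because scikit-learn routes random_state arguments through sklearn.utils.check_random_state, which accepts None, an integer seed, or an existing RandomState. A rough sketch of that contract:

    import numpy as np
    from sklearn.utils import check_random_state

    rs1 = check_random_state(0)      # int seed -> new RandomState(0)
    rng = np.random.RandomState(0)
    rs2 = check_random_state(rng)    # RandomState -> returned as-is
    assert rs2 is rng
    rs3 = check_random_state(None)   # None -> NumPy's global RandomState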
20 changes: 10 additions & 10 deletions sklearn/neighbors/binary_tree.pxi
@@ -302,8 +302,8 @@ Examples
 Query for k-nearest neighbors
 >>> import numpy as np
->>> np.random.seed(0)
->>> X = np.random.random((10, 3))  # 10 points in 3 dimensions
+>>> rng = np.random.RandomState(0)
+>>> X = rng.random_sample((10, 3))  # 10 points in 3 dimensions
 >>> tree = {BinaryTree}(X, leaf_size=2)  # doctest: +SKIP
 >>> dist, ind = tree.query(X[:1], k=3)  # doctest: +SKIP
 >>> print(ind)  # indices of 3 closest neighbors
@@ -316,8 +316,8 @@ pickle operation: the tree needs not be rebuilt upon unpickling.
 >>> import numpy as np
 >>> import pickle
->>> np.random.seed(0)
->>> X = np.random.random((10, 3))  # 10 points in 3 dimensions
+>>> rng = np.random.RandomState(0)
+>>> X = rng.random_sample((10, 3))  # 10 points in 3 dimensions
 >>> tree = {BinaryTree}(X, leaf_size=2)  # doctest: +SKIP
 >>> s = pickle.dumps(tree)  # doctest: +SKIP
 >>> tree_copy = pickle.loads(s)  # doctest: +SKIP
@@ -330,8 +330,8 @@ pickle operation: the tree needs not be rebuilt upon unpickling.
 Query for neighbors within a given radius
 >>> import numpy as np
->>> np.random.seed(0)
->>> X = np.random.random((10, 3))  # 10 points in 3 dimensions
+>>> rng = np.random.RandomState(0)
+>>> X = rng.random_sample((10, 3))  # 10 points in 3 dimensions
 >>> tree = {BinaryTree}(X, leaf_size=2)  # doctest: +SKIP
 >>> print(tree.query_radius(X[:1], r=0.3, count_only=True))
 3
@@ -343,17 +343,17 @@ Query for neighbors within a given radius
 Compute a gaussian kernel density estimate:
 >>> import numpy as np
->>> np.random.seed(1)
->>> X = np.random.random((100, 3))
+>>> rng = np.random.RandomState(42)
+>>> X = rng.random_sample((100, 3))
 >>> tree = {BinaryTree}(X)  # doctest: +SKIP
 >>> tree.kernel_density(X[:3], h=0.1, kernel='gaussian')
 array([ 6.94114649, 7.83281226, 7.2071716 ])
 Compute a two-point auto-correlation function
 >>> import numpy as np
->>> np.random.seed(0)
->>> X = np.random.random((30, 3))
+>>> rng = np.random.RandomState(0)
+>>> X = rng.random_sample((30, 3))
 >>> r = np.linspace(0, 1, 5)
 >>> tree = {BinaryTree}(X)  # doctest: +SKIP
 >>> tree.two_point_correlation(X, r)
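
Note the method rename in this file: the module-level np.random.random is an alias for the RandomState method random_sample, so the instance-based lines call rng.random_sample. A quick check of the equivalence (assuming NumPy's legacy RandomState API):

    import numpy as np

    rng = np.random.RandomState(0)
    np.random.seed(0)
    # Same seed, same underlying method, same stream of values
    assert np.allclose(rng.random_sample((10, 3)), np.random.random((10, 3)))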
6 changes: 3 additions & 3 deletions sklearn/neighbors/tests/test_ball_tree.py
@@ -152,9 +152,9 @@ def compute_kernel_slow(Y, X, kernel, h):
 @pytest.mark.parametrize("breadth_first", [True, False])
 def test_ball_tree_kde(kernel, h, rtol, atol, breadth_first, n_samples=100,
                        n_features=3):
-    np.random.seed(0)
-    X = np.random.random((n_samples, n_features))
-    Y = np.random.random((n_samples, n_features))
+    rng = np.random.RandomState(0)
+    X = rng.random_sample((n_samples, n_features))
+    Y = rng.random_sample((n_samples, n_features))
     bt = BallTree(X, leaf_size=10)

     dens_true = compute_kernel_slow(Y, X, kernel, h)
11 changes: 6 additions & 5 deletions sklearn/random_projection.py
@@ -466,8 +466,9 @@ class GaussianRandomProjection(BaseRandomProjection):
 --------
 >>> import numpy as np
 >>> from sklearn.random_projection import GaussianRandomProjection
->>> X = np.random.rand(100, 10000)
->>> transformer = GaussianRandomProjection()
+>>> rng = np.random.RandomState(42)
+>>> X = rng.rand(100, 10000)
+>>> transformer = GaussianRandomProjection(random_state=rng)
 >>> X_new = transformer.fit_transform(X)
 >>> X_new.shape
 (100, 3947)
@@ -588,9 +589,9 @@ class SparseRandomProjection(BaseRandomProjection):
 --------
 >>> import numpy as np
 >>> from sklearn.random_projection import SparseRandomProjection
->>> np.random.seed(42)
->>> X = np.random.rand(100, 10000)
->>> transformer = SparseRandomProjection()
+>>> rng = np.random.RandomState(42)
+>>> X = rng.rand(100, 10000)
+>>> transformer = SparseRandomProjection(random_state=rng)
 >>> X_new = transformer.fit_transform(X)
 >>> X_new.shape
 (100, 3947)
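
One subtlety in these projection doctests: the same rng instance both generates X and is passed as random_state, so the transformer continues consuming the stream where data generation left off, and reordering the statements would change the projected output. A small illustration of that statefulness:

    import numpy as np

    rng = np.random.RandomState(42)
    first = rng.rand(3)    # advances the generator's state
    second = rng.rand(3)   # different values: the stream continues

    # A fresh generator with the same seed reproduces the first draws
    rng2 = np.random.RandomState(42)
    assert np.allclose(first, rng2.rand(3))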
6 changes: 3 additions & 3 deletions sklearn/svm/classes.py
@@ -874,9 +874,9 @@ class SVR(BaseLibSVM, RegressorMixin):
 >>> from sklearn.svm import SVR
 >>> import numpy as np
 >>> n_samples, n_features = 10, 5
->>> np.random.seed(0)
->>> y = np.random.randn(n_samples)
->>> X = np.random.randn(n_samples, n_features)
+>>> rng = np.random.RandomState(0)
+>>> y = rng.randn(n_samples)
+>>> X = rng.randn(n_samples, n_features)
 >>> clf = SVR(gamma='scale', C=1.0, epsilon=0.2)
 >>> clf.fit(X, y) #doctest: +NORMALIZE_WHITESPACE
 SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.2, gamma='scale',
