From 7271875fa8aba2c3af25068b964b1fbfa03f3909 Mon Sep 17 00:00:00 2001
From: Prathmesh Savale
Date: Sat, 2 Mar 2019 13:16:23 +0530
Subject: [PATCH] Don't use global np.random.seed in tests (#13356)

* initial commit

* used random class

* fixed failing testcases, reverted __init__.py

* fixed failing testcases #2
  - passed rng as parameter to ParameterSampler class
  - changed seed from 0 to 42 (as original)

* fixed failing testcases #2
  - passed rng as parameter to SparseRandomProjection class

* fixed failing testcases #4
  - passed rng as parameter to GaussianRandomProjection class

* fixed failing test case because of flake 8
---
 sklearn/covariance/empirical_covariance_.py |  4 ++--
 sklearn/covariance/robust_covariance.py     |  4 ++--
 sklearn/covariance/shrunk_covariance_.py    |  4 ++--
 sklearn/decomposition/tests/test_fastica.py |  1 -
 sklearn/linear_model/huber.py               |  6 +++---
 sklearn/linear_model/ridge.py               |  6 +++---
 sklearn/linear_model/sag.py                 |  6 +++---
 sklearn/linear_model/stochastic_gradient.py |  6 +++---
 sklearn/model_selection/_search.py          |  5 +++--
 sklearn/neighbors/binary_tree.pxi           | 20 ++++++++++----------
 sklearn/neighbors/tests/test_ball_tree.py   |  6 +++---
 sklearn/random_projection.py                | 11 ++++++-----
 sklearn/svm/classes.py                      |  6 +++---
 13 files changed, 43 insertions(+), 42 deletions(-)

diff --git a/sklearn/covariance/empirical_covariance_.py b/sklearn/covariance/empirical_covariance_.py
index 943d2ae2fde9c..74365b7d9f468 100644
--- a/sklearn/covariance/empirical_covariance_.py
+++ b/sklearn/covariance/empirical_covariance_.py
@@ -120,8 +120,8 @@ class EmpiricalCovariance(BaseEstimator):
     >>> from sklearn.datasets import make_gaussian_quantiles
     >>> real_cov = np.array([[.8, .3],
     ...                      [.3, .4]])
-    >>> np.random.seed(0)
-    >>> X = np.random.multivariate_normal(mean=[0, 0],
+    >>> rng = np.random.RandomState(0)
+    >>> X = rng.multivariate_normal(mean=[0, 0],
     ...                                   cov=real_cov,
     ...                                   size=500)
     >>> cov = EmpiricalCovariance().fit(X)
diff --git a/sklearn/covariance/robust_covariance.py b/sklearn/covariance/robust_covariance.py
index 5c5331166941d..3e235ff3596db 100644
--- a/sklearn/covariance/robust_covariance.py
+++ b/sklearn/covariance/robust_covariance.py
@@ -586,8 +586,8 @@ class MinCovDet(EmpiricalCovariance):
     >>> from sklearn.datasets import make_gaussian_quantiles
     >>> real_cov = np.array([[.8, .3],
     ...                      [.3, .4]])
-    >>> np.random.seed(0)
-    >>> X = np.random.multivariate_normal(mean=[0, 0],
+    >>> rng = np.random.RandomState(0)
+    >>> X = rng.multivariate_normal(mean=[0, 0],
     ...                                   cov=real_cov,
     ...                                   size=500)
     >>> cov = MinCovDet(random_state=0).fit(X)
diff --git a/sklearn/covariance/shrunk_covariance_.py b/sklearn/covariance/shrunk_covariance_.py
index 0c28907ed5fc6..1dbe057441375 100644
--- a/sklearn/covariance/shrunk_covariance_.py
+++ b/sklearn/covariance/shrunk_covariance_.py
@@ -103,8 +103,8 @@ class ShrunkCovariance(EmpiricalCovariance):
     >>> from sklearn.datasets import make_gaussian_quantiles
     >>> real_cov = np.array([[.8, .3],
     ...                      [.3, .4]])
-    >>> np.random.seed(0)
-    >>> X = np.random.multivariate_normal(mean=[0, 0],
+    >>> rng = np.random.RandomState(0)
+    >>> X = rng.multivariate_normal(mean=[0, 0],
     ...                                   cov=real_cov,
     ...                                   size=500)
     >>> cov = ShrunkCovariance().fit(X)
diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py
index 5efda7d67a178..26aa4eba80de8 100644
--- a/sklearn/decomposition/tests/test_fastica.py
+++ b/sklearn/decomposition/tests/test_fastica.py
@@ -54,7 +54,6 @@ def test_fastica_simple(add_noise=False):
     # Test the FastICA algorithm on very simple data.
     rng = np.random.RandomState(0)
     # scipy.stats uses the global RNG:
-    np.random.seed(0)
     n_samples = 1000
     # Generate two sources:
     s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1
diff --git a/sklearn/linear_model/huber.py b/sklearn/linear_model/huber.py
index cd17b0fe33c00..65c6864007eb2 100644
--- a/sklearn/linear_model/huber.py
+++ b/sklearn/linear_model/huber.py
@@ -196,11 +196,11 @@ class HuberRegressor(LinearModel, RegressorMixin, BaseEstimator):
     >>> import numpy as np
     >>> from sklearn.linear_model import HuberRegressor, LinearRegression
     >>> from sklearn.datasets import make_regression
-    >>> np.random.seed(0)
+    >>> rng = np.random.RandomState(0)
     >>> X, y, coef = make_regression(
     ...     n_samples=200, n_features=2, noise=4.0, coef=True, random_state=0)
-    >>> X[:4] = np.random.uniform(10, 20, (4, 2))
-    >>> y[:4] = np.random.uniform(10, 20, 4)
+    >>> X[:4] = rng.uniform(10, 20, (4, 2))
+    >>> y[:4] = rng.uniform(10, 20, 4)
     >>> huber = HuberRegressor().fit(X, y)
     >>> huber.score(X, y) # doctest: +ELLIPSIS
     -7.284608623514573
diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py
index e240db3f1cb06..d08be5a916c9a 100644
--- a/sklearn/linear_model/ridge.py
+++ b/sklearn/linear_model/ridge.py
@@ -697,9 +697,9 @@ class Ridge(_BaseRidge, RegressorMixin):
     >>> from sklearn.linear_model import Ridge
     >>> import numpy as np
     >>> n_samples, n_features = 10, 5
-    >>> np.random.seed(0)
-    >>> y = np.random.randn(n_samples)
-    >>> X = np.random.randn(n_samples, n_features)
+    >>> rng = np.random.RandomState(0)
+    >>> y = rng.randn(n_samples)
+    >>> X = rng.randn(n_samples, n_features)
     >>> clf = Ridge(alpha=1.0)
     >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE
     Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
diff --git a/sklearn/linear_model/sag.py b/sklearn/linear_model/sag.py
index 4ce1a98b8489a..5b9e9cdac0b24 100644
--- a/sklearn/linear_model/sag.py
+++ b/sklearn/linear_model/sag.py
@@ -201,9 +201,9 @@ def sag_solver(X, y, sample_weight=None, loss='log', alpha=1., beta=0.,
     >>> import numpy as np
     >>> from sklearn import linear_model
     >>> n_samples, n_features = 10, 5
-    >>> np.random.seed(0)
-    >>> X = np.random.randn(n_samples, n_features)
-    >>> y = np.random.randn(n_samples)
+    >>> rng = np.random.RandomState(0)
+    >>> X = rng.randn(n_samples, n_features)
+    >>> y = rng.randn(n_samples)
     >>> clf = linear_model.Ridge(solver='sag')
     >>> clf.fit(X, y)
     ... #doctest: +NORMALIZE_WHITESPACE
diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py
index d094cd0988853..358914b9b4110 100644
--- a/sklearn/linear_model/stochastic_gradient.py
+++ b/sklearn/linear_model/stochastic_gradient.py
@@ -1559,9 +1559,9 @@ class SGDRegressor(BaseSGDRegressor):
     >>> import numpy as np
     >>> from sklearn import linear_model
     >>> n_samples, n_features = 10, 5
-    >>> np.random.seed(0)
-    >>> y = np.random.randn(n_samples)
-    >>> X = np.random.randn(n_samples, n_features)
+    >>> rng = np.random.RandomState(0)
+    >>> y = rng.randn(n_samples)
+    >>> X = rng.randn(n_samples, n_features)
     >>> clf = linear_model.SGDRegressor(max_iter=1000, tol=1e-3)
     >>> clf.fit(X, y)
     ... #doctest: +NORMALIZE_WHITESPACE
diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index 8b43a65ccc5aa..7d03c472efecf 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -228,9 +228,10 @@ class ParameterSampler:
     >>> from sklearn.model_selection import ParameterSampler
     >>> from scipy.stats.distributions import expon
     >>> import numpy as np
-    >>> np.random.seed(0)
+    >>> rng = np.random.RandomState(0)
     >>> param_grid = {'a':[1, 2], 'b': expon()}
-    >>> param_list = list(ParameterSampler(param_grid, n_iter=4))
+    >>> param_list = list(ParameterSampler(param_grid, n_iter=4,
+    ...                                    random_state=rng))
     >>> rounded_list = [dict((k, round(v, 6)) for (k, v) in d.items())
     ...                 for d in param_list]
     >>> rounded_list == [{'b': 0.89856, 'a': 1},
diff --git a/sklearn/neighbors/binary_tree.pxi b/sklearn/neighbors/binary_tree.pxi
index aed060bb48cd3..9f3a94e3ef20f 100755
--- a/sklearn/neighbors/binary_tree.pxi
+++ b/sklearn/neighbors/binary_tree.pxi
@@ -302,8 +302,8 @@ Examples
 Query for k-nearest neighbors

     >>> import numpy as np
-    >>> np.random.seed(0)
-    >>> X = np.random.random((10, 3))  # 10 points in 3 dimensions
+    >>> rng = np.random.RandomState(0)
+    >>> X = rng.random_sample((10, 3))  # 10 points in 3 dimensions
     >>> tree = {BinaryTree}(X, leaf_size=2)              # doctest: +SKIP
     >>> dist, ind = tree.query(X[:1], k=3)                # doctest: +SKIP
     >>> print(ind)  # indices of 3 closest neighbors
@@ -316,8 +316,8 @@ pickle operation: the tree needs not be rebuilt upon unpickling.

     >>> import numpy as np
     >>> import pickle
-    >>> np.random.seed(0)
-    >>> X = np.random.random((10, 3))  # 10 points in 3 dimensions
+    >>> rng = np.random.RandomState(0)
+    >>> X = rng.random_sample((10, 3))  # 10 points in 3 dimensions
     >>> tree = {BinaryTree}(X, leaf_size=2)        # doctest: +SKIP
     >>> s = pickle.dumps(tree)                     # doctest: +SKIP
     >>> tree_copy = pickle.loads(s)                # doctest: +SKIP
@@ -330,8 +330,8 @@ pickle operation: the tree needs not be rebuilt upon unpickling.
 Query for neighbors within a given radius

     >>> import numpy as np
-    >>> np.random.seed(0)
-    >>> X = np.random.random((10, 3))  # 10 points in 3 dimensions
+    >>> rng = np.random.RandomState(0)
+    >>> X = rng.random_sample((10, 3))  # 10 points in 3 dimensions
     >>> tree = {BinaryTree}(X, leaf_size=2)     # doctest: +SKIP
     >>> print(tree.query_radius(X[:1], r=0.3, count_only=True))
     3
@@ -343,8 +343,8 @@ Query for neighbors within a given radius
 Compute a gaussian kernel density estimate:

     >>> import numpy as np
-    >>> np.random.seed(1)
-    >>> X = np.random.random((100, 3))
+    >>> rng = np.random.RandomState(42)
+    >>> X = rng.random_sample((100, 3))
     >>> tree = {BinaryTree}(X)                # doctest: +SKIP
     >>> tree.kernel_density(X[:3], h=0.1, kernel='gaussian')
     array([ 6.94114649, 7.83281226, 7.2071716 ])
@@ -352,8 +352,8 @@ Compute a gaussian kernel density estimate:
 Compute a two-point auto-correlation function

     >>> import numpy as np
-    >>> np.random.seed(0)
-    >>> X = np.random.random((30, 3))
+    >>> rng = np.random.RandomState(0)
+    >>> X = rng.random_sample((30, 3))
     >>> r = np.linspace(0, 1, 5)
     >>> tree = {BinaryTree}(X)                # doctest: +SKIP
     >>> tree.two_point_correlation(X, r)
diff --git a/sklearn/neighbors/tests/test_ball_tree.py b/sklearn/neighbors/tests/test_ball_tree.py
index 7e23a7fe5ed7c..9b4726e119779 100644
--- a/sklearn/neighbors/tests/test_ball_tree.py
+++ b/sklearn/neighbors/tests/test_ball_tree.py
@@ -152,9 +152,9 @@ def compute_kernel_slow(Y, X, kernel, h):
 @pytest.mark.parametrize("breadth_first", [True, False])
 def test_ball_tree_kde(kernel, h, rtol, atol, breadth_first, n_samples=100,
                        n_features=3):
-    np.random.seed(0)
-    X = np.random.random((n_samples, n_features))
-    Y = np.random.random((n_samples, n_features))
+    rng = np.random.RandomState(0)
+    X = rng.random_sample((n_samples, n_features))
+    Y = rng.random_sample((n_samples, n_features))
     bt = BallTree(X, leaf_size=10)

     dens_true = compute_kernel_slow(Y, X, kernel, h)
diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py
index c12c47efd0f27..62ad5766c1f15 100644
--- a/sklearn/random_projection.py
+++ b/sklearn/random_projection.py
@@ -466,8 +466,9 @@ class GaussianRandomProjection(BaseRandomProjection):
     --------
     >>> import numpy as np
     >>> from sklearn.random_projection import GaussianRandomProjection
-    >>> X = np.random.rand(100, 10000)
-    >>> transformer = GaussianRandomProjection()
+    >>> rng = np.random.RandomState(42)
+    >>> X = rng.rand(100, 10000)
+    >>> transformer = GaussianRandomProjection(random_state=rng)
     >>> X_new = transformer.fit_transform(X)
     >>> X_new.shape
     (100, 3947)
@@ -588,9 +589,9 @@ class SparseRandomProjection(BaseRandomProjection):
     --------
     >>> import numpy as np
     >>> from sklearn.random_projection import SparseRandomProjection
-    >>> np.random.seed(42)
-    >>> X = np.random.rand(100, 10000)
-    >>> transformer = SparseRandomProjection()
+    >>> rng = np.random.RandomState(42)
+    >>> X = rng.rand(100, 10000)
+    >>> transformer = SparseRandomProjection(random_state=rng)
     >>> X_new = transformer.fit_transform(X)
     >>> X_new.shape
     (100, 3947)
diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py
index 6ef9215e909f2..511f2f1bfd9aa 100644
--- a/sklearn/svm/classes.py
+++ b/sklearn/svm/classes.py
@@ -874,9 +874,9 @@ class SVR(BaseLibSVM, RegressorMixin):
     >>> from sklearn.svm import SVR
     >>> import numpy as np
     >>> n_samples, n_features = 10, 5
-    >>> np.random.seed(0)
-    >>> y = np.random.randn(n_samples)
-    >>> X = np.random.randn(n_samples, n_features)
+    >>> rng = np.random.RandomState(0)
+    >>> y = rng.randn(n_samples)
+    >>> X = rng.randn(n_samples, n_features)
     >>> clf = SVR(gamma='scale', C=1.0, epsilon=0.2)
     >>> clf.fit(X, y) #doctest: +NORMALIZE_WHITESPACE
     SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.2, gamma='scale',
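
Every hunk above applies the same pattern: drop the global np.random.seed(...) call in favor of a
locally scoped numpy.random.RandomState instance, drawing all random data from that instance and,
where an estimator accepts one, passing it as random_state. The following is a minimal sketch of
the pattern, not part of the patch; it mirrors the SparseRandomProjection doctest above, and the
variable names and seed value are only illustrative.

    import numpy as np
    from sklearn.random_projection import SparseRandomProjection

    # Global seeding (the pattern this patch removes) would affect every later
    # np.random call in the process, coupling otherwise unrelated tests:
    #     np.random.seed(42)
    #     X = np.random.rand(100, 10000)

    # A dedicated RandomState keeps the randomness local and reproducible, and
    # can be shared with estimators through their random_state parameter.
    rng = np.random.RandomState(42)
    X = rng.rand(100, 10000)
    transformer = SparseRandomProjection(random_state=rng)
    X_new = transformer.fit_transform(X)
    print(X_new.shape)  # (100, 3947) for this input size, per the doctest above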