From 62b11e8f177b4efeb58f9b3ca6fbcda39be3b127 Mon Sep 17 00:00:00 2001
From: Brian Wignall
Date: Tue, 26 Nov 2019 07:31:40 -0500
Subject: [PATCH] MNT Fix some easy-to-make typos (#15720)

---
 build_tools/azure/install.sh                                 | 2 +-
 doc/developers/advanced_installation.rst                     | 2 +-
 doc/modules/computing.rst                                    | 4 ++--
 doc/modules/model_evaluation.rst                             | 2 +-
 doc/modules/neighbors.rst                                    | 2 +-
 doc/whats_new/v0.20.rst                                      | 2 +-
 doc/whats_new/v0.21.rst                                      | 2 +-
 doc/whats_new/v0.22.rst                                      | 2 +-
 examples/inspection/plot_partial_dependence.py               | 2 +-
 sklearn/decomposition/_dict_learning.py                      | 4 ++--
 sklearn/ensemble/_hist_gradient_boosting/binning.py          | 6 +++---
 .../_hist_gradient_boosting/tests/test_gradient_boosting.py  | 2 +-
 sklearn/ensemble/tests/test_gradient_boosting.py             | 2 +-
 sklearn/externals/_arff.py                                   | 2 +-
 sklearn/metrics/_regression.py                               | 2 +-
 sklearn/metrics/tests/test_common.py                         | 2 +-
 sklearn/metrics/tests/test_score_objects.py                  | 6 +++---
 sklearn/model_selection/_search.py                           | 4 ++--
 18 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/build_tools/azure/install.sh b/build_tools/azure/install.sh
index 084c497eb65aa..f4f60df8a9626 100755
--- a/build_tools/azure/install.sh
+++ b/build_tools/azure/install.sh
@@ -11,7 +11,7 @@ make_conda() {
 }
 
 version_ge() {
-    # The two version numbers are seperated with a new line is piped to sort
+    # The two version numbers are separated with a new line is piped to sort
     # -rV. The -V activates for version number sorting and -r sorts in
     # decending order. If the first argument is the top element of the sort, it
     # is greater than or equal to the second argument.
diff --git a/doc/developers/advanced_installation.rst b/doc/developers/advanced_installation.rst
index bcb305116676d..c58eb14e828d2 100644
--- a/doc/developers/advanced_installation.rst
+++ b/doc/developers/advanced_installation.rst
@@ -374,7 +374,7 @@ Finally, build the package using the standard command::
 
     pip install --verbose --editable .
 
-For the upcomming FreeBSD 12.1 and 11.3 versions, OpenMP will be included in
+For the upcoming FreeBSD 12.1 and 11.3 versions, OpenMP will be included in
 the base system and these steps will not be necessary.
 
 .. _OpenMP: https://en.wikipedia.org/wiki/OpenMP
diff --git a/doc/modules/computing.rst b/doc/modules/computing.rst
index 176b8e22fca1c..246085d436cde 100644
--- a/doc/modules/computing.rst
+++ b/doc/modules/computing.rst
@@ -529,7 +529,7 @@ Joblib-based parallelism
 ........................
 
 When the underlying implementation uses joblib, the number of workers
-(threads or processes) that are spawned in parallel can be controled via the
+(threads or processes) that are spawned in parallel can be controlled via the
 ``n_jobs`` parameter.
 
 .. note::
@@ -666,7 +666,7 @@ Python runtime
 
 :working_memory:
 
-    the optimal size of temporary arrays used by some algoritms.
+    the optimal size of temporary arrays used by some algorithms.
 
 .. _environment_variable:
diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
index 1410887c4c51f..7af1e46578de6 100644
--- a/doc/modules/model_evaluation.rst
+++ b/doc/modules/model_evaluation.rst
@@ -1720,7 +1720,7 @@ relevant), NDCG can be used.
 
 For one sample, given the vector of continuous ground-truth values for each
 target :math:`y \in \mathbb{R}^{M}`, where :math:`M` is the number of outputs, and
-the prediction :math:`\hat{y}`, which induces the ranking funtion :math:`f`, the
+the prediction :math:`\hat{y}`, which induces the ranking function :math:`f`, the
 DCG score is
 
 .. math::
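As a quick illustration of the DCG/NDCG computation that the hunk above documents, here is a minimal sketch using toy relevance values and the public ``dcg_score``/``ndcg_score`` helpers that ship with scikit-learn 0.22:

    import numpy as np
    from sklearn.metrics import dcg_score, ndcg_score

    # Toy ground-truth relevances and predicted scores; the prediction
    # induces the ranking function f discussed in the section above.
    y_true = np.asarray([[10, 0, 0, 1, 5]])
    y_score = np.asarray([[0.1, 0.2, 0.3, 4.0, 70.0]])

    print(dcg_score(y_true, y_score))   # raw Discounted Cumulative Gain
    print(ndcg_score(y_true, y_score))  # normalized to [0, 1] by the ideal DCG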
diff --git a/doc/modules/neighbors.rst b/doc/modules/neighbors.rst
index 7f72aa68c38db..9aa27a53501b8 100644
--- a/doc/modules/neighbors.rst
+++ b/doc/modules/neighbors.rst
@@ -581,7 +581,7 @@ implementation with special data types. The precomputed neighbors
    training point as its own neighbor in the count of `n_neighbors`. However,
    for compatibility reasons with other estimators which use the other
    definition, one extra neighbor will be computed when `mode == 'distance'`.
-   To maximise compatiblity with all estimators, a safe choice is to always
+   To maximise compatibility with all estimators, a safe choice is to always
    include one extra neighbor in a custom nearest neighbors estimator, since
    unnecessary neighbors will be filtered by following estimators.
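A sketch of the precomputed-neighbors workflow this note describes, assuming the ``KNeighborsTransformer`` pipeline pattern from the same documentation page (the transformer computes the extra neighbor itself when ``mode='distance'``, so the downstream estimator can request the same ``n_neighbors``):

    from sklearn.datasets import load_iris
    from sklearn.neighbors import KNeighborsClassifier, KNeighborsTransformer
    from sklearn.pipeline import make_pipeline

    X, y = load_iris(return_X_y=True)

    # The transformer counts each training point as its own neighbor and adds
    # one extra neighbor under mode='distance', as described above, so the
    # precomputed sparse graph is safe for the downstream classifier.
    model = make_pipeline(
        KNeighborsTransformer(n_neighbors=5, mode='distance'),
        KNeighborsClassifier(n_neighbors=5, metric='precomputed'))
    model.fit(X, y)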
diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
index 4e3a4891b70e2..2eaf3199fbc3c 100644
--- a/doc/whats_new/v0.20.rst
+++ b/doc/whats_new/v0.20.rst
@@ -709,7 +709,7 @@ Support for Python 3.3 has been officially dropped.
 
 - |Feature| |Fix| :class:`decomposition.SparsePCA` now exposes
   ``normalize_components``. When set to True, the train and test data are
-  centered with the train mean repsectively during the fit phase and the
+  centered with the train mean respectively during the fit phase and the
   transform phase. This fixes the behavior of SparsePCA. When set to False,
   which is the default, the previous abnormal behaviour still holds. The False
   value is for backward compatibility and should not be used. :issue:`11585`
diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst
index 59e3774e76c69..94099723dd0ec 100644
--- a/doc/whats_new/v0.21.rst
+++ b/doc/whats_new/v0.21.rst
@@ -295,7 +295,7 @@ Support for Python 3.4 and below has been officially dropped.
 ......................
 
 - |MajorFeature| A new clustering algorithm: :class:`cluster.OPTICS`: an
-  algoritm related to :class:`cluster.DBSCAN`, that has hyperparameters easier
+  algorithm related to :class:`cluster.DBSCAN`, that has hyperparameters easier
   to set and that scales better, by :user:`Shane `,
   `Adrin Jalali`_, :user:`Erich Schubert `, `Hanmin Qin`_, and
   :user:`Assia Benbihi `.
diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst
index f62e380085c82..3a7e644d79ee7 100644
--- a/doc/whats_new/v0.22.rst
+++ b/doc/whats_new/v0.22.rst
@@ -799,7 +799,7 @@ Changelog
 
 - |Fix| :class:`svm.SVC`, :class:`svm.SVR`, :class:`svm.NuSVR` and
   :class:`svm.OneClassSVM` when received values negative or zero
   for parameter ``sample_weight`` in method fit(), generated an
-  invalid model. This behavior occured only in some border scenarios.
+  invalid model. This behavior occurred only in some border scenarios.
   Now in these cases, fit() will fail with an Exception.
   :pr:`14286` by :user:`Alex Shacked `.
diff --git a/examples/inspection/plot_partial_dependence.py b/examples/inspection/plot_partial_dependence.py
index 526ace208e30f..d74c6363dec06 100644
--- a/examples/inspection/plot_partial_dependence.py
+++ b/examples/inspection/plot_partial_dependence.py
@@ -14,7 +14,7 @@
 :class:`~sklearn.ensemble.HistGradientBoostingRegressor` trained on the
 California housing dataset. The example is taken from [1]_.
 
-The plots show four 1-way and two 1-way partial dependence plots (ommitted for
+The plots show four 1-way and two 1-way partial dependence plots (omitted for
 :class:`~sklearn.neural_network.MLPRegressor` due to computation time).
 
 The target variables for the one-way PDP are: median income (`MedInc`),
 average occupants per household (`AvgOccup`), median house age (`HouseAge`), and
diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py
index 29839157ca33f..a2f3f601f4127 100644
--- a/sklearn/decomposition/_dict_learning.py
+++ b/sklearn/decomposition/_dict_learning.py
@@ -704,7 +704,7 @@ def dict_learning_online(X, n_components=2, alpha=1, n_iter=100,
     inner_stats : tuple of (A, B) ndarrays
         Inner sufficient statistics that are kept by the algorithm.
         Passing them at initialization is useful in online settings, to
-        avoid loosing the history of the evolution.
+        avoid losing the history of the evolution.
         A (n_components, n_components) is the dictionary covariance matrix.
         B (n_features, n_components) is the data approximation matrix
 
@@ -1351,7 +1351,7 @@ class MiniBatchDictionaryLearning(SparseCodingMixin, BaseEstimator):
     inner_stats_ : tuple of (A, B) ndarrays
         Internal sufficient statistics that are kept by the algorithm.
-        Keeping them is useful in online settings, to avoid loosing the
+        Keeping them is useful in online settings, to avoid losing the
         history of the evolution, but they shouldn't have any use for the
         end user.
         A (n_components, n_components) is the dictionary covariance matrix.
diff --git a/sklearn/ensemble/_hist_gradient_boosting/binning.py b/sklearn/ensemble/_hist_gradient_boosting/binning.py
index 18cddca2d867f..a4dec15763940 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/binning.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/binning.py
@@ -32,7 +32,7 @@ def _find_binning_thresholds(data, max_bins, subsample, random_state):
         instead of the quantiles.
     subsample : int or None
         If ``n_samples > subsample``, then ``sub_samples`` samples will be
-        randomly choosen to compute the quantiles. If ``None``, the whole data
+        randomly chosen to compute the quantiles. If ``None``, the whole data
         is used.
     random_state: int or numpy.random.RandomState or None
         Pseudo-random number generator to control the random sub-sampling.
@@ -107,7 +107,7 @@ class _BinMapper(TransformerMixin, BaseEstimator):
         instead of the quantiles.
     subsample : int or None, optional (default=2e5)
         If ``n_samples > subsample``, then ``sub_samples`` samples will be
-        randomly choosen to compute the quantiles. If ``None``, the whole data
+        randomly chosen to compute the quantiles. If ``None``, the whole data
         is used.
     random_state: int or numpy.random.RandomState or None, \
         optional (default=None)
@@ -126,7 +126,7 @@ class _BinMapper(TransformerMixin, BaseEstimator):
         equal to ``n_bins - 1``.
     missing_values_bin_idx_ : uint8
         The index of the bin where missing values are mapped. This is a
-        constant accross all features. This corresponds to the last bin, and
+        constant across all features. This corresponds to the last bin, and
         it is always equal to ``n_bins - 1``. Note that if ``n_bins_missing_``
         is less than ``n_bins - 1`` for a given feature, then there are empty
         (and unused) bins.
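For readers of the binning docstrings above, a hypothetical pure-NumPy sketch of the subsampled quantile computation they describe; the function name and details here are illustrative, not the module's actual (Cython-backed) implementation:

    import numpy as np

    def find_binning_thresholds(col, max_bins=256, subsample=int(2e5),
                                random_state=None):
        # If the column holds more than `subsample` samples, the quantiles
        # are computed on a random subset, as the docstring describes.
        rng = np.random.RandomState(random_state)
        if subsample is not None and col.shape[0] > subsample:
            col = col[rng.choice(col.shape[0], subsample, replace=False)]
        # Evenly spaced percentiles yield at most max_bins - 1 thresholds.
        percentiles = np.linspace(0, 100, num=max_bins + 1)[1:-1]
        return np.percentile(col, percentiles)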
diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py
index 117539a424119..87950eab38a97 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py
@@ -413,7 +413,7 @@ def test_infinite_values_missing_values():
     # High level test making sure that inf and nan values are properly handled
     # when both are present. This is similar to
     # test_split_on_nan_with_infinite_values() in test_grower.py, though we
-    # cannot check the predicitons for binned values here.
+    # cannot check the predictions for binned values here.
 
     X = np.asarray([-np.inf, 0, 1, np.inf, np.nan]).reshape(-1, 1)
     y_isnan = np.isnan(X.ravel())
diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py
index d0100a1724a52..5fe9dee573d1d 100644
--- a/sklearn/ensemble/tests/test_gradient_boosting.py
+++ b/sklearn/ensemble/tests/test_gradient_boosting.py
@@ -1311,7 +1311,7 @@ def test_gradient_boosting_with_init(gb, dataset_maker, init_estimator):
     # Check that GradientBoostingRegressor works when init is a sklearn
     # estimator.
     # Check that an error is raised if trying to fit with sample weight but
-    # inital estimator does not support sample weight
+    # initial estimator does not support sample weight
 
     X, y = dataset_maker()
     sample_weight = np.random.RandomState(42).rand(100)
diff --git a/sklearn/externals/_arff.py b/sklearn/externals/_arff.py
index 4db55eb6d6c02..bf3cbfc9a9b98 100644
--- a/sklearn/externals/_arff.py
+++ b/sklearn/externals/_arff.py
@@ -98,7 +98,7 @@
 The above keys must follow the case which were described, i.e., the keys are
 case sensitive. The attribute type ``attribute_type`` must be one of these
 strings (they are not case sensitive): ``NUMERIC``, ``INTEGER``, ``REAL`` or
-``STRING``. For nominal attributes, the ``atribute_type`` must be a list of
+``STRING``. For nominal attributes, the ``attribute_type`` must be a list of
 strings.
 
 In this format, the XOR dataset presented above can be represented as a python
diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index d0226e62bb7ec..6c3c83a0c0c7c 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -717,7 +717,7 @@ def mean_tweedie_deviance(y_true, y_pred, sample_weight=None, power=0):
     message = ("Mean Tweedie deviance error with power={} can only be used on "
                .format(power))
     if power < 0:
-        # 'Extreme stable', y_true any realy number, y_pred > 0
+        # 'Extreme stable', y_true any real number, y_pred > 0
        if (y_pred <= 0).any():
             raise ValueError(message + "strictly positive y_pred.")
         dev = 2 * (np.power(np.maximum(y_true, 0), 2 - power)
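The 'extreme stable' branch fixed above can be exercised through the public API; a small example with toy values (any real ``y_true`` is accepted when ``power < 0``, but ``y_pred`` must stay strictly positive or a ValueError is raised):

    from sklearn.metrics import mean_tweedie_deviance

    y_true = [-0.5, 0.0, 1.5, 2.0]  # any real values are allowed for power < 0
    y_pred = [0.5, 1.0, 1.5, 2.5]   # must be strictly positive
    print(mean_tweedie_deviance(y_true, y_pred, power=-1))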
"unnormalized_confusion_matrix": confusion_matrix, diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 00ff5a3a0563e..64e88f37ed2bc 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -649,7 +649,7 @@ def predict(self, X): def test_multimetric_scorer_sanity_check(): - # scoring dictionary returned is the same as calling each scorer seperately + # scoring dictionary returned is the same as calling each scorer separately scorers = {'a1': 'accuracy', 'a2': 'accuracy', 'll1': 'neg_log_loss', 'll2': 'neg_log_loss', 'ra1': 'roc_auc', 'ra2': 'roc_auc'} @@ -664,13 +664,13 @@ def test_multimetric_scorer_sanity_check(): result = multi_scorer(clf, X, y) - seperate_scores = { + separate_scores = { name: get_scorer(name)(clf, X, y) for name in ['accuracy', 'neg_log_loss', 'roc_auc']} for key, value in result.items(): score_name = scorers[key] - assert_allclose(value, seperate_scores[score_name]) + assert_allclose(value, separate_scores[score_name]) @pytest.mark.parametrize('scorer_name, metric', [ diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 4c9b082d355fd..e6a8493ef6250 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -948,7 +948,7 @@ class GridSearchCV(BaseSearchCV): returns the selected ``best_index_`` given ``cv_results_``. In that case, the ``best_estimator_`` and ``best_parameters_`` will be set according to the returned ``best_index_`` while the ``best_score_`` - attribute will not be availble. + attribute will not be available. The refitted estimator is made available at the ``best_estimator_`` attribute and permits using ``predict`` directly on this @@ -1278,7 +1278,7 @@ class RandomizedSearchCV(BaseSearchCV): returns the selected ``best_index_`` given the ``cv_results``. In that case, the ``best_estimator_`` and ``best_parameters_`` will be set according to the returned ``best_index_`` while the ``best_score_`` - attribute will not be availble. + attribute will not be available. The refitted estimator is made available at the ``best_estimator_`` attribute and permits using ``predict`` directly on this