add class_weight to PA cls, remove from PA reg
rebase on top of scikit-learn#4347

improve error message
trevorstephens committed Jun 4, 2015
1 parent ec2fd72 commit 718825d
Showing 3 changed files with 102 additions and 6 deletions.
6 changes: 5 additions & 1 deletion doc/whats_new.rst
@@ -61,6 +61,10 @@ Enhancements
option, which has a simpler formula and interpretation.
By Hanna Wallach and `Andreas Müller`_.

- Add ``class_weight`` parameter to automatically weight samples by class
frequency for :class:`linear_model.PassiveAgressiveClassifier`. By
`Trevor Stephens`_.

- Added backlinks from the API reference pages to the user guide. By
`Andreas Müller`_.

@@ -572,7 +576,7 @@ API changes summary

- The ``shuffle`` option of :class:`.linear_model.SGDClassifier`,
:class:`linear_model.SGDRegressor`, :class:`linear_model.Perceptron`,
:class:`linear_model.PassiveAgressiveClassivier` and
:class:`linear_model.PassiveAgressiveClassifier` and
:class:`linear_model.PassiveAgressiveRegressor` now defaults to ``True``.

- :class:`cluster.DBSCAN` now uses a deterministic initialization. The
30 changes: 25 additions & 5 deletions sklearn/linear_model/passive_aggressive.py
@@ -49,6 +49,16 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
When set to True, reuse the solution of the previous call to fit as
initialization, otherwise, just erase the previous solution.
class_weight : dict, {class_label: weight} or "balanced" or None, optional
Preset for the class_weight fit parameter.
Weights associated with classes. If not given, all classes
are supposed to have weight one.
The "balanced" mode uses the values of y to automatically adjust
weights inversely proportional to class frequencies in the input data
as ``n_samples / (n_classes * np.bincount(y))``
Attributes
----------
coef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes,\
@@ -71,9 +81,9 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
K. Crammer, O. Dekel, J. Keshet, S. Shalev-Shwartz, Y. Singer - JMLR (2006)
"""
def __init__(self, C=1.0, fit_intercept=True,
n_iter=5, shuffle=True, verbose=0, loss="hinge",
n_jobs=1, random_state=None, warm_start=False):
def __init__(self, C=1.0, fit_intercept=True, n_iter=5, shuffle=True,
verbose=0, loss="hinge", n_jobs=1, random_state=None,
warm_start=False, class_weight=None):
BaseSGDClassifier.__init__(self,
penalty=None,
fit_intercept=fit_intercept,
@@ -83,6 +93,7 @@ def __init__(self, C=1.0, fit_intercept=True,
random_state=random_state,
eta0=1.0,
warm_start=warm_start,
class_weight=class_weight,
n_jobs=n_jobs)
self.C = C
self.loss = loss
@@ -110,6 +121,16 @@ def partial_fit(self, X, y, classes=None):
-------
self : returns an instance of self.
"""
if self.class_weight == 'balanced':
raise ValueError("class_weight 'balanced' is not supported for "
"partial_fit. In order to use 'balanced' "
"weights, from the sklearn.utils module use "
"compute_class_weight('balanced', classes, y). "
"In place of y you can us a large enough sample "
"of the full training set target to properly "
"estimate the class frequency distributions. "
"Pass the resulting weights as the class_weight "
"parameter.")
lr = "pa1" if self.loss == "hinge" else "pa2"
return self._partial_fit(X, y, alpha=1.0, C=self.C,
loss="hinge", learning_rate=lr, n_iter=1,
@@ -209,8 +230,7 @@ class PassiveAggressiveRegressor(BaseSGDRegressor):
"""
def __init__(self, C=1.0, fit_intercept=True, n_iter=5, shuffle=True,
verbose=0, loss="epsilon_insensitive",
epsilon=DEFAULT_EPSILON, random_state=None, class_weight=None,
warm_start=False):
epsilon=DEFAULT_EPSILON, random_state=None, warm_start=False):
BaseSGDRegressor.__init__(self,
penalty=None,
l1_ratio=0,
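
For context (not part of the commit's diff): a minimal sketch of what the "balanced" mode described in the new class_weight docstring computes, and how the parameter would be passed at construction time. The toy X and y below are invented for illustration.

import numpy as np
from sklearn.linear_model import PassiveAggressiveClassifier

# Imbalanced toy data: four samples of class 0, one sample of class 1.
X = np.array([[0.0], [0.1], [0.2], [0.3], [1.0]])
y = np.array([0, 0, 0, 0, 1])

# The formula quoted in the docstring above.
n_samples, n_classes = len(y), len(np.unique(y))
balanced_weights = n_samples / (n_classes * np.bincount(y))
print(balanced_weights)  # [0.625 2.5] -- the rare class receives the larger weight

# class_weight="balanced" applies the same weighting automatically during fit.
clf = PassiveAggressiveClassifier(class_weight="balanced")
clf.fit(X, y)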
72 changes: 72 additions & 0 deletions sklearn/linear_model/tests/test_passive_aggressive.py
@@ -4,6 +4,7 @@
from sklearn.utils.testing import assert_less
from sklearn.utils.testing import assert_greater
from sklearn.utils.testing import assert_array_almost_equal, assert_array_equal
from sklearn.utils.testing import assert_almost_equal
from sklearn.utils.testing import assert_raises

from sklearn.base import ClassifierMixin
@@ -125,6 +126,77 @@ def test_classifier_undefined_methods():
assert_raises(AttributeError, lambda x: getattr(clf, x), meth)


def test_class_weights():
# Test class weights.
X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
[1.0, 1.0], [1.0, 0.0]])
y2 = [1, 1, 1, -1, -1]

clf = PassiveAggressiveClassifier(C=0.1, n_iter=100, class_weight=None,
random_state=100)
clf.fit(X2, y2)
assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([1]))

# we give a small weight to class 1
clf = PassiveAggressiveClassifier(C=0.1, n_iter=100,
class_weight={1: 0.001},
random_state=100)
clf.fit(X2, y2)

# now the hyperplane should rotate clockwise and
# the prediction on this point should shift
assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([-1]))


def test_partial_fit_weight_class_balanced():
# partial_fit with class_weight='balanced' not supported
clf = PassiveAggressiveClassifier(class_weight="balanced")
assert_raises(ValueError, clf.partial_fit, X, y, classes=np.unique(y))
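
Not part of the test file: a hedged sketch of the workaround that the error message above suggests, computing "balanced" weights up front with compute_class_weight and passing them as a dict so that partial_fit can still be used. The sample target mirrors the toy data in these tests; note that newer scikit-learn releases require classes and y to be passed as keyword arguments.

import numpy as np
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.utils import compute_class_weight

# A large enough sample of the full training target to estimate class frequencies.
y_sample = np.array([1, 1, 1, -1, -1])
classes = np.unique(y_sample)
weights = compute_class_weight("balanced", classes, y_sample)

# Pass the resulting weights as the class_weight parameter, then stream batches.
clf = PassiveAggressiveClassifier(class_weight=dict(zip(classes, weights)))
X_batch = np.array([[-1.0, -1.0], [-1.0, 0.0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
clf.partial_fit(X_batch, y_sample, classes=classes)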


def test_equal_class_weight():
X2 = [[1, 0], [1, 0], [0, 1], [0, 1]]
y2 = [0, 0, 1, 1]
clf = PassiveAggressiveClassifier(C=0.1, n_iter=1000, class_weight=None)
clf.fit(X2, y2)

# Already balanced, so "balanced" weights should have no effect
clf_balanced = PassiveAggressiveClassifier(C=0.1, n_iter=1000,
class_weight="balanced")
clf_balanced.fit(X2, y2)

clf_weighted = PassiveAggressiveClassifier(C=0.1, n_iter=1000,
class_weight={0: 0.5, 1: 0.5})
clf_weighted.fit(X2, y2)

# should be similar up to some epsilon due to learning rate schedule
assert_almost_equal(clf.coef_, clf_weighted.coef_, decimal=2)
assert_almost_equal(clf.coef_, clf_balanced.coef_, decimal=2)


def test_wrong_class_weight_label():
# ValueError due to wrong class_weight label.
X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
[1.0, 1.0], [1.0, 0.0]])
y2 = [1, 1, 1, -1, -1]

clf = PassiveAggressiveClassifier(class_weight={0: 0.5})
assert_raises(ValueError, clf.fit, X2, y2)


def test_wrong_class_weight_format():
# ValueError due to wrong class_weight argument type.
X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
[1.0, 1.0], [1.0, 0.0]])
y2 = [1, 1, 1, -1, -1]

clf = PassiveAggressiveClassifier(class_weight=[0.5])
assert_raises(ValueError, clf.fit, X2, y2)

clf = PassiveAggressiveClassifier(class_weight="the larch")
assert_raises(ValueError, clf.fit, X2, y2)


def test_regressor_mse():
y_bin = y.copy()
y_bin[y != 1] = -1
