Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[backport] Fix gamma neg log likelihood. (#7275) #7285

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 3 additions & 4 deletions src/metric/elementwise_metric.cu
Expand Up @@ -309,10 +309,9 @@ struct EvalGammaNLogLik {
float constexpr kPsi = 1.0;
bst_float theta = -1. / py;
bst_float a = kPsi;
// b = -std::log(-theta);
float b = 1.0f;
// c = 1. / kPsi * std::log(y/kPsi) - std::log(y) - common::LogGamma(1. / kPsi);
// = 1.0f * std::log(y) - std::log(y) - 0 = 0
float b = -std::log(-theta);
// c = 1. / kPsi^2 * std::log(y/kPsi) - std::log(y) - common::LogGamma(1. / kPsi);
// = 1.0f * std::log(y) - std::log(y) - 0 = 0
float c = 0;
// general form for exponential family.
return -((y * theta - b) / a + c);
Expand Down
29 changes: 29 additions & 0 deletions tests/python/test_eval_metrics.py
Expand Up @@ -124,6 +124,35 @@ def test_gamma_deviance(self):
skl_gamma_dev = mean_gamma_deviance(y, score)
np.testing.assert_allclose(gamma_dev, skl_gamma_dev, rtol=1e-6)

@pytest.mark.skipif(**tm.no_sklearn())
def test_gamma_lik(self) -> None:
import scipy.stats as stats
rng = np.random.default_rng(1994)
n_samples = 32
n_features = 10

X = rng.normal(0, 1, size=n_samples * n_features).reshape((n_samples, n_features))

alpha, loc, beta = 5.0, 11.1, 22
y = stats.gamma.rvs(alpha, loc=loc, scale=beta, size=n_samples, random_state=rng)
reg = xgb.XGBRegressor(tree_method="hist", objective="reg:gamma", n_estimators=64)
reg.fit(X, y, eval_metric="gamma-nloglik", eval_set=[(X, y)])

score = reg.predict(X)

booster = reg.get_booster()
nloglik = float(booster.eval(xgb.DMatrix(X, y)).split(":")[1].split(":")[0])

# \beta_i = - (1 / \theta_i a)
# where \theta_i is the canonical parameter
# XGBoost uses the canonical link function of gamma in evaluation function.
# so \theta = - (1.0 / y)
# dispersion is hardcoded as 1.0, so shape (a in scipy parameter) is also 1.0
beta = - (1.0 / (- (1.0 / y))) # == y
nloglik_stats = -stats.gamma.logpdf(score, a=1.0, scale=beta)

np.testing.assert_allclose(nloglik, np.mean(nloglik_stats), rtol=1e-3)

def run_roc_auc_binary(self, tree_method, n_samples):
import numpy as np
from sklearn.datasets import make_classification
Expand Down