From cac0c77d58c2d410bf6601c8e5647b3fc3f3ac4e Mon Sep 17 00:00:00 2001
From: fis
Date: Wed, 29 Sep 2021 15:25:50 +0800
Subject: [PATCH] Fix gamma neg log likelihood. Add a test.

---
 src/metric/elementwise_metric.cu  |  7 +++----
 tests/python/test_eval_metrics.py | 29 +++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/src/metric/elementwise_metric.cu b/src/metric/elementwise_metric.cu
index 25492bf2c5f8..d8fd9e7fda5b 100644
--- a/src/metric/elementwise_metric.cu
+++ b/src/metric/elementwise_metric.cu
@@ -309,10 +309,9 @@ struct EvalGammaNLogLik {
     float constexpr kPsi = 1.0;
     bst_float theta = -1. / py;
     bst_float a = kPsi;
-    // b = -std::log(-theta);
-    float b = 1.0f;
-    // c = 1. / kPsi * std::log(y/kPsi) - std::log(y) - common::LogGamma(1. / kPsi);
-    // = 1.0f * std::log(y) - std::log(y) - 0 = 0
+    float b = -std::log(-theta);
+    // c = 1. / kPsi * std::log(y/kPsi) - std::log(y) - common::LogGamma(1. / kPsi);
+    //   = 1.0f * std::log(y) - std::log(y) - 0 = 0
     float c = 0;
     // general form for exponential family.
     return -((y * theta - b) / a + c);
diff --git a/tests/python/test_eval_metrics.py b/tests/python/test_eval_metrics.py
index e54cd71b670b..71a8691fe7f2 100644
--- a/tests/python/test_eval_metrics.py
+++ b/tests/python/test_eval_metrics.py
@@ -124,6 +124,35 @@ def test_gamma_deviance(self):
         skl_gamma_dev = mean_gamma_deviance(y, score)
         np.testing.assert_allclose(gamma_dev, skl_gamma_dev, rtol=1e-6)
 
+    @pytest.mark.skipif(**tm.no_sklearn())
+    def test_gamma_lik(self) -> None:
+        import scipy.stats as stats
+        rng = np.random.default_rng(1994)
+        n_samples = 32
+        n_features = 10
+
+        X = rng.normal(0, 1, size=n_samples * n_features).reshape((n_samples, n_features))
+
+        alpha, loc, beta = 5.0, 11.1, 22
+        y = stats.gamma.rvs(alpha, loc=loc, scale=beta, size=n_samples, random_state=rng)
+        reg = xgb.XGBRegressor(tree_method="hist", objective="reg:gamma", n_estimators=64)
+        reg.fit(X, y, eval_metric="gamma-nloglik", eval_set=[(X, y)])
+
+        score = reg.predict(X)
+
+        booster = reg.get_booster()
+        nloglik = float(booster.eval(xgb.DMatrix(X, y)).split(":")[1])
+
+        # \beta_i = - 1 / (\theta_i * a),
+        # where \theta_i is the canonical parameter.
+        # XGBoost applies the canonical link of the gamma distribution in the
+        # evaluation function, so \theta_i = - (1.0 / score_i).
+        # Dispersion is hardcoded as 1.0, so shape (a in scipy's parameterization) is 1.0 too.
+        beta = - (1.0 / (- (1.0 / score)))  # == score
+        nloglik_stats = -stats.gamma.logpdf(y, a=1.0, scale=beta)
+
+        np.testing.assert_allclose(nloglik, np.mean(nloglik_stats), rtol=1e-3)
+
     def run_roc_auc_binary(self, tree_method, n_samples):
         import numpy as np
         from sklearn.datasets import make_classification
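
Note (reviewer sketch, not part of the patch): the fixed formula follows the
exponential dispersion family form that the code comments reference. For the
gamma distribution with mean \mu and dispersion \psi,

    log f(y; \theta, \psi) = (y \theta - b(\theta)) / a(\psi) + c(y, \psi)

where

    \theta = -1 / \mu
    b(\theta) = -log(-\theta)
    a(\psi) = \psi
    c(y, \psi) = (1 / \psi) log(y / \psi) - log(y) - lgamma(1 / \psi)

With kPsi = 1, b = log(py) and c = 0, so the per-row metric reduces to
y / py + log(py). The old code hardcoded b = 1.0f, silently dropping the
log(py) term; restoring b = -std::log(-theta) is the substance of the fix.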
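
The new test can also be cross-checked against scipy directly. The following
is a minimal standalone sketch (assuming only numpy and scipy; it is not part
of the patch) showing that the per-row metric above equals the negative log
density of Gamma(shape=1, scale=py) evaluated at y, which is exactly the
identity test_gamma_lik relies on:

    import numpy as np
    import scipy.stats as stats

    rng = np.random.default_rng(0)
    y = rng.gamma(shape=2.0, scale=3.0, size=8)   # positive labels
    py = rng.gamma(shape=2.0, scale=3.0, size=8)  # positive predictions

    # Per-row value of EvalGammaNLogLik with kPsi = 1:
    # -((y * theta - b) / a + c), theta = -1 / py, b = log(py), a = 1, c = 0.
    xgb_rows = y / py + np.log(py)

    # Negative log density of Gamma(shape=1, scale=py) evaluated at y.
    scipy_rows = -stats.gamma.logpdf(y, a=1.0, scale=py)

    np.testing.assert_allclose(xgb_rows, scipy_rows, rtol=1e-12)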