-
Notifications
You must be signed in to change notification settings - Fork 11
/
cumulative.py
104 lines (76 loc) · 3.11 KB
/
cumulative.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
"""Plot the cumulative distribution of residuals and absolute errors."""
from __future__ import annotations
from typing import TYPE_CHECKING, Any
import matplotlib.pyplot as plt
import numpy as np
if TYPE_CHECKING:
from numpy.typing import ArrayLike
def cumulative_residual(
    res: ArrayLike, ax: plt.Axes | None = None, **kwargs: Any
) -> plt.Axes:
    """Plot the empirical cumulative distribution for the residuals (y - mu).

    The central 90% of the sorted residuals is shaded, and dashed grey guide
    lines mark zero residual and the 50th percentile.

    Args:
        res (array): Residuals between y_true and y_pred, i.e.
            targets - model predictions. Input of any shape is flattened
            before sorting. If empty, only the axis labels and y-limits
            are set and nothing is drawn.
        ax (Axes, optional): matplotlib Axes on which to plot. Defaults to None,
            in which case plt.gca() is used.
        **kwargs: Additional keyword arguments passed to ax.fill_between().
            "alpha" defaults to 0.3 when not given.

    Returns:
        plt.Axes: matplotlib Axes object
    """
    if ax is None:
        ax = plt.gca()

    # axis=None flattens before sorting. The default (last axis) would leave
    # a column vector of shape (n, 1) unsorted and break the percentile curve.
    res = np.sort(res, axis=None)
    n_data = len(res)

    # Label the plot
    ax.set(xlabel="Residual", ylabel="Percentile", title="Cumulative Residual")

    if n_data == 0:
        # Nothing to draw; the index lookups below would raise IndexError.
        ax.set(ylim=(0, 100))
        return ax

    # Percentile assigned to each sorted residual: 0, 100/n, ..., 100*(n-1)/n
    percentiles = np.arange(n_data) / n_data * 100

    # Plot the empirical distribution
    ax.plot(res, percentiles)

    # Fill the 90% coverage region
    # TODO may look better to add drop downs instead
    low = int(0.05 * (n_data - 1) + 0.5)
    up = int(0.95 * (n_data - 1) + 0.5)
    kwargs.setdefault("alpha", 0.3)
    ax.fill_between(res[low:up], percentiles[low:up], **kwargs)

    # Get robust (and symmetrical) x axis limits: pad the 5%/95% residuals by
    # the larger of the two gaps to a point slightly further into each tail.
    # int(1.03 * up) stays in bounds because 1.03 * 0.95 < 1.
    delta_low = res[low] - res[int(0.97 * low)]
    delta_up = res[int(1.03 * up)] - res[up]
    delta_max = max(delta_low, delta_up)
    lim = max(abs(res[up] + delta_max), abs(res[low] - delta_max))
    ax.set(xlim=(-lim, lim), ylim=(0, 100))

    # Add some visual guidelines: a vertical line at zero residual and a
    # horizontal line at the 50th percentile from the left edge to zero.
    ax.plot((0, 0), (0, 100), "--", color="grey", alpha=0.4)
    ax.plot((ax.get_xlim()[0], 0), (50, 50), "--", color="grey", alpha=0.4)

    return ax
def cumulative_error(
    abs_err: ArrayLike, ax: plt.Axes | None = None, **kwargs: Any
) -> plt.Axes:
    """Plot the empirical cumulative distribution of the absolute errors,
    abs(y_true - y_pred).

    Dashed grey guide lines mark the errors at the 50th and 75th percentiles.

    Args:
        abs_err (array): Absolute error between y_true and y_pred, i.e.
            abs(targets - model predictions). Input of any shape is flattened
            before sorting. If empty, only the axis labels and y-limits
            are set and nothing is drawn.
        ax (Axes, optional): matplotlib Axes on which to plot. Defaults to None,
            in which case plt.gca() is used.
        **kwargs: Additional keyword arguments passed to ax.plot().

    Returns:
        plt.Axes: matplotlib Axes object
    """
    if ax is None:
        ax = plt.gca()

    # axis=None flattens before sorting. The default (last axis) would leave
    # a column vector of shape (n, 1) unsorted and break the percentile curve.
    errors = np.sort(abs_err, axis=None)
    n_data = len(errors)

    # Label the plot
    ax.set(xlabel="Absolute Error", ylabel="Percentile", title="Cumulative Error")

    if n_data == 0:
        # Nothing to draw; the index lookups below would raise IndexError.
        ax.set(ylim=(0, 100))
        return ax

    # Plot the empirical distribution
    ax.plot(errors, np.arange(n_data) / n_data * 100, **kwargs)

    # Get a robust upper x-axis limit: cut the axis at the 98th percentile so
    # the largest errors do not stretch the plot.
    lim = np.percentile(errors, 98)
    ax.set(xlim=(0, lim), ylim=(0, 100))

    line_kwargs = {"linestyle": "--", "color": "grey", "alpha": 0.4}

    # Add some visual guidelines: for each percentile, one line across from
    # the y-axis to the curve and one line down from the curve to the x-axis.
    for percentile in (50, 75):
        # Index of the sorted error nearest to this percentile (round half up)
        idx = int(percentile * (n_data - 1) / 100 + 0.5)
        err_at_pct = errors[idx]
        ax.plot((0, err_at_pct), (percentile, percentile), **line_kwargs)
        ax.plot((err_at_pct, err_at_pct), (0, percentile), **line_kwargs)

    return ax