Skip to content

Commit

Permalink
add metric for kernel restarts
Browse files Browse the repository at this point in the history
labels: type = kernel name, source = "restarter" or "user"
  • Loading branch information
minrk committed Mar 22, 2023
1 parent 60d7cca commit 429b233
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 1 deletion.
8 changes: 8 additions & 0 deletions jupyter_server/prometheus/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
conventions for metrics & labels.
"""

from prometheus_client import Counter

try:
# Jupyter Notebook also defines these metrics. Re-defining them results in a ValueError.
# Try to de-duplicate by using the ones in Notebook if available.
Expand Down Expand Up @@ -34,3 +36,9 @@
"counter for how many kernels are running labeled by type",
["type"],
)

# Counter of kernel restarts. The "type" label carries the kernel name and the
# "source" label records what triggered the restart ("user" or "restarter").
KERNEL_RESTARTS = Counter(
    name="kernel_restarts",
    documentation="counter for how many kernel restarts, labeled by type and source (user or restarter)",
    labelnames=["type", "source"],
)
2 changes: 2 additions & 0 deletions jupyter_server/services/kernels/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from tornado import web

from jupyter_server.auth import authorized
from jupyter_server.prometheus.metrics import KERNEL_RESTARTS
from jupyter_server.utils import url_escape, url_path_join

from ...base.handlers import APIHandler
Expand Down Expand Up @@ -104,6 +105,7 @@ async def post(self, kernel_id, action):
self.set_status(500)
else:
model = await ensure_async(km.kernel_model(kernel_id))
KERNEL_RESTARTS.labels(type=model["name"], source="user").inc()
self.write(json.dumps(model, default=json_default))
self.finish()

Expand Down
11 changes: 10 additions & 1 deletion jupyter_server/services/kernels/kernelmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
)

from jupyter_server._tz import isoformat, utcnow
from jupyter_server.prometheus.metrics import KERNEL_CURRENTLY_RUNNING_TOTAL
from jupyter_server.prometheus.metrics import KERNEL_CURRENTLY_RUNNING_TOTAL, KERNEL_RESTARTS
from jupyter_server.utils import ApiPath, import_item, to_os_path


Expand Down Expand Up @@ -179,6 +179,10 @@ def __init__(self, **kwargs):
# Methods for managing kernels and sessions
# -------------------------------------------------------------------------

def _handle_kernel_restart(self, kernel_id, kernel_name):
    """Count one kernel restart attributed to the restarter.

    Increments the ``KERNEL_RESTARTS`` counter with ``type`` set to
    ``kernel_name`` and ``source`` set to ``"restarter"``. ``kernel_id`` is
    accepted as part of the signature but is not used when recording the metric.
    """
    restart_counter = KERNEL_RESTARTS.labels(type=kernel_name, source="restarter")
    restart_counter.inc()

def _handle_kernel_died(self, kernel_id):
"""notice that a kernel died"""
self.log.warning("Kernel %s died, removing from map.", kernel_id)
Expand Down Expand Up @@ -279,6 +283,11 @@ async def _finish_kernel_start(self, kernel_id):
lambda: self._handle_kernel_died(kernel_id),
"dead",
)
# register callback to count restarts
self.add_restart_callback(
kernel_id,
lambda: self._handle_kernel_restart(kernel_id, km.kernel_name),
)

def ports_changed(self, kernel_id):
"""Used by ZMQChannelsHandler to determine how to coordinate nudge and replays.
Expand Down

0 comments on commit 429b233

Please sign in to comment.