From f4844879c5a834c2a1712330694ad64127cd0b5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Tue, 1 Nov 2022 22:38:49 +0100 Subject: [PATCH 1/3] Fix unsupported signal SIGKILL on Windows --- torch/distributed/elastic/timer/file_based_local_timer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torch/distributed/elastic/timer/file_based_local_timer.py b/torch/distributed/elastic/timer/file_based_local_timer.py index 8ad78f2f2683..2bb3c185f315 100644 --- a/torch/distributed/elastic/timer/file_based_local_timer.py +++ b/torch/distributed/elastic/timer/file_based_local_timer.py @@ -9,7 +9,7 @@ import logging import os import select -import signal +import signal as _signal import threading import time from typing import Callable, Dict, List, Optional, Set, Tuple @@ -78,10 +78,10 @@ class FileTimerClient(TimerClient): signal: singal, the signal to use to kill the process. Using a negative or zero signal will not kill the process. """ - def __init__(self, file_path: str, signal=signal.SIGKILL) -> None: + def __init__(self, file_path: str, signal: Optional[_signal.Signals] = None) -> None: super().__init__() self._file_path = file_path - self.signal = signal + self.signal = _signal.SIGKILL if signal is None else signal def _open_non_blocking(self) -> Optional[io.TextIOWrapper]: try: From e13933d2ee7c88cbe3a61e107f43d1ab48e4b669 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 2 Nov 2022 19:43:25 +0100 Subject: [PATCH 2/3] use _get_kill_signal --- torch/distributed/elastic/timer/file_based_local_timer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/torch/distributed/elastic/timer/file_based_local_timer.py b/torch/distributed/elastic/timer/file_based_local_timer.py index 2bb3c185f315..380d2c3524e4 100644 --- a/torch/distributed/elastic/timer/file_based_local_timer.py +++ b/torch/distributed/elastic/timer/file_based_local_timer.py @@ -15,6 +15,8 @@ from typing import Callable, Dict, List, Optional, Set, Tuple from torch.distributed.elastic.timer.api import TimerClient, TimerRequest +from torch.distributed.elastic.multiprocessing.api import _get_kill_signal + __all__ = ["FileTimerClient", "FileTimerRequest", "FileTimerServer"] @@ -81,7 +83,7 @@ class FileTimerClient(TimerClient): def __init__(self, file_path: str, signal: Optional[_signal.Signals] = None) -> None: super().__init__() self._file_path = file_path - self.signal = _signal.SIGKILL if signal is None else signal + self.signal = _get_kill_signal() if signal is None else signal def _open_non_blocking(self) -> Optional[io.TextIOWrapper]: try: From b47d28ab46c9b7e5fa650ecaab259cd0a1b88714 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Wed, 2 Nov 2022 19:44:47 +0100 Subject: [PATCH 3/3] sort import --- torch/distributed/elastic/timer/file_based_local_timer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch/distributed/elastic/timer/file_based_local_timer.py b/torch/distributed/elastic/timer/file_based_local_timer.py index 380d2c3524e4..05f442240d56 100644 --- a/torch/distributed/elastic/timer/file_based_local_timer.py +++ b/torch/distributed/elastic/timer/file_based_local_timer.py @@ -14,8 +14,8 @@ import time from typing import Callable, Dict, List, Optional, Set, Tuple -from torch.distributed.elastic.timer.api import TimerClient, TimerRequest from torch.distributed.elastic.multiprocessing.api import _get_kill_signal +from torch.distributed.elastic.timer.api import TimerClient, TimerRequest __all__ = ["FileTimerClient", "FileTimerRequest", "FileTimerServer"]