Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix torchelastic import error due to unsupported signal SIGKILL on Windows #88250

Closed
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 5 additions & 3 deletions torch/distributed/elastic/timer/file_based_local_timer.py
Expand Up @@ -9,13 +9,15 @@
import logging
import os
import select
import signal
import signal as _signal
import threading
import time
from typing import Callable, Dict, List, Optional, Set, Tuple

from torch.distributed.elastic.multiprocessing.api import _get_kill_signal
from torch.distributed.elastic.timer.api import TimerClient, TimerRequest


__all__ = ["FileTimerClient", "FileTimerRequest", "FileTimerServer"]

class FileTimerRequest(TimerRequest):
Expand Down Expand Up @@ -78,10 +80,10 @@ class FileTimerClient(TimerClient):
signal: signal, the signal to use to kill the process. Using a
negative or zero signal will not kill the process.
"""
def __init__(self, file_path: str, signal=signal.SIGKILL) -> None:
def __init__(self, file_path: str, signal: Optional[_signal.Signals] = None) -> None:
super().__init__()
self._file_path = file_path
self.signal = signal
self.signal = _get_kill_signal() if signal is None else signal

def _open_non_blocking(self) -> Optional[io.TextIOWrapper]:
try:
Expand Down