From 9f9d65734fb1911d9d3db7baa404e166a717a0c9 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Fri, 4 Nov 2022 15:16:19 -0700 Subject: [PATCH 1/4] Make TorchElastic timer importable on Windows Also, add `torch.distributed` to test imports, so that we would not regress in the future --- test/test_testing.py | 5 +++-- torch/distributed/elastic/timer/file_based_local_timer.py | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/test/test_testing.py b/test/test_testing.py index 5ce07ce454dc299..076fb024908c9b0 100644 --- a/test/test_testing.py +++ b/test/test_testing.py @@ -1794,8 +1794,9 @@ def test_circular_dependencies(self) -> None: if not sys.version_info >= (3, 9): ignored_modules.append("torch.utils.benchmark") if IS_WINDOWS or IS_MACOS: - # Distributed does not work on Windows or by default on Mac - ignored_modules.append("torch.distributed.") + # Distributed should be importable on Windows but not on Mac + if IS_MACOS: + ignored_modules.append("torch.distributed.") ignored_modules.append("torch.testing._internal.dist_utils") # And these both end up with transitive dependencies on distributed ignored_modules.append("torch.nn.parallel._replicated_tensor_ddp_interop") diff --git a/torch/distributed/elastic/timer/file_based_local_timer.py b/torch/distributed/elastic/timer/file_based_local_timer.py index 36ae944ec8e4ffd..88fefe1dab81166 100644 --- a/torch/distributed/elastic/timer/file_based_local_timer.py +++ b/torch/distributed/elastic/timer/file_based_local_timer.py @@ -10,6 +10,7 @@ import os import select import signal +import sys import threading import time from typing import Callable, Dict, List, Optional, Set, Tuple @@ -78,7 +79,8 @@ class FileTimerClient(TimerClient): signal: signal, the signal to use to kill the process. Using a negative or zero signal will not kill the process. 
""" - def __init__(self, file_path: str, signal=signal.SIGKILL) -> None: + def __init__(self, file_path: str, signal=(signal.SIGKILL if sys.platform != "win32" else + signal.CTRL_C_EVENT)) -> None: # type: ignore[attr-defined] super().__init__() self._file_path = file_path self.signal = signal From 677a2d347cdf0ec31e709ca03fe422f342b94f85 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Fri, 4 Nov 2022 17:07:33 -0700 Subject: [PATCH 2/4] Update test_testing.py --- test/test_testing.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_testing.py b/test/test_testing.py index 076fb024908c9b0..83b8cac6cfbb788 100644 --- a/test/test_testing.py +++ b/test/test_testing.py @@ -1794,9 +1794,11 @@ def test_circular_dependencies(self) -> None: if not sys.version_info >= (3, 9): ignored_modules.append("torch.utils.benchmark") if IS_WINDOWS or IS_MACOS: - # Distributed should be importable on Windows but not on Mac + # Distributed should be importable on Windows(except nn.api.), but not on Mac if IS_MACOS: ignored_modules.append("torch.distributed.") + else: + ignored_modules.append("torch.distributed.nn.api.") ignored_modules.append("torch.testing._internal.dist_utils") # And these both end up with transitive dependencies on distributed ignored_modules.append("torch.nn.parallel._replicated_tensor_ddp_interop") From a7146d543454d4b9cd3803d9e3c68e1e1a3728c8 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 10 Nov 2022 06:08:06 -0800 Subject: [PATCH 3/4] One more exclusion --- test/test_testing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_testing.py b/test/test_testing.py index 83b8cac6cfbb788..f7663f161c27be2 100644 --- a/test/test_testing.py +++ b/test/test_testing.py @@ -1799,6 +1799,7 @@ def test_circular_dependencies(self) -> None: ignored_modules.append("torch.distributed.") else: ignored_modules.append("torch.distributed.nn.api.") + ignored_modules.append("torch.distributed.optim.") 
ignored_modules.append("torch.testing._internal.dist_utils") # And these both end up with transitive dependencies on distributed ignored_modules.append("torch.nn.parallel._replicated_tensor_ddp_interop") From 87a3b7019e39fe97a8e4d09337c3d6d2088c6f10 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 10 Nov 2022 06:57:30 -0800 Subject: [PATCH 4/4] Two more exclusions --- test/test_testing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_testing.py b/test/test_testing.py index f7663f161c27be2..8fe66043e5a16d6 100644 --- a/test/test_testing.py +++ b/test/test_testing.py @@ -1800,6 +1800,8 @@ def test_circular_dependencies(self) -> None: else: ignored_modules.append("torch.distributed.nn.api.") ignored_modules.append("torch.distributed.optim.") + ignored_modules.append("torch.distributed.pipeline.") + ignored_modules.append("torch.distributed.rpc.") ignored_modules.append("torch.testing._internal.dist_utils") # And these both end up with transitive dependencies on distributed ignored_modules.append("torch.nn.parallel._replicated_tensor_ddp_interop")