From c65a40d5ded783b3f762c8ca8b0d4e5a1aa3251e Mon Sep 17 00:00:00 2001
From: Bin Bao
Date: Wed, 26 Oct 2022 16:13:20 +0000
Subject: [PATCH] Enable some PyTorch core tests with inductor (#87490)

Summary:
1) Graph break on torch.random.set_rng_state since it blocks running inductor core tests;
2) Add several inductor-specific skips;
3) Enable several core tests for inductor CI;
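For context on 1): compiling a function that saves and restores the RNG state
previously blocked the inductor core tests when fake tensor propagation was
enabled; with this change dynamo graph-breaks at the set_rng_state call and the
function still runs correctly, just split across more than one compiled graph.
A minimal sketch of that pattern, modeled on test_rng_state in
test/dynamo/test_repros.py (illustrative only, not part of this patch; the
choice of the "eager" backend is an assumption):

    import torch
    import torch._dynamo

    def fn():
        state = torch.get_rng_state()
        before = torch.rand(1000)
        torch.set_rng_state(state)  # with fake tensors enabled, dynamo graph-breaks here
        after = torch.rand(1000)    # compiled as a separate graph after the break
        return before, after

    opt_fn = torch._dynamo.optimize("eager")(fn)
    before, after = opt_fn()
    assert torch.equal(before, after)  # restoring the state makes both draws identical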

cc @jansel @mlazos @soumith @voznesenskym @yanboliang @penguinwu @anijain2305

Pull Request resolved: https://github.com/pytorch/pytorch/pull/87490
Approved by: https://github.com/eellison
---
 .jenkins/pytorch/test.sh         | 11 ++++-------
 test/dynamo/test_repros.py       |  2 ++
 test/test_modules.py             |  6 +++++-
 test/test_ops.py                 |  6 ++++++
 test/test_ops_gradients.py       |  6 ++++--
 torch/_dynamo/variables/torch.py |  3 +++
 6 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh
index 94896701771c..89fbd764201a 100755
--- a/.jenkins/pytorch/test.sh
+++ b/.jenkins/pytorch/test.sh
@@ -251,13 +251,10 @@ test_dynamo_shard() {
 test_inductor() {
-  echo "TODO: enable inductor unit tests"
-  # time python test/run_test.py --core --exclude test_autograd --continue-through-error --verbose
-
-  # PYTORCH_TEST_WITH_DYNAMO and PYTORCH_TEST_WITH_INDUCTOR are only needed for PyTorch tests not written with
-  # using dynamo/inductor. For dynamo/inductor unit tests, specifiying them will trigger an error like
-  # "Detected two calls to `torchdynamo.optimize(...)` with a different backend compiler arguments."
-  # PYTORCH_TEST_WITH_DYNAMO=0 PYTORCH_TEST_WITH_INDUCTOR=0 pytest test/inductor
+  python test/test_modules.py --verbose
+  # TODO: investigate "RuntimeError: CUDA driver API confirmed a leak"
+  # seen in test_ops_gradients.py
+  # pytest test/test_ops_gradients.py --verbose -k "not _complex and not test_inplace_grad_acos_cuda_float64"
 }
 
 test_inductor_huggingface_shard() {
diff --git a/test/dynamo/test_repros.py b/test/dynamo/test_repros.py
index 66fc19895dd6..41564952a744 100644
--- a/test/dynamo/test_repros.py
+++ b/test/dynamo/test_repros.py
@@ -1016,6 +1016,8 @@ def test_create_rand_mask_from_inputs(self):
         self.assertEqual(cnt.frame_count, 1)
         self.assertEqual(cnt.op_count, 8)
 
+    # TODO: make set_rng_state work with FakeTensor/aot_autograd
+    @patch.object(torch._dynamo.config, "fake_tensor_propagation", False)
     def test_rng_state(self):
         def fn():
             state = torch.get_rng_state()
diff --git a/test/test_modules.py b/test/test_modules.py
index e06f0cc617d9..2f5008244d54 100644
--- a/test/test_modules.py
+++ b/test/test_modules.py
@@ -11,7 +11,8 @@
     instantiate_device_type_tests, onlyCUDA, toleranceOverride, tol, skipMeta)
 from torch.testing._internal.common_modules import module_db, modules, TrainEvalMode
 from torch.testing._internal.common_utils import (
-    TestCase, run_tests, freeze_rng_state, mock_wrapper, get_tensors_from, gradcheck, gradgradcheck, skipIfMps)
+    TestCase, run_tests, freeze_rng_state, mock_wrapper, get_tensors_from, gradcheck,
+    gradgradcheck, skipIfMps, skipIfTorchInductor)
 from unittest.mock import patch, call
@@ -326,6 +327,7 @@ def inner_zero_grad(obj):
 
     @skipIfMps
     @modules(module_db)
+    @skipIfTorchInductor("to be fixed")
     def test_non_contiguous_tensors(self, device, dtype, module_info, training):
         # Check modules work with non-contiguous tensors
@@ -489,6 +491,7 @@ def test_gradgrad(self, device, dtype, module_info, training):
     @toleranceOverride({torch.float32: tol(5e-2, 0),
                         torch.float64: tol(4e-4, 0)})
     @modules(module_db)
+    @skipIfTorchInductor("to be fixed")
     def test_cpu_gpu_parity(self, device, dtype, module_info, training):
         # TODO: RNN / GRU / LSTM don't support backwards on eval mode for cuDNN; skip this in a
         # nicer way for eval mode only.
@@ -579,6 +582,7 @@ def check_backward(cpu_output, gpu_output):
 
     @skipIfMps
     @modules(module_db)
+    @skipIfTorchInductor("to be fixed")
     def test_memory_format(self, device, dtype, module_info, training):
         is_sm86 = device.startswith("cuda") and torch.cuda.get_device_capability(0) == (8, 6)
         # TODO tighten it to a specific module
diff --git a/test/test_ops.py b/test/test_ops.py
index 5e9371e98234..0e5b6f1d607d 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -36,6 +36,7 @@
     first_sample,
     parametrize,
     skipIfSlowGradcheckEnv,
+    skipIfTorchInductor,
     slowTest,
 )
 from torch.testing._internal.common_methods_invocations import (
@@ -209,6 +210,7 @@ def to_cpu(arg):
     @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN")
     @onlyNativeDeviceTypes
     @ops(python_ref_db)
+    @skipIfTorchInductor("Takes too long for inductor")
     def test_python_ref_meta(self, device, dtype, op):
         with FakeTensorMode() as mode:
             pass
@@ -374,6 +376,7 @@ def _distance(a, b):
     @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN")
     @onlyNativeDeviceTypes
     @ops(python_ref_db)
+    @skipIfTorchInductor("Takes too long for inductor")
     def test_python_ref(self, device, dtype, op):
         # In this test, primTorch refs call into the refs namespace
         # For example, a ref with torch.foo in it will calls refs.foo instead
@@ -386,6 +389,7 @@ def test_python_ref(self, device, dtype, op):
     @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN")
     @onlyNativeDeviceTypes
     @ops(python_ref_db)
+    @skipIfTorchInductor("Takes too long for inductor")
     def test_python_ref_torch_fallback(self, device, dtype, op):
         # In this test, refs call into the torch namespace (after the initial invocation)
         # For example, a ref with torch.foo in it will call torch.foo instead of refs.foo
@@ -397,6 +401,7 @@
     @skipCUDAIfRocm
     @ops(python_ref_db)
     @parametrize('executor', ['aten', 'nvfuser'])
+    @skipIfTorchInductor("Takes too long for inductor")
     def test_python_ref_executor(self, device, dtype, op, executor):
         # TODO: Not all dtypes are supported with nvfuser
         from torch._prims_common import _torch_dtype_to_nvfuser_dtype_map
@@ -457,6 +462,7 @@ def test_errors(self, device, op):
     @skipMeta
     @onlyNativeDeviceTypes
     @ops([op for op in python_ref_db if op.error_inputs_func is not None], dtypes=OpDTypes.none)
+    @skipIfTorchInductor("Takes too long for inductor")
     def test_python_ref_errors(self, device, op):
         mode = FakeTensorMode()
         with mode:
diff --git a/test/test_ops_gradients.py b/test/test_ops_gradients.py
index 0411f043df9c..6d517c7a7f8b 100644
--- a/test/test_ops_gradients.py
+++ b/test/test_ops_gradients.py
@@ -4,8 +4,9 @@
 from itertools import chain
 
 import torch
-from torch.testing._internal.common_utils import \
-    (TestCase, is_iterable_of_tensors, run_tests, gradcheck, gradgradcheck, is_slow_gradcheck_env)
+from torch.testing._internal.common_utils import (
+    TestCase, is_iterable_of_tensors, run_tests, gradcheck, gradgradcheck, is_slow_gradcheck_env,
+    skipIfTorchInductor)
 from torch.testing._internal.common_methods_invocations import op_db
 from torch.testing._internal.common_device_type import \
     (instantiate_device_type_tests, ops, OpDTypes)
@@ -253,6 +254,7 @@ def test_forward_mode_AD(self, device, dtype, op):
         self._forward_grad_helper(device, dtype, op, op.get_op(), is_inplace=False)
 
     @_gradcheck_ops(op_db)
+    @skipIfTorchInductor("to be fixed")
     def test_inplace_forward_mode_AD(self, device, dtype, op):
         self._skip_helper(op, device, dtype)
diff --git a/torch/_dynamo/variables/torch.py b/torch/_dynamo/variables/torch.py
index e0c88b2cf059..36ca6591189d 100644
--- a/torch/_dynamo/variables/torch.py
+++ b/torch/_dynamo/variables/torch.py
@@ -320,6 +320,9 @@ def get_state_from_generator():
             assert isinstance(args[0], TensorVariable)
 
             if config.fake_tensor_propagation:
+                unimplemented(
+                    "TODO: make torch.random.set_rng_state work with FakeTensor/aot_autograd"
+                )
                 # In fake tensor case, this state doesn't matter, but
                 # it needs to be valid to not segfault. Pull a real tensor out.
                 # The value won't matter since we are running with fake tensors anyway, so rng doesn't matter.