From c65a40d5ded783b3f762c8ca8b0d4e5a1aa3251e Mon Sep 17 00:00:00 2001
From: Bin Bao
Date: Wed, 26 Oct 2022 16:13:20 +0000
Subject: [PATCH] Enable some PyTorch core tests with inductor (#87490)

Summary:
1) Graph break on torch.random.set_rng_state since it blocks running inductor core tests;
2) Add several inductor-specific skips;
3) Enable several core tests for inductor CI;
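For context on 1): compiling a function that saves and restores the RNG state
previously blocked the inductor core tests when fake tensor propagation was
enabled; with this change dynamo graph-breaks at the set_rng_state call and the
function still runs correctly, just split across more than one compiled graph.
A minimal sketch of that pattern, modeled on test_rng_state in
test/dynamo/test_repros.py (illustrative only, not part of this patch; the
choice of the "eager" backend is an assumption):

    import torch
    import torch._dynamo

    def fn():
        state = torch.get_rng_state()
        before = torch.rand(1000)
        torch.set_rng_state(state)  # with fake tensors enabled, dynamo graph-breaks here
        after = torch.rand(1000)    # compiled as a separate graph after the break
        return before, after

    opt_fn = torch._dynamo.optimize("eager")(fn)
    before, after = opt_fn()
    assert torch.equal(before, after)  # restoring the state makes both draws identical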

cc @jansel @mlazos @soumith @voznesenskym @yanboliang @penguinwu @anijain2305

Pull Request resolved: https://github.com/pytorch/pytorch/pull/87490
Approved by: https://github.com/eellison
---
 .jenkins/pytorch/test.sh         | 11 ++++-------
 test/dynamo/test_repros.py       |  2 ++
 test/test_modules.py             |  6 +++++-
 test/test_ops.py                 |  6 ++++++
 test/test_ops_gradients.py       |  6 ++++--
 torch/_dynamo/variables/torch.py |  3 +++
 6 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh
index 94896701771c..89fbd764201a 100755
--- a/.jenkins/pytorch/test.sh
+++ b/.jenkins/pytorch/test.sh
@@ -251,13 +251,10 @@ test_dynamo_shard() {
 test_inductor() {
-  echo "TODO: enable inductor unit tests"
-  # time python test/run_test.py --core --exclude test_autograd --continue-through-error --verbose
-
-  # PYTORCH_TEST_WITH_DYNAMO and PYTORCH_TEST_WITH_INDUCTOR are only needed for PyTorch tests not written with
-  # using dynamo/inductor. For dynamo/inductor unit tests, specifiying them will trigger an error like
-  # "Detected two calls to `torchdynamo.optimize(...)` with a different backend compiler arguments."
-  # PYTORCH_TEST_WITH_DYNAMO=0 PYTORCH_TEST_WITH_INDUCTOR=0 pytest test/inductor
+  python test/test_modules.py --verbose
+  # TODO: investigate "RuntimeError: CUDA driver API confirmed a leak"
+  # seen in test_ops_gradients.py
+  # pytest test/test_ops_gradients.py --verbose -k "not _complex and not test_inplace_grad_acos_cuda_float64"
 }
 
 test_inductor_huggingface_shard() {
diff --git a/test/dynamo/test_repros.py b/test/dynamo/test_repros.py
index 66fc19895dd6..41564952a744 100644
--- a/test/dynamo/test_repros.py
+++ b/test/dynamo/test_repros.py
@@ -1016,6 +1016,8 @@ def test_create_rand_mask_from_inputs(self):
         self.assertEqual(cnt.frame_count, 1)
         self.assertEqual(cnt.op_count, 8)
 
+    # TODO: make set_rng_state work with FakeTensor/aot_autograd
+    @patch.object(torch._dynamo.config, "fake_tensor_propagation", False)
     def test_rng_state(self):
         def fn():
             state = torch.get_rng_state()
diff --git a/test/test_modules.py b/test/test_modules.py
index e06f0cc617d9..2f5008244d54 100644
--- a/test/test_modules.py
+++ b/test/test_modules.py
@@ -11,7 +11,8 @@
     instantiate_device_type_tests, onlyCUDA, toleranceOverride, tol, skipMeta)
 from torch.testing._internal.common_modules import module_db, modules, TrainEvalMode
 from torch.testing._internal.common_utils import (
-    TestCase, run_tests, freeze_rng_state, mock_wrapper, get_tensors_from, gradcheck, gradgradcheck, skipIfMps)
+    TestCase, run_tests, freeze_rng_state, mock_wrapper, get_tensors_from, gradcheck,
+    gradgradcheck, skipIfMps, skipIfTorchInductor)
 from unittest.mock import patch, call
@@ -326,6 +327,7 @@ def inner_zero_grad(obj):
 
     @skipIfMps
     @modules(module_db)
+    @skipIfTorchInductor("to be fixed")
     def test_non_contiguous_tensors(self, device, dtype, module_info, training):
         # Check modules work with non-contiguous tensors
@@ -489,6 +491,7 @@ def test_gradgrad(self, device, dtype, module_info, training):
     @toleranceOverride({torch.float32: tol(5e-2, 0),
                         torch.float64: tol(4e-4, 0)})
     @modules(module_db)
+    @skipIfTorchInductor("to be fixed")
     def test_cpu_gpu_parity(self, device, dtype, module_info, training):
         # TODO: RNN / GRU / LSTM don't support backwards on eval mode for cuDNN; skip this in a
         # nicer way for eval mode only.
@@ -579,6 +582,7 @@ def check_backward(cpu_output, gpu_output):
 
     @skipIfMps
     @modules(module_db)
+    @skipIfTorchInductor("to be fixed")
     def test_memory_format(self, device, dtype, module_info, training):
         is_sm86 = device.startswith("cuda") and torch.cuda.get_device_capability(0) == (8, 6)
         # TODO tighten it to a specific module
diff --git a/test/test_ops.py b/test/test_ops.py
index 5e9371e98234..0e5b6f1d607d 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -36,6 +36,7 @@
     first_sample,
     parametrize,
     skipIfSlowGradcheckEnv,
+    skipIfTorchInductor,
     slowTest,
 )
 from torch.testing._internal.common_methods_invocations import (
@@ -209,6 +210,7 @@ def to_cpu(arg):
     @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN")
     @onlyNativeDeviceTypes
     @ops(python_ref_db)
+    @skipIfTorchInductor("Takes too long for inductor")
     def test_python_ref_meta(self, device, dtype, op):
         with FakeTensorMode() as mode:
             pass
@@ -374,6 +376,7 @@ def _distance(a, b):
     @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN")
     @onlyNativeDeviceTypes
     @ops(python_ref_db)
+    @skipIfTorchInductor("Takes too long for inductor")
     def test_python_ref(self, device, dtype, op):
         # In this test, primTorch refs call into the refs namespace
         # For example, a ref with torch.foo in it will calls refs.foo instead
@@ -386,6 +389,7 @@ def test_python_ref(self, device, dtype, op):
     @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN")
     @onlyNativeDeviceTypes
     @ops(python_ref_db)
+    @skipIfTorchInductor("Takes too long for inductor")
     def test_python_ref_torch_fallback(self, device, dtype, op):
         # In this test, refs call into the torch namespace (after the initial invocation)
         # For example, a ref with torch.foo in it will call torch.foo instead of refs.foo
@@ -397,6 +401,7 @@
     @skipCUDAIfRocm
     @ops(python_ref_db)
     @parametrize('executor', ['aten', 'nvfuser'])
+    @skipIfTorchInductor("Takes too long for inductor")
     def test_python_ref_executor(self, device, dtype, op, executor):
         # TODO: Not all dtypes are supported with nvfuser
         from torch._prims_common import _torch_dtype_to_nvfuser_dtype_map
@@ -457,6 +462,7 @@ def test_errors(self, device, op):
     @skipMeta
     @onlyNativeDeviceTypes
     @ops([op for op in python_ref_db if op.error_inputs_func is not None], dtypes=OpDTypes.none)
+    @skipIfTorchInductor("Takes too long for inductor")
     def test_python_ref_errors(self, device, op):
         mode = FakeTensorMode()
         with mode:
diff --git a/test/test_ops_gradients.py b/test/test_ops_gradients.py
index 0411f043df9c..6d517c7a7f8b 100644
--- a/test/test_ops_gradients.py
+++ b/test/test_ops_gradients.py
@@ -4,8 +4,9 @@
 from itertools import chain
 
 import torch
-from torch.testing._internal.common_utils import \
-    (TestCase, is_iterable_of_tensors, run_tests, gradcheck, gradgradcheck, is_slow_gradcheck_env)
+from torch.testing._internal.common_utils import (
+    TestCase, is_iterable_of_tensors, run_tests, gradcheck, gradgradcheck, is_slow_gradcheck_env,
+    skipIfTorchInductor)
 from torch.testing._internal.common_methods_invocations import op_db
 from torch.testing._internal.common_device_type import \
     (instantiate_device_type_tests, ops, OpDTypes)
@@ -253,6 +254,7 @@ def test_forward_mode_AD(self, device, dtype, op):
         self._forward_grad_helper(device, dtype, op, op.get_op(), is_inplace=False)
 
     @_gradcheck_ops(op_db)
+    @skipIfTorchInductor("to be fixed")
     def test_inplace_forward_mode_AD(self, device, dtype, op):
         self._skip_helper(op, device, dtype)
diff --git a/torch/_dynamo/variables/torch.py b/torch/_dynamo/variables/torch.py
index e0c88b2cf059..36ca6591189d 100644
--- a/torch/_dynamo/variables/torch.py
+++ b/torch/_dynamo/variables/torch.py
@@ -320,6 +320,9 @@ def get_state_from_generator():
             assert isinstance(args[0], TensorVariable)
 
             if config.fake_tensor_propagation:
+                unimplemented(
+                    "TODO: make torch.random.set_rng_state work with FakeTensor/aot_autograd"
+                )
                 # In fake tensor case, this state doesn't matter, but
                 # it needs to be valid to not segfault. Pull a real tensor out.
                 # The value won't matter since we are running with fake tensors anyway, so rng doesn't matter.