Bump black version to 23.1.0 #96578

Closed
wants to merge 4 commits into from
2 changes: 1 addition & 1 deletion .lintrunner.toml
@@ -878,7 +878,7 @@ init_command = [
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'--no-black-binary',
'black==22.3.0',
'black==23.1.0',
'ufmt==1.3.3',
'usort==1.0.2',
]
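Aside from the version bump above, the remaining files in this diff appear to be mechanical reformatting produced by the new black release rather than hand edits. Below is a minimal, hypothetical sketch of the three style changes that repeat throughout the diff; the module and names are invented for illustration and are not part of the PR.

```python
# Hypothetical module formatted the way black 23.1.0 leaves it; the names here
# are illustrative and do not appear in the PR.

from typing import Set


# Two blank lines now separate module-level code from a following
# comment-plus-def (see the added blank lines in benchmarks.py and
# parse_logs.py).
def collect_names(stems: Set[str]) -> Set[str]:
    # The blank line that black 22.3.0 tolerated right after a block-opening
    # line ("def ...:", "for ...:", "class ...:", "try:") is removed; the body
    # now starts immediately.
    names = set()
    # Redundant parentheses around tuple targets are dropped:
    # "for (stem, ext) in ..." becomes "for stem, ext in ...".
    for stem, ext in zip(sorted(stems), [".py"] * len(stems)):
        names.add(stem + ext)
    return names
```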
1 change: 1 addition & 0 deletions benchmarks/dynamo/benchmarks.py
@@ -4,6 +4,7 @@

from typing import Set


# Note - hf and timm have their own version of this, torchbench does not
# TOOD(voz): Someday, consolidate all the files into one runner instead of a shim like this...
def model_names(filename: str) -> Set[str]:
2 changes: 0 additions & 2 deletions benchmarks/dynamo/check_graph_breaks.py
@@ -11,12 +11,10 @@ def get_field(csv, model_name: str, field: str, typ=float):


def check_graph_breaks(actual_csv, expected_csv, expected_filename):

failed = []
improved = []

for model in actual_csv["name"]:

graph_breaks = get_field(actual_csv, model, "graph_breaks", typ=int)
expected_graph_breaks = get_field(expected_csv, model, "graph_breaks", typ=int)

2 changes: 0 additions & 2 deletions benchmarks/dynamo/ci_expected_accuracy/update_expected.py
@@ -31,7 +31,6 @@


def query_job_sha(repo, sha):

params = {
"parameters": [
{"name": "sha", "type": "string", "value": sha},
@@ -108,7 +107,6 @@ def write_filtered_csvs(root_path, dataframes):


if __name__ == "__main__":

parser = argparse.ArgumentParser(
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
)
2 changes: 0 additions & 2 deletions benchmarks/dynamo/huggingface.py
@@ -373,7 +373,6 @@ def load_model(
model_name,
batch_size=None,
):

is_training = self.args.training
use_eval_mode = self.args.use_eval_mode
dtype = torch.float32
@@ -513,7 +512,6 @@ def refresh_model_names_and_batch_sizes():
lm_seen = set()
family_seen = set()
for cls_name in hf_fx._SUPPORTED_MODELS:

if "For" not in cls_name:
continue

1 change: 0 additions & 1 deletion benchmarks/dynamo/microbenchmarks/bench_autotune_conv.py
@@ -73,7 +73,6 @@ def bench_op(
warmup=25,
rep=75,
):

skip = False
# allocate inputs, nchw
x = torch.randn((BATCH, IN_C, IN_H, IN_W), dtype=dtype, device="cuda")
1 change: 0 additions & 1 deletion benchmarks/dynamo/microbenchmarks/bench_conv.py
@@ -70,7 +70,6 @@ def bench_op(
warmup=25,
rep=75,
):

# allocate inputs, nchw
x = torch.randn((BATCH, IN_C, IN_H, IN_W), dtype=dtype, device="cuda")
w = torch.randn(
1 change: 0 additions & 1 deletion benchmarks/dynamo/microbenchmarks/bench_conv1x1.py
@@ -66,7 +66,6 @@ def bench_op(
warmup=25,
rep=75,
):

# allocate inputs, nchw
x = torch.randn((BATCH, IN_C, IN_H, IN_W), dtype=dtype, device="cuda")
w = torch.randn(
1 change: 0 additions & 1 deletion benchmarks/dynamo/microbenchmarks/bench_conv_fusion.py
@@ -236,7 +236,6 @@ def bench(layer_params, layer_id, p, fusion_types=[""]):

row = [layer_id]
for fusion_type in fusion_types:

if fusion_type == "":
conv_torchinductor = getattr(Func, "conv_torchinductor")
conv = getattr(Func, "conv")
1 change: 0 additions & 1 deletion benchmarks/dynamo/microbenchmarks/bench_mm_fusion.py
@@ -56,7 +56,6 @@ def tflops(ms):

row = [layer_id]
for fusion_type in fusion_types:

if fusion_type == "":
fn_mm = getattr(Func, "mm")
else:
1 change: 0 additions & 1 deletion benchmarks/dynamo/microbenchmarks/profile_conv.py
@@ -46,7 +46,6 @@ def profile_op(
warmup=25,
rep=50,
):

# allocate inputs, nchw
x = torch.randn((BATCH, IN_C, IN_H, IN_W), dtype=dtype, device="cuda")
w = torch.randn(
1 change: 1 addition & 0 deletions benchmarks/dynamo/parse_logs.py
@@ -60,6 +60,7 @@ def chunker(seq, size):
out.writeheader()
out.writerow({"explain": gist_url})


# Sometimes backtraces will be in third party code, which results
# in very long file names. Delete the absolute path in this case.
def normalize_file(f):
1 change: 0 additions & 1 deletion benchmarks/dynamo/timm_models.py
@@ -182,7 +182,6 @@ def load_model(
model_name,
batch_size=None,
):

is_training = self.args.training
use_eval_mode = self.args.use_eval_mode

1 change: 0 additions & 1 deletion benchmarks/dynamo/torchbench.py
@@ -242,7 +242,6 @@ def load_model(
batch_size=None,
part=None,
):

is_training = self.args.training
use_eval_mode = self.args.use_eval_mode
dynamic_shapes = self.args.dynamic_shapes
@@ -120,7 +120,7 @@ def _test_fully_shard_construction(
composable_handles = traversal_utils._get_fsdp_handles(composable_module)
fsdp_wrapped_handles = traversal_utils._get_fsdp_handles(fsdp_wrapped_model)
self.assertEqual(len(composable_handles), len(fsdp_wrapped_handles))
for (composable_handle, fsdp_wrapped_handle) in zip(
for composable_handle, fsdp_wrapped_handle in zip(
composable_handles, fsdp_wrapped_handles
):
self.assertEqual(
@@ -179,7 +179,7 @@ def test_sync_module_states(self):
policy=policy,
sync_module_states=True,
)
for (composable_param, fsdp_wrapped_param) in zip(
for composable_param, fsdp_wrapped_param in zip(
composable_module.parameters(),
fsdp_wrapped_model.parameters(),
):
3 changes: 1 addition & 2 deletions test/distributed/fsdp/test_fsdp_checkpoint.py
@@ -116,7 +116,7 @@ def _verify_parity(self, losses, outputs, models):
assert outputs
assert models

for (l, o) in zip(losses[1:], outputs[1:]):
for l, o in zip(losses[1:], outputs[1:]):
self.assertEqual(losses[0], l)
self.assertEqual(outputs[0], o)

@@ -324,7 +324,6 @@ def forward(self, x):


class TestFSDPCheckpointSubmodule(FSDPTest):

# TODO: grad value checks occasionally fails when use_reentrant = True
@skip_if_lt_x_gpu(2)
@parametrize("use_reentrant", [False])
5 changes: 0 additions & 5 deletions test/distributed/fsdp/test_fsdp_comm_hooks.py
@@ -70,7 +70,6 @@ def forward(self, x):


class DummyState:

__slots__ = ["process_group", "noise"]

def __init__(self, process_group: dist.ProcessGroup, noise: int):
@@ -157,7 +156,6 @@ def test_default_communication_hook_behavior(
self.assertEqual(entry._communication_hook, default_hook)

for _ in range(4):

# Clear gradients
net_default_hook.zero_grad()
loss = net_default_hook(inpt).sum()
@@ -183,7 +181,6 @@ def _get_submodules(self, fsdp_net):
]

def _init_model(self, core, sharding_strategy, mixed_precision=None):

device = torch.device("cuda")
return FSDP(
core,
@@ -424,7 +421,6 @@ def _check_low_precision_hook(
def test_fp16_hook(
self, has_wrapping: bool, sharding_strategy: Optional[ShardingStrategy]
):

state = default_hooks.LowPrecisionState(process_group=_get_default_group())
hook = default_hooks.fp16_compress_hook

@@ -452,7 +448,6 @@ def test_fp16_hook(
def test_bf16_hook(
self, has_wrapping: bool, sharding_strategy: Optional[ShardingStrategy]
):

state = default_hooks.LowPrecisionState(process_group=_get_default_group())
hook = default_hooks.bf16_compress_hook

2 changes: 1 addition & 1 deletion test/distributed/fsdp/test_fsdp_grad_acc.py
@@ -160,7 +160,7 @@ def permute_tensor(x: torch.Tensor):
num_iters_to_acc = sum(config.num_iters for config in configs)
for _ in range(num_iters_to_acc - 1):
batches.append(tuple(permute_tensor(t) for t in batch))
for (batch1, batch2) in itertools.combinations(batches, r=2):
for batch1, batch2 in itertools.combinations(batches, r=2):
for t1, t2 in zip(batch1, batch2):
assert not torch.all(
t1 == t2
1 change: 0 additions & 1 deletion test/distributed/fsdp/test_fsdp_optim_state.py
@@ -1338,7 +1338,6 @@ def _test_rekey_optim_state_dict_to_names(
use_multiple_param_groups: bool,
use_optim_input: bool,
):

NUM_ITERS = 3
# Run a wrapped model for a few iterations
model1, optim1, optim_input1 = self._init_nested_model(
8 changes: 4 additions & 4 deletions test/distributed/fsdp/test_fsdp_state_dict.py
@@ -937,14 +937,14 @@ def _create_module(wrap_fsdp=True):
# Check that it can be loaded into FSDP.
new_fsdp, _ = _create_module()
_zero_model(new_fsdp)
for (p1, p2) in zip(fsdp.parameters(), new_fsdp.parameters()):
for p1, p2 in zip(fsdp.parameters(), new_fsdp.parameters()):
self.assertNotEqual(p1, p2)
with FSDP.state_dict_type(new_fsdp, STATE_DICT_MAPPING[state_dict_type]):
if state_dict_type != "local_state_dict":
# FlatParameter has not supported deepcopy yet.
state_dict = deepcopy(state_dict)
new_fsdp.load_state_dict(state_dict, strict=True)
for (p1, p2) in zip(fsdp.parameters(), new_fsdp.parameters()):
for p1, p2 in zip(fsdp.parameters(), new_fsdp.parameters()):
self.assertEqual(p1, p2)

# Test that the checkpoint can be loaded into a local model.
@@ -954,7 +954,7 @@ def _create_module(wrap_fsdp=True):
param.zero_()

with fsdp.summon_full_params(fsdp):
for (p1, p2) in zip(fsdp.parameters(), local.parameters()):
for p1, p2 in zip(fsdp.parameters(), local.parameters()):
self.assertNotEqual(p1, p2)

if state_dict_type == "local_state_dict":
@@ -963,7 +963,7 @@
with fsdp.summon_full_params(fsdp):
if self.rank == 0:
local.load_state_dict(state_dict, strict=True)
for (p1, p2) in zip(fsdp.parameters(), local.parameters()):
for p1, p2 in zip(fsdp.parameters(), local.parameters()):
self.assertEqual(p1, p2)

@skip_if_lt_x_gpu(2)
1 change: 0 additions & 1 deletion test/distributed/fsdp/test_shard_utils.py
@@ -31,7 +31,6 @@ def _get_and_check_split_sizes(
out_offsets,
in_split_sizes,
):

for my_rank in range(world_size):
_in_split_sizes = in_split_sizes[my_rank]
_out_split_sizes = [
4 changes: 1 addition & 3 deletions test/dynamo/test_modules.py
@@ -847,7 +847,6 @@ def __torch_function__(cls, func, types, args=(), kwargs=None):
torch._dynamo.config.traceable_tensor_subclasses.add(TensorProxy)

try:

x = torch.randn(1).as_subclass(TensorProxy)
cnt = torch._dynamo.testing.CompileCounter()
out1 = foo(x)
@@ -862,7 +861,6 @@ def __torch_function__(cls, func, types, args=(), kwargs=None):

def test_torch_function_with_closure(self):
def run():

counter = 0

def foo(x):
@@ -1097,7 +1095,7 @@ def forward(self, x):
opt_mod = torch._dynamo.optimize("eager")(mod)

# Check parameteres and buffers
for (p1, p2) in zip(mod.parameters(), opt_mod.parameters()):
for p1, p2 in zip(mod.parameters(), opt_mod.parameters()):
self.assertTrue(id(p1) == id(p2))

def test_recursion(self):
2 changes: 0 additions & 2 deletions test/dynamo/test_repros.py
@@ -1572,7 +1572,6 @@ def forward(self, x):
self.assertEqual(y, 10)

def test_sort_out(self):

dtype = torch.float32
device = "cpu"

@@ -1607,7 +1606,6 @@ def forward(self, x):
self.assertTrue(same(ref, res))

def test_sigmoid_out(self):

dtype = torch.float32
device = "cpu"

1 change: 0 additions & 1 deletion test/inductor/test_config.py
@@ -178,7 +178,6 @@ def d(x):
a(torch.randn(10))

def test_api_options(self):

reduce_overhead_opts = torch._inductor.list_mode_options("reduce-overhead")
self.assertEqual(reduce_overhead_opts["triton.cudagraphs"], True)

1 change: 0 additions & 1 deletion test/inductor/test_torchinductor_dynamic_shapes.py
@@ -79,7 +79,6 @@ class DynamicShapesCudaTests(TestCase):


class TestInductorDynamic(TestCase):

compile_fn = partial(torch.compile, dynamic=True)

def setUp(self):
1 change: 0 additions & 1 deletion test/inductor/test_torchinductor_opinfo.py
@@ -597,7 +597,6 @@ def fn(*args, **kwargs):
)

except Exception as e:

if test_expect is ExpectedTestResult.XFAILURE:
raise e

1 change: 1 addition & 0 deletions test/onnx/pytorch_test_common.py
@@ -48,6 +48,7 @@ def wrapper(*args, **kwargs):
lambda: not torch.cuda.is_bf16_supported(), "BFloat16 CUDA is not available"
)


# skips tests for all versions below min_opset_version.
# if exporting the op is only supported after a specific version,
# add this wrapper to prevent running the test for opset_versions
1 change: 0 additions & 1 deletion test/onnx/test_onnx_opset.py
@@ -494,7 +494,6 @@ def forward(self, x, grid, mode, padding_mode, align_corers):
("zeros", "border", "reflection"),
(True, False),
):

args = (
torch.randn(n, c, h_in, w_in), # x
torch.randn(n, h_out, w_out, 2), # grid,
2 changes: 0 additions & 2 deletions test/onnx/test_onnxscript_no_runtime.py
@@ -13,14 +13,12 @@


class TestONNXScriptExport(common_utils.TestCase):

# opset version is
# 1. local function is supported after opset 15
# 2. onnx-script requires users to determine opset in local function
opset_version = 15

def test_onnxscript_registration_with_multiple_models(self):

from onnxscript.onnx_opset import opset15 as op

# 1. Register Selu onnxscript function as custom Op
3 changes: 0 additions & 3 deletions test/onnx/test_onnxscript_runtime.py
@@ -12,14 +12,12 @@


class TestONNXScriptRuntime(onnx_test_common._TestONNXRuntime):

# opset version is
# 1. local function is supported after opset 15
# 2. onnx-script requires users to determine opset in local function
opset_version = 15

def test_selu_from_onnxscript_example(self):

x = torch.randn(1, 2, 3, 4, requires_grad=True)
model = torch.nn.SELU()

@@ -52,7 +50,6 @@ def custom_selu(g: jit_utils.GraphContext, X):
self.run_test(model, x)

def test_layer_norm(self):

x = torch.randn(2, 3)
y = torch.randn(2, 3)
z = torch.randn(2, 3)