Revert "add int8 packed gemm support on CPU device (pytorch#118056)"
This reverts commit f84375c.

Reverted pytorch#118056 on behalf of https://github.com/izaitsevfb due to breaks internal builds ([comment](pytorch#118056 (comment)))
pytorchmergebot authored and Lourencom committed Mar 6, 2024
1 parent 1b0b159 commit 2ed6735
Showing 8 changed files with 0 additions and 445 deletions.
34 changes: 0 additions & 34 deletions aten/src/ATen/native/LinearAlgebra.cpp
@@ -38,7 +38,6 @@
 #include <ATen/ops/_linalg_slogdet_native.h>
 #include <ATen/ops/_unsafe_view.h>
 #include <ATen/ops/_weight_int4pack_mm_native.h>
-#include <ATen/ops/_weight_int8pack_mm_native.h>
 #include <ATen/ops/addbmm_native.h>
 #include <ATen/ops/addmm_native.h>
 #include <ATen/ops/addr.h>
@@ -3395,7 +3394,6 @@ Tensor kron(const Tensor& self, const Tensor& other) {
 // Weight Only Quantization Gemm
 DEFINE_DISPATCH(weight_to_int4pack_stub);
 DEFINE_DISPATCH(int4pack_mm_stub);
-DEFINE_DISPATCH(int8pack_mm_stub);
 
 Tensor _convert_weight_to_int4pack_cpu(
     const Tensor& in,
@@ -3458,37 +3456,5 @@ Tensor _weight_int4pack_mm_cpu(
   return C;
 }
 
-Tensor _weight_int8pack_mm_cpu(
-    const Tensor& A,
-    const Tensor& B,
-    const Tensor& scales) {
-
-  auto M = A.size(0);
-  auto N = B.size(0);
-  auto K = A.size(1);
-
-  TORCH_CHECK(A.dtype() == kBFloat16,
-      __func__, " : expect A to be bfloat16 tensor.");
-  TORCH_CHECK(A.is_contiguous(),
-      __func__, " : expect A to be contiguous.");
-  TORCH_CHECK(A.dim() == 2,
-      __func__, " : expect A to be 2D tensor.");
-
-  TORCH_CHECK(B.dtype() == kChar,
-      __func__, " : expect B to be int8 tensor.");
-  TORCH_CHECK(B.is_contiguous(),
-      __func__, " : expect B to be contiguous.");
-  TORCH_CHECK(B.size(1) == K,
-      __func__, " : expect B.size(1) == ", K);
-
-  TORCH_CHECK(scales.dim() == 1 && scales.size(0) == N,
-      __func__, " : expect scales to be 1d tensor with size ", N);
-
-  auto C = at::empty({M, N}, A.options());
-  int8pack_mm_stub(kCPU, C, A, B, scales);
-
-  return C;
-}
-
 } // namespace native
 } // namespace at
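
For context, here is a minimal scalar sketch of what the reverted op computed, reconstructed only from the shape and dtype checks above (A: bfloat16 [M, K], contiguous; B: int8 [N, K], contiguous; scales: 1-D of size N). It is not the deleted kernel, which used optimized vectorized paths; the per-output-channel scaling and float accumulation are assumptions drawn from those checks, and all names are hypothetical.

#include <cstdint>
#include <vector>

// Hypothetical reference: C[m][n] = scales[n] * sum_k A[m][k] * B[n][k]
std::vector<float> weight_int8pack_mm_ref(
    const std::vector<float>& A,       // activations, [M, K] row-major (bf16 in the real op)
    const std::vector<int8_t>& B,      // packed int8 weight, [N, K] row-major
    const std::vector<float>& scales,  // per-output-channel dequantization scale, [N]
    int64_t M, int64_t N, int64_t K) {
  std::vector<float> C(M * N, 0.0f);
  for (int64_t m = 0; m < M; ++m) {
    for (int64_t n = 0; n < N; ++n) {
      float acc = 0.0f;
      for (int64_t k = 0; k < K; ++k) {
        acc += A[m * K + k] * static_cast<float>(B[n * K + k]);
      }
      C[m * N + n] = acc * scales[n];
    }
  }
  return C;
}
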
302 changes: 0 additions & 302 deletions aten/src/ATen/native/cpu/int8mm_kernel.cpp

This file was deleted.

2 changes: 0 additions & 2 deletions aten/src/ATen/native/cpu/int_mm_kernel.h
@@ -7,10 +7,8 @@ namespace at::native {
 
 using weight_to_int4pack_fn = void(*)(const Tensor&, const Tensor&);
 using int4pack_mm_fn = void(*)(const Tensor&, const Tensor&, const Tensor&, int64_t, const Tensor&);
-using int8pack_mm_fn = void(*)(const Tensor&, const Tensor&, const Tensor&, const Tensor&);
 
 DECLARE_DISPATCH(weight_to_int4pack_fn, weight_to_int4pack_stub);
 DECLARE_DISPATCH(int4pack_mm_fn, int4pack_mm_stub);
-DECLARE_DISPATCH(int8pack_mm_fn, int8pack_mm_stub);
 
 } // namespace at::native
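
The header change above is one leg of ATen's CPU dispatch-stub pattern: DECLARE_DISPATCH in this header, DEFINE_DISPATCH in LinearAlgebra.cpp (removed above), and a registration in the deleted int8mm_kernel.cpp. The plain-C++ sketch below only illustrates the idea of a stub holding a function pointer that a kernel translation unit fills in; it is not ATen's actual macro expansion, and all names in it are hypothetical.

#include <cassert>
#include <cstdint>

// Simplified stand-in for a dispatch stub: forwards to whichever kernel
// implementation was registered for the current build/ISA.
using int8pack_mm_fn = void (*)(float* C, const float* A, const int8_t* B,
                                const float* scales, int64_t M, int64_t N, int64_t K);

struct Int8PackMmStub {
  int8pack_mm_fn impl = nullptr;  // filled in by REGISTER_DISPATCH-style code in a kernel file
  void operator()(float* C, const float* A, const int8_t* B,
                  const float* scales, int64_t M, int64_t N, int64_t K) const {
    assert(impl && "no int8pack_mm kernel registered");
    impl(C, A, B, scales, M, N, K);
  }
};

Int8PackMmStub int8pack_mm_stub;  // analogous to DEFINE_DISPATCH(int8pack_mm_stub)
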
4 changes: 0 additions & 4 deletions aten/src/ATen/native/native_functions.yaml
@@ -4100,10 +4100,6 @@
     CPU: _weight_int4pack_mm_cpu
     CUDA: _weight_int4pack_mm_cuda
 
-- func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
-  dispatch:
-    CPU: _weight_int8pack_mm_cpu
-
 - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
   python_module: sparse
 
1 change: 0 additions & 1 deletion test/expect/HasDecompTest.test_has_decomposition.expect
@@ -605,7 +605,6 @@ aten::_values
 aten::_values_copy
 aten::_values_copy.out
 aten::_weight_int4pack_mm
-aten::_weight_int8pack_mm
 aten::_weight_norm_interface_backward
 aten::_weight_norm_interface_backward.out
 aten::adaptive_avg_pool2d.out
