Revert "add int8 packed gemm support on CPU device (pytorch#118056)"
This reverts commit f84375c.

Reverted pytorch#118056 on behalf of https://github.com/izaitsevfb due to breaks internal builds ([comment](pytorch#118056 (comment)))
pytorchmergebot authored and Lourencom committed Mar 6, 2024
1 parent 1b0b159 commit 2ed6735
Showing 8 changed files with 0 additions and 445 deletions.
34 changes: 0 additions & 34 deletions aten/src/ATen/native/LinearAlgebra.cpp
@@ -38,7 +38,6 @@
 #include <ATen/ops/_linalg_slogdet_native.h>
 #include <ATen/ops/_unsafe_view.h>
 #include <ATen/ops/_weight_int4pack_mm_native.h>
-#include <ATen/ops/_weight_int8pack_mm_native.h>
 #include <ATen/ops/addbmm_native.h>
 #include <ATen/ops/addmm_native.h>
 #include <ATen/ops/addr.h>
@@ -3395,7 +3394,6 @@ Tensor kron(const Tensor& self, const Tensor& other) {
 // Weight Only Quantization Gemm
 DEFINE_DISPATCH(weight_to_int4pack_stub);
 DEFINE_DISPATCH(int4pack_mm_stub);
-DEFINE_DISPATCH(int8pack_mm_stub);
 
 Tensor _convert_weight_to_int4pack_cpu(
     const Tensor& in,
@@ -3458,37 +3456,5 @@ Tensor _weight_int4pack_mm_cpu(
   return C;
 }
 
-Tensor _weight_int8pack_mm_cpu(
-    const Tensor& A,
-    const Tensor& B,
-    const Tensor& scales) {
-
-  auto M = A.size(0);
-  auto N = B.size(0);
-  auto K = A.size(1);
-
-  TORCH_CHECK(A.dtype() == kBFloat16,
-      __func__, " : expect A to be bfloat16 tensor.");
-  TORCH_CHECK(A.is_contiguous(),
-      __func__, " : expect A to be contiguous.");
-  TORCH_CHECK(A.dim() == 2,
-      __func__, " : expect A to be 2D tensor.");
-
-  TORCH_CHECK(B.dtype() == kChar,
-      __func__, " : expect B to be int8 tensor.");
-  TORCH_CHECK(B.is_contiguous(),
-      __func__, " : expect B to be contiguous.");
-  TORCH_CHECK(B.size(1) == K,
-      __func__, " : expect B.size(1) == ", K);
-
-  TORCH_CHECK(scales.dim() == 1 && scales.size(0) == N,
-      __func__, " : expect scales to be 1d tensor with size ", N);
-
-  auto C = at::empty({M, N}, A.options());
-  int8pack_mm_stub(kCPU, C, A, B, scales);
-
-  return C;
-}
-
 } // namespace native
 } // namespace at
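
For context, here is a minimal scalar sketch of what the reverted op computed, reconstructed only from the shape and dtype checks above (A: bfloat16 [M, K], contiguous; B: int8 [N, K], contiguous; scales: 1-D of size N). It is not the deleted kernel, which used optimized vectorized paths; the per-output-channel scaling and float accumulation are assumptions drawn from those checks, and all names are hypothetical.

#include <cstdint>
#include <vector>

// Hypothetical reference: C[m][n] = scales[n] * sum_k A[m][k] * B[n][k]
std::vector<float> weight_int8pack_mm_ref(
    const std::vector<float>& A,       // activations, [M, K] row-major (bf16 in the real op)
    const std::vector<int8_t>& B,      // packed int8 weight, [N, K] row-major
    const std::vector<float>& scales,  // per-output-channel dequantization scale, [N]
    int64_t M, int64_t N, int64_t K) {
  std::vector<float> C(M * N, 0.0f);
  for (int64_t m = 0; m < M; ++m) {
    for (int64_t n = 0; n < N; ++n) {
      float acc = 0.0f;
      for (int64_t k = 0; k < K; ++k) {
        acc += A[m * K + k] * static_cast<float>(B[n * K + k]);
      }
      C[m * N + n] = acc * scales[n];
    }
  }
  return C;
}
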
302 changes: 0 additions & 302 deletions aten/src/ATen/native/cpu/int8mm_kernel.cpp

This file was deleted.

2 changes: 0 additions & 2 deletions aten/src/ATen/native/cpu/int_mm_kernel.h
@@ -7,10 +7,8 @@ namespace at::native {
 
 using weight_to_int4pack_fn = void(*)(const Tensor&, const Tensor&);
 using int4pack_mm_fn = void(*)(const Tensor&, const Tensor&, const Tensor&, int64_t, const Tensor&);
-using int8pack_mm_fn = void(*)(const Tensor&, const Tensor&, const Tensor&, const Tensor&);
 
 DECLARE_DISPATCH(weight_to_int4pack_fn, weight_to_int4pack_stub);
 DECLARE_DISPATCH(int4pack_mm_fn, int4pack_mm_stub);
-DECLARE_DISPATCH(int8pack_mm_fn, int8pack_mm_stub);
 
 } // namespace at::native
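
The header change above is one leg of ATen's CPU dispatch-stub pattern: DECLARE_DISPATCH in this header, DEFINE_DISPATCH in LinearAlgebra.cpp (removed above), and a registration in the deleted int8mm_kernel.cpp. The plain-C++ sketch below only illustrates the idea of a stub holding a function pointer that a kernel translation unit fills in; it is not ATen's actual macro expansion, and all names in it are hypothetical.

#include <cassert>
#include <cstdint>

// Simplified stand-in for a dispatch stub: forwards to whichever kernel
// implementation was registered for the current build/ISA.
using int8pack_mm_fn = void (*)(float* C, const float* A, const int8_t* B,
                                const float* scales, int64_t M, int64_t N, int64_t K);

struct Int8PackMmStub {
  int8pack_mm_fn impl = nullptr;  // filled in by REGISTER_DISPATCH-style code in a kernel file
  void operator()(float* C, const float* A, const int8_t* B,
                  const float* scales, int64_t M, int64_t N, int64_t K) const {
    assert(impl && "no int8pack_mm kernel registered");
    impl(C, A, B, scales, M, N, K);
  }
};

Int8PackMmStub int8pack_mm_stub;  // analogous to DEFINE_DISPATCH(int8pack_mm_stub)
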
4 changes: 0 additions & 4 deletions aten/src/ATen/native/native_functions.yaml
@@ -4100,10 +4100,6 @@
     CPU: _weight_int4pack_mm_cpu
     CUDA: _weight_int4pack_mm_cuda
 
-- func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
-  dispatch:
-    CPU: _weight_int8pack_mm_cpu
-
 - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
   python_module: sparse
 
1 change: 0 additions & 1 deletion test/expect/HasDecompTest.test_has_decomposition.expect
@@ -605,7 +605,6 @@ aten::_values
 aten::_values_copy
 aten::_values_copy.out
 aten::_weight_int4pack_mm
-aten::_weight_int8pack_mm
 aten::_weight_norm_interface_backward
 aten::_weight_norm_interface_backward.out
 aten::adaptive_avg_pool2d.out
