From 150b6d4207ff5046afb2e2a07f632247360229c9 Mon Sep 17 00:00:00 2001
From: Sayed Adel <seiko@imavr.com>
Date: Thu, 18 Aug 2022 20:48:56 +0200
Subject: [PATCH 1/3] CI: Test NumPy build against old versions of GCC(6, 7, 8)

---
 .github/workflows/build_test.yml | 59 ++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml
index e1bd519df293..9e57662bea0f 100644
--- a/.github/workflows/build_test.yml
+++ b/.github/workflows/build_test.yml
@@ -71,6 +71,65 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - uses: ./.github/actions
 
+  old_gcc:
+    needs: [smoke_test]
+    # ubuntu-18.04 provides GCC 6, 7 and 8 through apt
+    runs-on: ubuntu-18.04
+    steps:
+    - uses: actions/checkout@v3
+      with:
+        submodules: recursive
+        fetch-depth: 0
+    # ubuntu-18.04 comes with python3.6, so install python3.8 from the deadsnakes PPA
+    - name: Install Python3.8
+      run: |
+        sudo apt update
+        # for add-apt-repository
+        sudo apt install software-properties-common -y
+        sudo add-apt-repository ppa:deadsnakes/ppa -y
+        sudo apt install python3.8-dev -y
+        sudo ln -s /usr/bin/python3.8 /usr/bin/pythonx
+        pythonx -m pip install --upgrade pip setuptools wheel
+        pythonx -m pip install -r test_requirements.txt
+    - name: Install Compilers
+      run: sudo apt install g++-6 g++-7 g++-8 -y
+    - name: Build gcc-6
+      run: |
+        export CC=/usr/bin/gcc-6
+        export CXX=/usr/bin/g++-6
+        pythonx setup.py install --user
+    - name: Runtests gcc-6
+      run: pythonx runtests.py -n
+    - name: Build gcc-7
+      run: |
+        export CC=/usr/bin/gcc-7
+        export CXX=/usr/bin/g++-7
+        rm -rf build && pythonx setup.py install --user
+    - name: Runtests gcc-7
+      run: pythonx runtests.py -n
+    - name: Build gcc-8
+      run: |
+        export CC=/usr/bin/gcc-8
+        export CXX=/usr/bin/g++-8
+        rm -rf build && pythonx setup.py install --user
+    - name: Runtests gcc-8
+      run: pythonx runtests.py -n
+
+  without_optimizations:
+    needs: [smoke_test]
+    runs-on: ubuntu-latest
+    env:
+      WITHOUT_OPTIMIZATIONS: 1
+    steps:
+    - uses: actions/checkout@v3
+      with:
+        submodules: recursive
+        fetch-depth: 0
+    - uses: actions/setup-python@v3
+      with:
+        python-version: ${{ env.PYTHON_VERSION }}
+    - uses: ./.github/actions
+
   debug:
     needs: [smoke_test]
     runs-on: ubuntu-20.04

From 44b8a55d54d69e9dc1f6e91c807f1d24dd44af23 Mon Sep 17 00:00:00 2001
From: Sayed Adel <seiko@imavr.com>
Date: Thu, 18 Aug 2022 22:32:24 +0200
Subject: [PATCH 2/3] BUG, SIMD: Fix C++ AVX512/qsort on old gcc compilers

---
 .../core/src/common/simd/avx512/arithmetic.h  | 24 +++++++++----------
 numpy/core/src/npysort/x86-qsort.dispatch.cpp |  6 +----
 2 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/numpy/core/src/common/simd/avx512/arithmetic.h b/numpy/core/src/common/simd/avx512/arithmetic.h
index 93e9d9d45197..850a0c05adf5 100644
--- a/numpy/core/src/common/simd/avx512/arithmetic.h
+++ b/numpy/core/src/common/simd/avx512/arithmetic.h
@@ -384,8 +384,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
         const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
         npyv_u32 a1 = _mm512_max_epu32(a, _mm512_permutex2var_epi32(a, idx1, a));
         npyv_u32 a2 = _mm512_max_epu32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
-        npyv_u32 a3 = _mm512_max_epu32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
-        npyv_u32 a4 = _mm512_max_epu32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
+        npyv_u32 a3 = _mm512_max_epu32(a2, _mm512_shuffle_epi32(a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
+        npyv_u32 a4 = _mm512_max_epu32(a3, _mm512_shuffle_epi32(a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
         return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
     }
 
@@ -395,8 +395,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
         const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
         npyv_s32 a1 = _mm512_max_epi32(a, _mm512_permutex2var_epi32(a, idx1, a));
         npyv_s32 a2 = _mm512_max_epi32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
-        npyv_s32 a3 = _mm512_max_epi32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
-        npyv_s32 a4 = _mm512_max_epi32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
+        npyv_s32 a3 = _mm512_max_epi32(a2, _mm512_shuffle_epi32(a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
+        npyv_s32 a4 = _mm512_max_epi32(a3, _mm512_shuffle_epi32(a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
         return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
     }
 
@@ -406,8 +406,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
         const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
         npyv_f32 a1 = _mm512_max_ps(a, _mm512_permutex2var_ps(a, idx1, a));
         npyv_f32 a2 = _mm512_max_ps(a1, _mm512_permutex2var_ps(a1, idx2, a1));
-        npyv_f32 a3 = _mm512_max_ps(a2, _mm512_shuffle_ps(a2, a2, (1<<6 | 0<<4 | 3<<2 | 2)));
-        npyv_f32 a4 = _mm512_max_ps(a3, _mm512_shuffle_sp(a3, a3, (2<<6 | 3<<4 | 0<<2 | 1)));
+        npyv_f32 a3 = _mm512_max_ps(a2, _mm512_shuffle_ps(a2, a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
+        npyv_f32 a4 = _mm512_max_ps(a3, _mm512_shuffle_ps(a3, a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
         return _mm_cvtss_f32(_mm512_extractf32x4_ps(a4, 0x00));
     }
 
@@ -417,8 +417,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
         const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
         npyv_u32 a1 = _mm512_min_epu32(a, _mm512_permutex2var_epi32(a, idx1, a));
         npyv_u32 a2 = _mm512_min_epu32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
-        npyv_u32 a3 = _mm512_min_epu32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
-        npyv_u32 a4 = _mm512_min_epu32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
+        npyv_u32 a3 = _mm512_min_epu32(a2, _mm512_shuffle_epi32(a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
+        npyv_u32 a4 = _mm512_min_epu32(a3, _mm512_shuffle_epi32(a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
         return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
     }
 
@@ -428,8 +428,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
         const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
         npyv_s32 a1 = _mm512_min_epi32(a, _mm512_permutex2var_epi32(a, idx1, a));
         npyv_s32 a2 = _mm512_min_epi32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
-        npyv_s32 a3 = _mm512_min_epi32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
-        npyv_s32 a4 = _mm512_min_epi32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
+        npyv_s32 a3 = _mm512_min_epi32(a2, _mm512_shuffle_epi32(a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
+        npyv_s32 a4 = _mm512_min_epi32(a3, _mm512_shuffle_epi32(a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
         return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
     }
 
@@ -439,8 +439,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
         const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
         npyv_f32 a1 = _mm512_min_ps(a, _mm512_permutex2var_ps(a, idx1, a));
         npyv_f32 a2 = _mm512_min_ps(a1, _mm512_permutex2var_ps(a1, idx2, a1));
-        npyv_f32 a3 = _mm512_min_ps(a2, _mm512_shuffle_ps(a2, a2, (1<<6 | 0<<4 | 3<<2 | 2)));
-        npyv_f32 a4 = _mm512_min_ps(a3, _mm512_shuffle_sp(a3, a3, (2<<6 | 3<<4 | 0<<2 | 1)));
+        npyv_f32 a3 = _mm512_min_ps(a2, _mm512_shuffle_ps(a2, a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
+        npyv_f32 a4 = _mm512_min_ps(a3, _mm512_shuffle_ps(a3, a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
         return _mm_cvtss_f32(_mm512_extractf32x4_ps(a4, 0x00));
     }
 
diff --git a/numpy/core/src/npysort/x86-qsort.dispatch.cpp b/numpy/core/src/npysort/x86-qsort.dispatch.cpp
index 39067229beaf..01fa16e3e350 100644
--- a/numpy/core/src/npysort/x86-qsort.dispatch.cpp
+++ b/numpy/core/src/npysort/x86-qsort.dispatch.cpp
@@ -648,11 +648,7 @@ partition_vec(type_t *arr, npy_intp left, npy_intp right, const zmm_t curr_vec,
     /* which elements are larger than the pivot */
     __mmask16 gt_mask = vtype::ge(curr_vec, pivot_vec);
     npy_int amount_gt_pivot = _mm_popcnt_u32((npy_int)gt_mask);
-#if defined(_MSC_VER) && _MSC_VER < 1922
-    vtype::mask_compressstoreu(arr + left, ~gt_mask, curr_vec);
-#else
-    vtype::mask_compressstoreu(arr + left, _knot_mask16(gt_mask), curr_vec);
-#endif
+    vtype::mask_compressstoreu(arr + left, _mm512_knot(gt_mask), curr_vec);
     vtype::mask_compressstoreu(arr + right - amount_gt_pivot, gt_mask,
                                curr_vec);
     *smallest_vec = vtype::min(curr_vec, *smallest_vec);

From ffc3d7b140740bb7db6ddfa7c281ae40517d3951 Mon Sep 17 00:00:00 2001
From: Ralf Gommers <ralf.gommers@gmail.com>
Date: Fri, 19 Aug 2022 10:50:35 +0300
Subject: [PATCH 3/3] DOC: add notes on GCC 6.5 being the minimum supported GCC
 version.

GCC 6 is mainly kept for Ubuntu LTS, where installing gcc-6 with
`apt` provides version 6.5, so document 6.5 as the minimum
supported version (this is also the version tested in CI).

[ci skip]
---
 INSTALL.rst.txt              | 3 ++-
 doc/source/user/building.rst | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/INSTALL.rst.txt b/INSTALL.rst.txt
index 130306d06c07..f136b7cfcd32 100644
--- a/INSTALL.rst.txt
+++ b/INSTALL.rst.txt
@@ -75,7 +75,8 @@ skipped when running the test suite if no Fortran compiler is available.  For
 building Scipy a Fortran compiler is needed though, so we include some details
 on Fortran compilers in the rest of this section.
 
-On OS X and Linux, all common compilers will work.
+On OS X and Linux, all common compilers will work. The minimum supported GCC
+version is 6.5.
 
 For Fortran, ``gfortran`` works, ``g77`` does not.  In case ``g77`` is
 installed then ``g77`` will be detected and used first.  To explicitly select
diff --git a/doc/source/user/building.rst b/doc/source/user/building.rst
index 4bd0b7183ea0..81f6d33a797b 100644
--- a/doc/source/user/building.rst
+++ b/doc/source/user/building.rst
@@ -59,7 +59,7 @@ Building NumPy requires the following software installed:
    MSVC and Clang compilers. Compilers from other vendors such as Intel,
    Absoft, Sun, NAG, Compaq, Vast, Portland, Lahey, HP, IBM are only
    supported in the form of community feedback, and may not work out of the
-   box.  GCC 4.x (and later) compilers are recommended. On ARM64 (aarch64)
+   box.  GCC 6.5 (and later) compilers are recommended. On ARM64 (aarch64)
    GCC 8.x (and later) are recommended.
 
 3) Linear Algebra libraries