Skip to content

Commit

Permalink
Merge pull request #22155 from charris/backport-22150
Browse files Browse the repository at this point in the history
CI: Test NumPy build against old versions of GCC(6, 7, 8)
  • Loading branch information
charris committed Aug 19, 2022
2 parents d8d04e7 + ffc3d7b commit 0a467c8
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 19 deletions.
59 changes: 59 additions & 0 deletions .github/workflows/build_test.yml
Expand Up @@ -71,6 +71,65 @@ jobs:
python-version: ${{ matrix.python-version }}
- uses: ./.github/actions

# Job: after smoke_test, build and run the test suite three times in a row,
# once each with GCC 6, GCC 7 and GCC 8.
old_gcc:
needs: [smoke_test]
# provides GCC 6, 7, 8
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v3
with:
submodules: recursive
fetch-depth: 0
# comes with python3.6
# Installs Python 3.8 from the deadsnakes PPA and symlinks it to
# /usr/bin/pythonx; every later step in this job calls `pythonx`.
- name: Install Python3.8
run: |
sudo apt update
# for add-apt-repository
sudo apt install software-properties-common -y
sudo add-apt-repository ppa:deadsnakes/ppa -y
sudo apt install python3.8-dev -y
sudo ln -s /usr/bin/python3.8 /usr/bin/pythonx
pythonx -m pip install --upgrade pip setuptools wheel
pythonx -m pip install -r test_requirements.txt
# Installs all three compiler versions up front; each build step below picks
# one by exporting CC and CXX.
- name: Install Compilers
run: sudo apt install g++-6 g++-7 g++-8 -y
# First build: no `rm -rf build` here, unlike the gcc-7 and gcc-8 builds.
- name: Build gcc-6
run: |
export CC=/usr/bin/gcc-6
export CXX=/usr/bin/g++-6
pythonx setup.py install --user
# NOTE(review): `-n` presumably makes runtests.py test the installed build
# rather than rebuilding — confirm against runtests.py.
- name: Runtests gcc-6
run: pythonx runtests.py -n
# Deletes the build directory before rebuilding; presumably so that objects
# compiled by the previous GCC version are not reused — confirm.
- name: Build gcc-7
run: |
export CC=/usr/bin/gcc-7
export CXX=/usr/bin/g++-7
rm -rf build && pythonx setup.py install --user
- name: Runtests gcc-7
run: pythonx runtests.py -n
# Same clean-and-rebuild sequence as the gcc-7 step, with GCC 8.
- name: Build gcc-8
run: |
export CC=/usr/bin/gcc-8
export CXX=/usr/bin/g++-8
rm -rf build && pythonx setup.py install --user
- name: Runtests gcc-8
run: pythonx runtests.py -n

# Job: after smoke_test, run the repository's local ./.github/actions action
# with WITHOUT_OPTIMIZATIONS=1 set in the job environment.
# NOTE(review): how the build consumes WITHOUT_OPTIMIZATIONS is not visible
# in this excerpt — presumably it disables optimized kernels; confirm.
without_optimizations:
needs: [smoke_test]
runs-on: ubuntu-latest
env:
WITHOUT_OPTIMIZATIONS: 1
steps:
- uses: actions/checkout@v3
with:
submodules: recursive
fetch-depth: 0
# PYTHON_VERSION is read from the env context; it is not defined in this job.
- uses: actions/setup-python@v3
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: ./.github/actions

debug:
needs: [smoke_test]
runs-on: ubuntu-20.04
Expand Down
3 changes: 2 additions & 1 deletion INSTALL.rst.txt
Expand Up @@ -75,7 +75,8 @@ skipped when running the test suite if no Fortran compiler is available. For
building Scipy a Fortran compiler is needed though, so we include some details
on Fortran compilers in the rest of this section.

On OS X and Linux, all common compilers will work.
On OS X and Linux, all common compilers will work. The minimum supported GCC
version is 6.5.

For Fortran, ``gfortran`` works, ``g77`` does not. In case ``g77`` is
installed then ``g77`` will be detected and used first. To explicitly select
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user/building.rst
Expand Up @@ -59,7 +59,7 @@ Building NumPy requires the following software installed:
MSVC and Clang compilers. Compilers from other vendors such as Intel,
Absoft, Sun, NAG, Compaq, Vast, Portland, Lahey, HP, IBM are only
supported in the form of community feedback, and may not work out of the
box. GCC 4.x (and later) compilers are recommended. On ARM64 (aarch64)
box. GCC 6.5 (and later) compilers are recommended. On ARM64 (aarch64)
GCC 8.x (and later) are recommended.

3) Linear Algebra libraries
Expand Down
24 changes: 12 additions & 12 deletions numpy/core/src/common/simd/avx512/arithmetic.h
Expand Up @@ -384,8 +384,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
npyv_u32 a1 = _mm512_max_epu32(a, _mm512_permutex2var_epi32(a, idx1, a));
npyv_u32 a2 = _mm512_max_epu32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
npyv_u32 a3 = _mm512_max_epu32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
npyv_u32 a4 = _mm512_max_epu32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
npyv_u32 a3 = _mm512_max_epu32(a2, _mm512_shuffle_epi32(a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
npyv_u32 a4 = _mm512_max_epu32(a3, _mm512_shuffle_epi32(a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
}

Expand All @@ -395,8 +395,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
npyv_s32 a1 = _mm512_max_epi32(a, _mm512_permutex2var_epi32(a, idx1, a));
npyv_s32 a2 = _mm512_max_epi32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
npyv_s32 a3 = _mm512_max_epi32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
npyv_s32 a4 = _mm512_max_epi32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
npyv_s32 a3 = _mm512_max_epi32(a2, _mm512_shuffle_epi32(a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
npyv_s32 a4 = _mm512_max_epi32(a3, _mm512_shuffle_epi32(a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
}

Expand All @@ -406,8 +406,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
npyv_f32 a1 = _mm512_max_ps(a, _mm512_permutex2var_ps(a, idx1, a));
npyv_f32 a2 = _mm512_max_ps(a1, _mm512_permutex2var_ps(a1, idx2, a1));
npyv_f32 a3 = _mm512_max_ps(a2, _mm512_shuffle_ps(a2, a2, (1<<6 | 0<<4 | 3<<2 | 2)));
npyv_f32 a4 = _mm512_max_ps(a3, _mm512_shuffle_sp(a3, a3, (2<<6 | 3<<4 | 0<<2 | 1)));
npyv_f32 a3 = _mm512_max_ps(a2, _mm512_shuffle_ps(a2, a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
npyv_f32 a4 = _mm512_max_ps(a3, _mm512_shuffle_ps(a3, a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
return _mm_cvtss_f32(_mm512_extractf32x4_ps(a4, 0x00));
}

Expand All @@ -417,8 +417,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
npyv_u32 a1 = _mm512_min_epu32(a, _mm512_permutex2var_epi32(a, idx1, a));
npyv_u32 a2 = _mm512_min_epu32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
npyv_u32 a3 = _mm512_min_epu32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
npyv_u32 a4 = _mm512_min_epu32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
npyv_u32 a3 = _mm512_min_epu32(a2, _mm512_shuffle_epi32(a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
npyv_u32 a4 = _mm512_min_epu32(a3, _mm512_shuffle_epi32(a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
}

Expand All @@ -428,8 +428,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
npyv_s32 a1 = _mm512_min_epi32(a, _mm512_permutex2var_epi32(a, idx1, a));
npyv_s32 a2 = _mm512_min_epi32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
npyv_s32 a3 = _mm512_min_epi32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
npyv_s32 a4 = _mm512_min_epi32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
npyv_s32 a3 = _mm512_min_epi32(a2, _mm512_shuffle_epi32(a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
npyv_s32 a4 = _mm512_min_epi32(a3, _mm512_shuffle_epi32(a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
}

Expand All @@ -439,8 +439,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
npyv_f32 a1 = _mm512_min_ps(a, _mm512_permutex2var_ps(a, idx1, a));
npyv_f32 a2 = _mm512_min_ps(a1, _mm512_permutex2var_ps(a1, idx2, a1));
npyv_f32 a3 = _mm512_min_ps(a2, _mm512_shuffle_ps(a2, a2, (1<<6 | 0<<4 | 3<<2 | 2)));
npyv_f32 a4 = _mm512_min_ps(a3, _mm512_shuffle_sp(a3, a3, (2<<6 | 3<<4 | 0<<2 | 1)));
npyv_f32 a3 = _mm512_min_ps(a2, _mm512_shuffle_ps(a2, a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
npyv_f32 a4 = _mm512_min_ps(a3, _mm512_shuffle_ps(a3, a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
return _mm_cvtss_f32(_mm512_extractf32x4_ps(a4, 0x00));
}

Expand Down
6 changes: 1 addition & 5 deletions numpy/core/src/npysort/x86-qsort.dispatch.cpp
Expand Up @@ -648,11 +648,7 @@ partition_vec(type_t *arr, npy_intp left, npy_intp right, const zmm_t curr_vec,
/* which elements are greater than or equal to the pivot */
__mmask16 gt_mask = vtype::ge(curr_vec, pivot_vec);
npy_int amount_gt_pivot = _mm_popcnt_u32((npy_int)gt_mask);
#if defined(_MSC_VER) && _MSC_VER < 1922
vtype::mask_compressstoreu(arr + left, ~gt_mask, curr_vec);
#else
vtype::mask_compressstoreu(arr + left, _knot_mask16(gt_mask), curr_vec);
#endif
vtype::mask_compressstoreu(arr + left, _mm512_knot(gt_mask), curr_vec);
vtype::mask_compressstoreu(arr + right - amount_gt_pivot, gt_mask,
curr_vec);
*smallest_vec = vtype::min(curr_vec, *smallest_vec);
Expand Down

0 comments on commit 0a467c8

Please sign in to comment.