Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CI: Test NumPy build against old versions of GCC (6, 7, 8) #22155

Merged
merged 3 commits on Aug 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
59 changes: 59 additions & 0 deletions .github/workflows/build_test.yml
Expand Up @@ -71,6 +71,65 @@ jobs:
python-version: ${{ matrix.python-version }}
- uses: ./.github/actions

old_gcc:
needs: [smoke_test]
# provides GCC 6, 7, 8
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v3
with:
submodules: recursive
fetch-depth: 0
# comes with python3.6
- name: Install Python3.8
run: |
sudo apt update
# for add-apt-repository
sudo apt install software-properties-common -y
sudo add-apt-repository ppa:deadsnakes/ppa -y
sudo apt install python3.8-dev -y
sudo ln -s /usr/bin/python3.8 /usr/bin/pythonx
pythonx -m pip install --upgrade pip setuptools wheel
pythonx -m pip install -r test_requirements.txt
- name: Install Compilers
run: sudo apt install g++-6 g++-7 g++-8 -y
- name: Build gcc-6
run: |
export CC=/usr/bin/gcc-6
export CXX=/usr/bin/g++-6
pythonx setup.py install --user
- name: Runtests gcc-6
run: pythonx runtests.py -n
- name: Build gcc-7
run: |
export CC=/usr/bin/gcc-7
export CXX=/usr/bin/g++-7
rm -rf build && pythonx setup.py install --user
- name: Runtests gcc-7
run: pythonx runtests.py -n
- name: Build gcc-8
run: |
export CC=/usr/bin/gcc-8
export CXX=/usr/bin/g++-8
rm -rf build && pythonx setup.py install --user
- name: Runtests gcc-8
run: pythonx runtests.py -n

without_optimizations:
needs: [smoke_test]
runs-on: ubuntu-latest
env:
WITHOUT_OPTIMIZATIONS: 1
steps:
- uses: actions/checkout@v3
with:
submodules: recursive
fetch-depth: 0
- uses: actions/setup-python@v3
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: ./.github/actions

debug:
needs: [smoke_test]
runs-on: ubuntu-20.04
Expand Down
3 changes: 2 additions & 1 deletion INSTALL.rst.txt
Expand Up @@ -75,7 +75,8 @@ skipped when running the test suite if no Fortran compiler is available. For
building Scipy a Fortran compiler is needed though, so we include some details
on Fortran compilers in the rest of this section.

On OS X and Linux, all common compilers will work.
On OS X and Linux, all common compilers will work. The minimum supported GCC
version is 6.5.

For Fortran, ``gfortran`` works, ``g77`` does not. In case ``g77`` is
installed then ``g77`` will be detected and used first. To explicitly select
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user/building.rst
Expand Up @@ -59,7 +59,7 @@ Building NumPy requires the following software installed:
MSVC and Clang compilers. Compilers from other vendors such as Intel,
Absoft, Sun, NAG, Compaq, Vast, Portland, Lahey, HP, IBM are only
supported in the form of community feedback, and may not work out of the
box. GCC 4.x (and later) compilers are recommended. On ARM64 (aarch64)
box. GCC 6.5 (and later) compilers are recommended. On ARM64 (aarch64)
GCC 8.x (and later) are recommended.

3) Linear Algebra libraries
Expand Down
24 changes: 12 additions & 12 deletions numpy/core/src/common/simd/avx512/arithmetic.h
Expand Up @@ -384,8 +384,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
npyv_u32 a1 = _mm512_max_epu32(a, _mm512_permutex2var_epi32(a, idx1, a));
npyv_u32 a2 = _mm512_max_epu32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
npyv_u32 a3 = _mm512_max_epu32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
npyv_u32 a4 = _mm512_max_epu32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
npyv_u32 a3 = _mm512_max_epu32(a2, _mm512_shuffle_epi32(a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
npyv_u32 a4 = _mm512_max_epu32(a3, _mm512_shuffle_epi32(a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
}

Expand All @@ -395,8 +395,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
npyv_s32 a1 = _mm512_max_epi32(a, _mm512_permutex2var_epi32(a, idx1, a));
npyv_s32 a2 = _mm512_max_epi32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
npyv_s32 a3 = _mm512_max_epi32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
npyv_s32 a4 = _mm512_max_epi32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
npyv_s32 a3 = _mm512_max_epi32(a2, _mm512_shuffle_epi32(a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
npyv_s32 a4 = _mm512_max_epi32(a3, _mm512_shuffle_epi32(a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
}

Expand All @@ -406,8 +406,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
npyv_f32 a1 = _mm512_max_ps(a, _mm512_permutex2var_ps(a, idx1, a));
npyv_f32 a2 = _mm512_max_ps(a1, _mm512_permutex2var_ps(a1, idx2, a1));
npyv_f32 a3 = _mm512_max_ps(a2, _mm512_shuffle_ps(a2, a2, (1<<6 | 0<<4 | 3<<2 | 2)));
npyv_f32 a4 = _mm512_max_ps(a3, _mm512_shuffle_sp(a3, a3, (2<<6 | 3<<4 | 0<<2 | 1)));
npyv_f32 a3 = _mm512_max_ps(a2, _mm512_shuffle_ps(a2, a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
npyv_f32 a4 = _mm512_max_ps(a3, _mm512_shuffle_ps(a3, a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
return _mm_cvtss_f32(_mm512_extractf32x4_ps(a4, 0x00));
}

Expand All @@ -417,8 +417,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
npyv_u32 a1 = _mm512_min_epu32(a, _mm512_permutex2var_epi32(a, idx1, a));
npyv_u32 a2 = _mm512_min_epu32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
npyv_u32 a3 = _mm512_min_epu32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
npyv_u32 a4 = _mm512_min_epu32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
npyv_u32 a3 = _mm512_min_epu32(a2, _mm512_shuffle_epi32(a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
npyv_u32 a4 = _mm512_min_epu32(a3, _mm512_shuffle_epi32(a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
}

Expand All @@ -428,8 +428,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
npyv_s32 a1 = _mm512_min_epi32(a, _mm512_permutex2var_epi32(a, idx1, a));
npyv_s32 a2 = _mm512_min_epi32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
npyv_s32 a3 = _mm512_min_epi32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
npyv_s32 a4 = _mm512_min_epi32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
npyv_s32 a3 = _mm512_min_epi32(a2, _mm512_shuffle_epi32(a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
npyv_s32 a4 = _mm512_min_epi32(a3, _mm512_shuffle_epi32(a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
}

Expand All @@ -439,8 +439,8 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
npyv_f32 a1 = _mm512_min_ps(a, _mm512_permutex2var_ps(a, idx1, a));
npyv_f32 a2 = _mm512_min_ps(a1, _mm512_permutex2var_ps(a1, idx2, a1));
npyv_f32 a3 = _mm512_min_ps(a2, _mm512_shuffle_ps(a2, a2, (1<<6 | 0<<4 | 3<<2 | 2)));
npyv_f32 a4 = _mm512_min_ps(a3, _mm512_shuffle_sp(a3, a3, (2<<6 | 3<<4 | 0<<2 | 1)));
npyv_f32 a3 = _mm512_min_ps(a2, _mm512_shuffle_ps(a2, a2, (_MM_PERM_ENUM)(1<<6 | 0<<4 | 3<<2 | 2)));
npyv_f32 a4 = _mm512_min_ps(a3, _mm512_shuffle_ps(a3, a3, (_MM_PERM_ENUM)(2<<6 | 3<<4 | 0<<2 | 1)));
return _mm_cvtss_f32(_mm512_extractf32x4_ps(a4, 0x00));
}

Expand Down
6 changes: 1 addition & 5 deletions numpy/core/src/npysort/x86-qsort.dispatch.cpp
Expand Up @@ -648,11 +648,7 @@ partition_vec(type_t *arr, npy_intp left, npy_intp right, const zmm_t curr_vec,
/* which elements are larger than the pivot */
__mmask16 gt_mask = vtype::ge(curr_vec, pivot_vec);
npy_int amount_gt_pivot = _mm_popcnt_u32((npy_int)gt_mask);
#if defined(_MSC_VER) && _MSC_VER < 1922
vtype::mask_compressstoreu(arr + left, ~gt_mask, curr_vec);
#else
vtype::mask_compressstoreu(arr + left, _knot_mask16(gt_mask), curr_vec);
#endif
vtype::mask_compressstoreu(arr + left, _mm512_knot(gt_mask), curr_vec);
vtype::mask_compressstoreu(arr + right - amount_gt_pivot, gt_mask,
curr_vec);
*smallest_vec = vtype::min(curr_vec, *smallest_vec);
Expand Down