Skip to content

Commit

Permalink
Merge pull request #15114 from charris/backport-15069
Browse files Browse the repository at this point in the history
ENH: add support for ILP64 OpenBLAS (without symbol suffix)
  • Loading branch information
charris committed Dec 15, 2019
2 parents ed41774 + 912a9cc commit 791bbba
Show file tree
Hide file tree
Showing 18 changed files with 287 additions and 212 deletions.
7 changes: 3 additions & 4 deletions doc/source/release/1.18.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -224,10 +224,9 @@ The ``numpy.expand_dims`` ``axis`` keyword can now accept a tuple of
axes. Previously, ``axis`` was required to be an integer.
(`gh-14051 <https://github.com/numpy/numpy/pull/14051>`__)

Support for 64-bit OpenBLAS with symbol suffix
----------------------------------------------
Added support for 64-bit (ILP64) OpenBLAS compiled with
``make INTERFACE64=1 SYMBOLSUFFIX=64_``. See ``site.cfg.example``
Support for 64-bit OpenBLAS
---------------------------
Added support for 64-bit (ILP64) OpenBLAS. See ``site.cfg.example``
for details.
(`gh-15012 <https://github.com/numpy/numpy/pull/15012>`__)

Expand Down
40 changes: 27 additions & 13 deletions doc/source/user/building.rst
Original file line number Diff line number Diff line change
Expand Up @@ -195,23 +195,37 @@ or::
BLAS=None LAPACK=None ATLAS=None python setup.py build


64-bit BLAS and LAPACK with symbol suffix
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
64-bit BLAS and LAPACK
~~~~~~~~~~~~~~~~~~~~~~

You can tell Numpy to use 64-bit BLAS/LAPACK libraries by setting the
environment variable::

NPY_USE_BLAS_ILP64=1

when building Numpy. The following 64-bit BLAS/LAPACK libraries are
supported:

1. OpenBLAS ILP64 with ``64_`` symbol suffix (``openblas64_``)
2. OpenBLAS ILP64 without symbol suffix (``openblas_ilp64``)

The order in which they are preferred is determined by
``NPY_BLAS_ILP64_ORDER`` and ``NPY_LAPACK_ILP64_ORDER`` environment
variables. The default value is ``openblas64_,openblas_ilp64``.

.. note::

Numpy also supports 64-bit OpenBLAS with ``64_`` symbol suffix. Such
library is obtained by compiling OpenBLAS with settings::
Using non-symbol-suffixed 64-bit BLAS/LAPACK in a program that also
uses 32-bit BLAS/LAPACK can cause crashes under certain conditions
(e.g. with embedded Python interpreters on Linux).

make INTERFACE64=1 SYMBOLSUFFIX=64_
The 64-bit OpenBLAS with ``64_`` symbol suffix is obtained by
compiling OpenBLAS with settings::

To make Numpy use it, set ``NPY_USE_BLAS64_=1`` environment variable
when building Numpy. You may also need to configure the
``[openblas64_]`` section in ``site.cfg``.
make INTERFACE64=1 SYMBOLSUFFIX=64_

The symbol suffix avoids symbol name clashes between 32-bit and 64-bit
BLAS/LAPACK libraries, meaning that you can link to both in the same
program. This avoids potential issues when using 64-bit BLAS/LAPACK in
Numpy while simultaneously using other Python software that uses the
32-bit versions.
The symbol suffix avoids the symbol name clashes between 32-bit and
64-bit BLAS/LAPACK libraries.


Supplying additional compiler flags
Expand Down
11 changes: 4 additions & 7 deletions numpy/core/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,6 @@ def is_npy_no_smp():
# block.
return 'NPY_NOSMP' in os.environ

def is_npy_use_blas64_():
return (os.environ.get('NPY_USE_BLAS64_', "0") != "0")

def win32_checks(deflist):
from numpy.distutils.misc_util import get_build_architecture
a = get_build_architecture()
Expand Down Expand Up @@ -756,12 +753,12 @@ def get_mathlib_info(*args):
join('src', 'common', 'numpyos.c'),
]

if is_npy_use_blas64_():
blas_info = get_info('blas64__opt', 2)
have_blas = blas_info and ('HAVE_CBLAS64_', None) in blas_info.get('define_macros', [])
if os.environ.get('NPY_USE_BLAS_ILP64', "0") != "0":
blas_info = get_info('blas_ilp64_opt', 2)
else:
blas_info = get_info('blas_opt', 0)
have_blas = blas_info and ('HAVE_CBLAS', None) in blas_info.get('define_macros', [])

have_blas = blas_info and ('HAVE_CBLAS', None) in blas_info.get('define_macros', [])

if have_blas:
extra_info = blas_info
Expand Down
10 changes: 0 additions & 10 deletions numpy/core/src/common/cblasfuncs.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,10 @@
#include <assert.h>
#include <numpy/arrayobject.h>
#include "npy_cblas.h"
#include "npy_cblas64_.h"
#include "arraytypes.h"
#include "common.h"


/*
* If 64-bit CBLAS with symbol suffix '64_' is available, use it.
*/
#ifdef HAVE_CBLAS64_
#define CBLAS_FUNC(name) name ## 64_
#else
#define CBLAS_FUNC(name) name
#endif

static const double oneD[2] = {1.0, 0.0}, zeroD[2] = {0.0, 0.0};
static const float oneF[2] = {1.0, 0.0}, zeroF[2] = {0.0, 0.0};

Expand Down
30 changes: 28 additions & 2 deletions numpy/core/src/common/npy_cblas.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,34 @@ enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};

#define CBLAS_INDEX size_t /* this may vary between platforms */

#define BLASINT int
#define BLASNAME(name) name
#ifdef NO_APPEND_FORTRAN
#define BLAS_FORTRAN_SUFFIX
#else
#define BLAS_FORTRAN_SUFFIX _
#endif

#ifndef BLAS_SYMBOL_PREFIX
#define BLAS_SYMBOL_PREFIX
#endif

#ifndef BLAS_SYMBOL_SUFFIX
#define BLAS_SYMBOL_SUFFIX
#endif

#define BLAS_FUNC_CONCAT(name,prefix,suffix,suffix2) prefix ## name ## suffix ## suffix2
#define BLAS_FUNC_EXPAND(name,prefix,suffix,suffix2) BLAS_FUNC_CONCAT(name,prefix,suffix,suffix2)

#define CBLAS_FUNC(name) BLAS_FUNC_EXPAND(name,BLAS_SYMBOL_PREFIX,,BLAS_SYMBOL_SUFFIX)
#define BLAS_FUNC(name) BLAS_FUNC_EXPAND(name,BLAS_SYMBOL_PREFIX,BLAS_FORTRAN_SUFFIX,BLAS_SYMBOL_SUFFIX)

#ifdef HAVE_BLAS_ILP64
#define CBLAS_INT npy_int64
#else
#define CBLAS_INT int
#endif

#define BLASNAME(name) CBLAS_FUNC(name)
#define BLASINT CBLAS_INT

#include "npy_cblas_base.h"

Expand Down
31 changes: 0 additions & 31 deletions numpy/core/src/common/npy_cblas64_.h

This file was deleted.

19 changes: 3 additions & 16 deletions numpy/core/src/common/python_xerbla.c
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
#include "Python.h"
#include "numpy/npy_common.h"

/*
* From f2c.h, this should be safe unless fortran is set to use 64
* bit integers. We don't seem to have any good way to detect that.
*/
typedef int integer;
#include "npy_cblas.h"

/*
From the original manpage:
Expand All @@ -24,7 +19,7 @@ typedef int integer;
info: Number of the invalid parameter.
*/

int xerbla_(char *srname, integer *info)
CBLAS_INT BLAS_FUNC(xerbla)(char *srname, CBLAS_INT *info)
{
static const char format[] = "On entry to %.*s" \
" parameter number %d had an illegal value";
Expand All @@ -42,19 +37,11 @@ int xerbla_(char *srname, integer *info)
#ifdef WITH_THREAD
save = PyGILState_Ensure();
#endif
PyOS_snprintf(buf, sizeof(buf), format, len, srname, *info);
PyOS_snprintf(buf, sizeof(buf), format, len, srname, (int)*info);
PyErr_SetString(PyExc_ValueError, buf);
#ifdef WITH_THREAD
PyGILState_Release(save);
#endif

return 0;
}


/* Same for LAPACK64_ */
npy_int64 xerbla_64_(char *srname, npy_int64 *info)
{
integer info_int = (integer)*info;
return xerbla_(srname, &info_int);
}
16 changes: 8 additions & 8 deletions numpy/core/src/multiarray/arraytypes.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -3535,17 +3535,17 @@ NPY_NO_EXPORT void
npy_intp n, void *NPY_UNUSED(ignore))
{
#if defined(HAVE_CBLAS)
int is1b = blas_stride(is1, sizeof(@type@));
int is2b = blas_stride(is2, sizeof(@type@));
CBLAS_INT is1b = blas_stride(is1, sizeof(@type@));
CBLAS_INT is2b = blas_stride(is2, sizeof(@type@));

if (is1b && is2b)
{
double sum = 0.; /* double for stability */

while (n > 0) {
int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;

sum += cblas_@prefix@dot(chunk,
sum += CBLAS_FUNC(cblas_@prefix@dot)(chunk,
(@type@ *) ip1, is1b,
(@type@ *) ip2, is2b);
/* use char strides here */
Expand Down Expand Up @@ -3584,17 +3584,17 @@ NPY_NO_EXPORT void
char *op, npy_intp n, void *NPY_UNUSED(ignore))
{
#if defined(HAVE_CBLAS)
int is1b = blas_stride(is1, sizeof(@ctype@));
int is2b = blas_stride(is2, sizeof(@ctype@));
CBLAS_INT is1b = blas_stride(is1, sizeof(@ctype@));
CBLAS_INT is2b = blas_stride(is2, sizeof(@ctype@));

if (is1b && is2b) {
double sum[2] = {0., 0.}; /* double for stability */

while (n > 0) {
int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
@type@ tmp[2];

cblas_@prefix@dotu_sub((int)n, ip1, is1b, ip2, is2b, tmp);
CBLAS_FUNC(cblas_@prefix@dotu_sub)((CBLAS_INT)n, ip1, is1b, ip2, is2b, tmp);
sum[0] += (double)tmp[0];
sum[1] += (double)tmp[1];
/* use char strides here */
Expand Down
10 changes: 9 additions & 1 deletion numpy/core/src/multiarray/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,11 @@ blas_stride(npy_intp stride, unsigned itemsize)
*/
if (stride > 0 && npy_is_aligned((void *)stride, itemsize)) {
stride /= itemsize;
#ifndef HAVE_BLAS_ILP64
if (stride <= INT_MAX) {
#else
if (stride <= NPY_MAX_INT64) {
#endif
return stride;
}
}
Expand All @@ -314,7 +318,11 @@ blas_stride(npy_intp stride, unsigned itemsize)
* Define a chunksize for CBLAS. CBLAS counts in integers.
*/
#if NPY_MAX_INTP > INT_MAX
# define NPY_CBLAS_CHUNK (INT_MAX / 2 + 1)
# ifndef HAVE_BLAS_ILP64
# define NPY_CBLAS_CHUNK (INT_MAX / 2 + 1)
# else
# define NPY_CBLAS_CHUNK (NPY_MAX_INT64 / 2 + 1)
# endif
#else
# define NPY_CBLAS_CHUNK NPY_MAX_INTP
#endif
Expand Down
16 changes: 8 additions & 8 deletions numpy/core/src/multiarray/vdot.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@ CFLOAT_vdot(char *ip1, npy_intp is1, char *ip2, npy_intp is2,
char *op, npy_intp n, void *NPY_UNUSED(ignore))
{
#if defined(HAVE_CBLAS)
int is1b = blas_stride(is1, sizeof(npy_cfloat));
int is2b = blas_stride(is2, sizeof(npy_cfloat));
CBLAS_INT is1b = blas_stride(is1, sizeof(npy_cfloat));
CBLAS_INT is2b = blas_stride(is2, sizeof(npy_cfloat));

if (is1b && is2b) {
double sum[2] = {0., 0.}; /* double for stability */

while (n > 0) {
int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
float tmp[2];

cblas_cdotc_sub((int)n, ip1, is1b, ip2, is2b, tmp);
CBLAS_FUNC(cblas_cdotc_sub)((CBLAS_INT)n, ip1, is1b, ip2, is2b, tmp);
sum[0] += (double)tmp[0];
sum[1] += (double)tmp[1];
/* use char strides here */
Expand Down Expand Up @@ -66,17 +66,17 @@ CDOUBLE_vdot(char *ip1, npy_intp is1, char *ip2, npy_intp is2,
char *op, npy_intp n, void *NPY_UNUSED(ignore))
{
#if defined(HAVE_CBLAS)
int is1b = blas_stride(is1, sizeof(npy_cdouble));
int is2b = blas_stride(is2, sizeof(npy_cdouble));
CBLAS_INT is1b = blas_stride(is1, sizeof(npy_cdouble));
CBLAS_INT is2b = blas_stride(is2, sizeof(npy_cdouble));

if (is1b && is2b) {
double sum[2] = {0., 0.}; /* double for stability */

while (n > 0) {
int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
double tmp[2];

cblas_zdotc_sub((int)n, ip1, is1b, ip2, is2b, tmp);
CBLAS_FUNC(cblas_zdotc_sub)((CBLAS_INT)n, ip1, is1b, ip2, is2b, tmp);
sum[0] += (double)tmp[0];
sum[1] += (double)tmp[1];
/* use char strides here */
Expand Down

0 comments on commit 791bbba

Please sign in to comment.