Merge pull request #15114 from charris/backport-15069

ENH: add support for ILP64 OpenBLAS (without symbol suffix)
numpy · Dec 15, 2019 · 791bbba · 791bbba
2 parents ed41774 + 912a9cc
commit 791bbba
Show file tree

Hide file tree

Showing 18 changed files with 287 additions and 212 deletions.
diff --git a/doc/source/release/1.18.0-notes.rst b/doc/source/release/1.18.0-notes.rst
@@ -224,10 +224,9 @@ The ``numpy.expand_dims`` ``axis`` keyword can now accept a tuple of
 axes.  Previously, ``axis`` was required to be an integer.
 (`gh-14051 <https://github.com/numpy/numpy/pull/14051>`__)
 
-Support for 64-bit OpenBLAS with symbol suffix
-----------------------------------------------
-Added support for 64-bit (ILP64) OpenBLAS compiled with
-``make INTERFACE64=1 SYMBOLSUFFIX=64_``. See ``site.cfg.example``
+Support for 64-bit OpenBLAS
+---------------------------
+Added support for 64-bit (ILP64) OpenBLAS. See ``site.cfg.example``
 for details.
 (`gh-15012 <https://github.com/numpy/numpy/pull/15012>`__)
 

diff --git a/doc/source/user/building.rst b/doc/source/user/building.rst
@@ -195,23 +195,37 @@ or::
     BLAS=None LAPACK=None ATLAS=None python setup.py build
 
 
-64-bit BLAS and LAPACK with symbol suffix
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+64-bit BLAS and LAPACK
+~~~~~~~~~~~~~~~~~~~~~~
+
+You can tell Numpy to use 64-bit BLAS/LAPACK libraries by setting the
+environment variable::
+
+    NPY_USE_BLAS_ILP64=1
+
+when building Numpy. The following 64-bit BLAS/LAPACK libraries are
+supported:
+
+1. OpenBLAS ILP64 with ``64_`` symbol suffix (``openblas64_``)
+2. OpenBLAS ILP64 without symbol suffix (``openblas_ilp64``)
+
+The order in which they are preferred is determined by
+``NPY_BLAS_ILP64_ORDER`` and ``NPY_LAPACK_ILP64_ORDER`` environment
+variables. The default value is ``openblas64_,openblas_ilp64``.
+
+.. note::
 
-Numpy also supports 64-bit OpenBLAS with ``64_`` symbol suffix. Such
-library is obtained by compiling OpenBLAS with settings::
+   Using non-symbol-suffixed 64-bit BLAS/LAPACK in a program that also
+   uses 32-bit BLAS/LAPACK can cause crashes under certain conditions
+   (e.g. with embedded Python interpreters on Linux).
 
-    make INTERFACE64=1 SYMBOLSUFFIX=64_
+   The 64-bit OpenBLAS with ``64_`` symbol suffix is obtained by
+   compiling OpenBLAS with settings::
 
-To make Numpy use it, set ``NPY_USE_BLAS64_=1`` environment variable
-when building Numpy. You may also need to configure the
-``[openblas64_]`` section in ``site.cfg``.
+       make INTERFACE64=1 SYMBOLSUFFIX=64_
 
-The symbol suffix avoids symbol name clashes between 32-bit and 64-bit
-BLAS/LAPACK libraries, meaning that you can link to both in the same
-program. This avoids potential issues when using 64-bit BLAS/LAPACK in
-Numpy while simultaneously using other Python software that uses the
-32-bit versions.
+   The symbol suffix avoids the symbol name clashes between 32-bit and
+   64-bit BLAS/LAPACK libraries.
 
 
 Supplying additional compiler flags

diff --git a/numpy/core/setup.py b/numpy/core/setup.py
@@ -91,9 +91,6 @@ def is_npy_no_smp():
     #  block.
     return 'NPY_NOSMP' in os.environ
 
-def is_npy_use_blas64_():
-    return (os.environ.get('NPY_USE_BLAS64_', "0") != "0")
-
 def win32_checks(deflist):
     from numpy.distutils.misc_util import get_build_architecture
     a = get_build_architecture()
@@ -756,12 +753,12 @@ def get_mathlib_info(*args):
             join('src', 'common', 'numpyos.c'),
             ]
 
-    if is_npy_use_blas64_():
-        blas_info = get_info('blas64__opt', 2)
-        have_blas = blas_info and ('HAVE_CBLAS64_', None) in blas_info.get('define_macros', [])
+    if os.environ.get('NPY_USE_BLAS_ILP64', "0") != "0":
+        blas_info = get_info('blas_ilp64_opt', 2)
     else:
         blas_info = get_info('blas_opt', 0)
-        have_blas = blas_info and ('HAVE_CBLAS', None) in blas_info.get('define_macros', [])
+
+    have_blas = blas_info and ('HAVE_CBLAS', None) in blas_info.get('define_macros', [])
 
     if have_blas:
         extra_info = blas_info

diff --git a/numpy/core/src/common/cblasfuncs.c b/numpy/core/src/common/cblasfuncs.c
@@ -10,20 +10,10 @@
 #include <assert.h>
 #include <numpy/arrayobject.h>
 #include "npy_cblas.h"
-#include "npy_cblas64_.h"
 #include "arraytypes.h"
 #include "common.h"
 
 
-/*
- * If 64-bit CBLAS with symbol suffix '64_' is available, use it.
- */
-#ifdef HAVE_CBLAS64_
-#define CBLAS_FUNC(name) name ## 64_
-#else
-#define CBLAS_FUNC(name) name
-#endif
-
 static const double oneD[2] = {1.0, 0.0}, zeroD[2] = {0.0, 0.0};
 static const float oneF[2] = {1.0, 0.0}, zeroF[2] = {0.0, 0.0};
 

diff --git a/numpy/core/src/common/npy_cblas.h b/numpy/core/src/common/npy_cblas.h
@@ -25,8 +25,34 @@ enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
 
 #define CBLAS_INDEX size_t  /* this may vary between platforms */
 
-#define BLASINT int
-#define BLASNAME(name) name
+#ifdef NO_APPEND_FORTRAN
+#define BLAS_FORTRAN_SUFFIX
+#else
+#define BLAS_FORTRAN_SUFFIX _
+#endif
+
+#ifndef BLAS_SYMBOL_PREFIX
+#define BLAS_SYMBOL_PREFIX
+#endif
+
+#ifndef BLAS_SYMBOL_SUFFIX
+#define BLAS_SYMBOL_SUFFIX
+#endif
+
+#define BLAS_FUNC_CONCAT(name,prefix,suffix,suffix2) prefix ## name ## suffix ## suffix2
+#define BLAS_FUNC_EXPAND(name,prefix,suffix,suffix2) BLAS_FUNC_CONCAT(name,prefix,suffix,suffix2)
+
+#define CBLAS_FUNC(name) BLAS_FUNC_EXPAND(name,BLAS_SYMBOL_PREFIX,,BLAS_SYMBOL_SUFFIX)
+#define BLAS_FUNC(name) BLAS_FUNC_EXPAND(name,BLAS_SYMBOL_PREFIX,BLAS_FORTRAN_SUFFIX,BLAS_SYMBOL_SUFFIX)
+
+#ifdef HAVE_BLAS_ILP64
+#define CBLAS_INT npy_int64
+#else
+#define CBLAS_INT int
+#endif
+
+#define BLASNAME(name) CBLAS_FUNC(name)
+#define BLASINT CBLAS_INT
 
 #include "npy_cblas_base.h"
 

diff --git a/numpy/core/src/common/npy_cblas64_.h b/numpy/core/src/common/npy_cblas64_.h
diff --git a/numpy/core/src/common/python_xerbla.c b/numpy/core/src/common/python_xerbla.c
@@ -1,11 +1,6 @@
 #include "Python.h"
 #include "numpy/npy_common.h"
-
-/*
- * From f2c.h, this should be safe unless fortran is set to use 64
- * bit integers. We don't seem to have any good way to detect that.
- */
-typedef int integer;
+#include "npy_cblas.h"
 
 /*
   From the original manpage:
@@ -24,7 +19,7 @@ typedef int integer;
   info: Number of the invalid parameter.
 */
 
-int xerbla_(char *srname, integer *info)
+CBLAS_INT BLAS_FUNC(xerbla)(char *srname, CBLAS_INT *info)
 {
         static const char format[] = "On entry to %.*s" \
                 " parameter number %d had an illegal value";
@@ -42,19 +37,11 @@ int xerbla_(char *srname, integer *info)
 #ifdef WITH_THREAD
         save = PyGILState_Ensure();
 #endif
-        PyOS_snprintf(buf, sizeof(buf), format, len, srname, *info);
+        PyOS_snprintf(buf, sizeof(buf), format, len, srname, (int)*info);
         PyErr_SetString(PyExc_ValueError, buf);
 #ifdef WITH_THREAD
         PyGILState_Release(save);
 #endif
 
         return 0;
 }
-
-
-/* Same for LAPACK64_ */
-npy_int64 xerbla_64_(char *srname, npy_int64 *info)
-{
-        integer info_int = (integer)*info;
-        return xerbla_(srname, &info_int);
-}
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
@@ -3535,17 +3535,17 @@ NPY_NO_EXPORT void
            npy_intp n, void *NPY_UNUSED(ignore))
 {
 #if defined(HAVE_CBLAS)
-    int is1b = blas_stride(is1, sizeof(@type@));
-    int is2b = blas_stride(is2, sizeof(@type@));
+    CBLAS_INT is1b = blas_stride(is1, sizeof(@type@));
+    CBLAS_INT is2b = blas_stride(is2, sizeof(@type@));
 
     if (is1b && is2b)
     {
         double sum = 0.;  /* double for stability */
 
         while (n > 0) {
-            int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
+            CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
 
-            sum += cblas_@prefix@dot(chunk,
+            sum += CBLAS_FUNC(cblas_@prefix@dot)(chunk,
                                      (@type@ *) ip1, is1b,
                                      (@type@ *) ip2, is2b);
             /* use char strides here */
@@ -3584,17 +3584,17 @@ NPY_NO_EXPORT void
            char *op, npy_intp n, void *NPY_UNUSED(ignore))
 {
 #if defined(HAVE_CBLAS)
-    int is1b = blas_stride(is1, sizeof(@ctype@));
-    int is2b = blas_stride(is2, sizeof(@ctype@));
+    CBLAS_INT is1b = blas_stride(is1, sizeof(@ctype@));
+    CBLAS_INT is2b = blas_stride(is2, sizeof(@ctype@));
 
     if (is1b && is2b) {
         double sum[2] = {0., 0.};  /* double for stability */
 
         while (n > 0) {
-            int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
+            CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
             @type@ tmp[2];
 
-            cblas_@prefix@dotu_sub((int)n, ip1, is1b, ip2, is2b, tmp);
+            CBLAS_FUNC(cblas_@prefix@dotu_sub)((CBLAS_INT)n, ip1, is1b, ip2, is2b, tmp);
             sum[0] += (double)tmp[0];
             sum[1] += (double)tmp[1];
             /* use char strides here */

diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
@@ -303,7 +303,11 @@ blas_stride(npy_intp stride, unsigned itemsize)
      */
     if (stride > 0 && npy_is_aligned((void *)stride, itemsize)) {
         stride /= itemsize;
+#ifndef HAVE_BLAS_ILP64
         if (stride <= INT_MAX) {
+#else
+        if (stride <= NPY_MAX_INT64) {
+#endif
             return stride;
         }
     }
@@ -314,7 +318,11 @@ blas_stride(npy_intp stride, unsigned itemsize)
  * Define a chunksize for CBLAS. CBLAS counts in integers.
  */
 #if NPY_MAX_INTP > INT_MAX
-# define NPY_CBLAS_CHUNK  (INT_MAX / 2 + 1)
+# ifndef HAVE_BLAS_ILP64
+#  define NPY_CBLAS_CHUNK  (INT_MAX / 2 + 1)
+# else
+#  define NPY_CBLAS_CHUNK  (NPY_MAX_INT64 / 2 + 1)
+# endif
 #else
 # define NPY_CBLAS_CHUNK  NPY_MAX_INTP
 #endif

diff --git a/numpy/core/src/multiarray/vdot.c b/numpy/core/src/multiarray/vdot.c
@@ -15,17 +15,17 @@ CFLOAT_vdot(char *ip1, npy_intp is1, char *ip2, npy_intp is2,
             char *op, npy_intp n, void *NPY_UNUSED(ignore))
 {
 #if defined(HAVE_CBLAS)
-    int is1b = blas_stride(is1, sizeof(npy_cfloat));
-    int is2b = blas_stride(is2, sizeof(npy_cfloat));
+    CBLAS_INT is1b = blas_stride(is1, sizeof(npy_cfloat));
+    CBLAS_INT is2b = blas_stride(is2, sizeof(npy_cfloat));
 
     if (is1b && is2b) {
         double sum[2] = {0., 0.};  /* double for stability */
 
         while (n > 0) {
-            int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
+            CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
             float tmp[2];
 
-            cblas_cdotc_sub((int)n, ip1, is1b, ip2, is2b, tmp);
+            CBLAS_FUNC(cblas_cdotc_sub)((CBLAS_INT)n, ip1, is1b, ip2, is2b, tmp);
             sum[0] += (double)tmp[0];
             sum[1] += (double)tmp[1];
             /* use char strides here */
@@ -66,17 +66,17 @@ CDOUBLE_vdot(char *ip1, npy_intp is1, char *ip2, npy_intp is2,
              char *op, npy_intp n, void *NPY_UNUSED(ignore))
 {
 #if defined(HAVE_CBLAS)
-    int is1b = blas_stride(is1, sizeof(npy_cdouble));
-    int is2b = blas_stride(is2, sizeof(npy_cdouble));
+    CBLAS_INT is1b = blas_stride(is1, sizeof(npy_cdouble));
+    CBLAS_INT is2b = blas_stride(is2, sizeof(npy_cdouble));
 
     if (is1b && is2b) {
         double sum[2] = {0., 0.};  /* double for stability */
 
         while (n > 0) {
-            int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
+            CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
             double tmp[2];
 
-            cblas_zdotc_sub((int)n, ip1, is1b, ip2, is2b, tmp);
+            CBLAS_FUNC(cblas_zdotc_sub)((CBLAS_INT)n, ip1, is1b, ip2, is2b, tmp);
             sum[0] += (double)tmp[0];
             sum[1] += (double)tmp[1];
             /* use char strides here */