Fix overloads with target="generic" for CUDA

As identified in numba#8271, the CUDA target needs to be set as the target at the bottom of the call stack; otherwise, overloads for the generic target cannot be resolved. This is required so that the fix applied in numba#8562 (using the generic target for `ol_compatible_view` from numba#8537) actually works.
gmarkall · Nov 2, 2022 · 4c4e319 · 4c4e319
1 parent 2c108f9
commit 4c4e319
Showing 1 changed file with 10 additions and 8 deletions.
diff --git a/numba/cuda/compiler.py b/numba/cuda/compiler.py
@@ -207,14 +207,16 @@ def compile_cuda(pyfunc, return_type, args, debug=False, lineinfo=False,
         flags.nvvm_options = nvvm_options
 
     # Run compilation pipeline
-    cres = compiler.compile_extra(typingctx=typingctx,
-                                  targetctx=targetctx,
-                                  func=pyfunc,
-                                  args=args,
-                                  return_type=return_type,
-                                  flags=flags,
-                                  locals={},
-                                  pipeline_class=CUDACompiler)
+    from numba.core.target_extension import target_override
+    with target_override('cuda'):
+        cres = compiler.compile_extra(typingctx=typingctx,
+                                      targetctx=targetctx,
+                                      func=pyfunc,
+                                      args=args,
+                                      return_type=return_type,
+                                      flags=flags,
+                                      locals={},
+                                      pipeline_class=CUDACompiler)
 
     library = cres.library
     library.finalize()