You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
We are seeing (tiny) differences in performance between `x in (1, 2)` and `x == 1 or x == 2`, favoring the latter. A bit of asm inspection shows that, in the first case, a subfunction is created and not inlined on the CUDA target. Is it possible to inline this automatically, or to add support for this case to `literal_unroll`? Being able to use the `in` operator makes the code a bit prettier and more Pythonic.
import numba as nb
from numba import cuda
ONE, TWO, THREE = 1, 2, 3
# Reproducer kernel, membership form: adds 1 to arg[0] when arg[0] is found in
# the tuple (ONE, TWO). Per the explicit signature, `arg` is a 1-D float64
# array and the kernel returns nothing.
# The `in` test is intentionally left exactly as written: this spelling is the
# one being compared against the `==` / `or` spelling used by fn_b.
@cuda.jit(nb.void(nb.float64[:]))
def fn_a(arg):
    if arg[0] in (ONE, TWO):
        arg[0] += 1
# Reproducer kernel, explicit-comparison form: adds 1 to arg[0] when arg[0]
# equals ONE or equals TWO. Per the explicit signature, `arg` is a 1-D float64
# array and the kernel returns nothing.
# The chained `==` / `or` test is intentionally left exactly as written: this
# spelling is the one being compared against the `in` spelling used by fn_a.
@cuda.jit(nb.void(nb.float64[:]))
def fn_b(arg):
    if arg[0] == ONE or arg[0] == TWO:
        arg[0] += 1
# Dump the generated code for both kernels, in the order fn_a then fn_b.
# For each kernel: a row of 25 asterisks, its LLVM IR, a row of 25 hashes,
# then the assembly from inspect_asm(cc=None). A final row of asterisks closes
# the dump.
for kernel in (fn_a, fn_b):
    # Take the first entry in the kernel's `overloads` mapping.
    compiled = next(iter(kernel.overloads.values()))
    print("*" * 25)
    print(compiled.inspect_llvm())
    print("#" * 25)
    print(compiled.inspect_asm(cc=None))
print("*" * 25)
Yields:
; ModuleID = "cuda.kernel.wrapper"
target triple = "nvptx64-nvidia-cuda"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
declare i32 @"_ZN8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE"(i8** %".1", i8* %".2", i8* %".3", i64 %".4", i64 %".5", double* %".6", i64 %".7", i64 %".8")
define void @"_ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE"(i8* %".1", i8* %".2", i64 %".3", i64 %".4", double* %".5", i64 %".6", i64 %".7")
{
.9:
%"inserted.meminfo" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} undef, i8* %".1", 0
%"inserted.parent" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.meminfo", i8* %".2", 1
%"inserted.nitems" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.parent", i64 %".3", 2
%"inserted.itemsize" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.nitems", i64 %".4", 3
%"inserted.data" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.itemsize", double* %".5", 4
%".10" = insertvalue [1 x i64] undef, i64 %".6", 0
%"inserted.shape" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.data", [1 x i64] %".10", 5
%".11" = insertvalue [1 x i64] undef, i64 %".7", 0
%"inserted.strides" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.shape", [1 x i64] %".11", 6
%".12" = alloca i8*
store i8* null, i8** %".12"
store i8* null, i8** %".12"
%"extracted.meminfo" = extractvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.strides", 0
%"extracted.parent" = extractvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.strides", 1
%"extracted.nitems" = extractvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.strides", 2
%"extracted.itemsize" = extractvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.strides", 3
%"extracted.data" = extractvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.strides", 4
%"extracted.shape" = extractvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.strides", 5
%".15" = extractvalue [1 x i64] %"extracted.shape", 0
%"extracted.strides" = extractvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.strides", 6
%".16" = extractvalue [1 x i64] %"extracted.strides", 0
%".17" = call i32 @"_ZN8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE"(i8** %".12", i8* %"extracted.meminfo", i8* %"extracted.parent", i64 %"extracted.nitems", i64 %"extracted.itemsize", double* %"extracted.data", i64 %".15", i64 %".16")
%".18" = icmp eq i32 %".17", 0
%".19" = icmp eq i32 %".17", -2
%".20" = or i1 %".18", %".19"
%".21" = xor i1 %".20", -1
%".22" = icmp eq i32 %".17", -1
%".23" = icmp eq i32 %".17", -3
%".24" = icmp sge i32 %".17", 1
%".25" = load i8*, i8** %".12"
ret void
}
@"_ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__errcode__" = global i32 0
@"_ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__tidx__" = global i32 0
@"_ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__ctaidx__" = global i32 0
@"_ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__tidy__" = global i32 0
@"_ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__ctaidy__" = global i32 0
@"_ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__tidz__" = global i32 0
@"_ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__ctaidz__" = global i32 0
@"llvm.used" = appending global [1 x i8*] [i8* bitcast (void (i8*, i8*, i64, i64, double*, i64, i64)* @"_ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE" to i8*)], section "llvm.metadata"
!nvvmir.version = !{ !0 }
!nvvm.annotations = !{ !1 }
!0 = !{ i32 1, i32 9, i32 3, i32 1 }
!1 = !{ void (i8*, i8*, i64, i64, double*, i64, i64)* @"_ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE", !"kernel", i32 1 }
; ModuleID = "fn_a$1"
target triple = "nvptx64-nvidia-cuda"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
@"_ZN08NumbaEnv8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE" = common global i8* null
define linkonce_odr i32 @"_ZN8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE"(i8** %".ret", i8* %"arg.arg.0", i8* %"arg.arg.1", i64 %"arg.arg.2", i64 %"arg.arg.3", double* %"arg.arg.4", i64 %"arg.arg.5.0", i64 %"arg.arg.6.0")
{
entry:
%"inserted.meminfo" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} undef, i8* %"arg.arg.0", 0
%"inserted.parent" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.meminfo", i8* %"arg.arg.1", 1
%"inserted.nitems" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.parent", i64 %"arg.arg.2", 2
%"inserted.itemsize" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.nitems", i64 %"arg.arg.3", 3
%"inserted.data" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.itemsize", double* %"arg.arg.4", 4
%".10" = insertvalue [1 x i64] undef, i64 %"arg.arg.5.0", 0
%"inserted.shape" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.data", [1 x i64] %".10", 5
%".11" = insertvalue [1 x i64] undef, i64 %"arg.arg.6.0", 0
%"inserted.strides" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.shape", [1 x i64] %".11", 6
%"arg" = alloca {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
%".15" = alloca {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".15"
%".51" = alloca i8
store i8 0, i8* %".51"
%".65" = alloca i1
store i1 0, i1* %".65"
%".78" = alloca {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".78"
%".111" = alloca {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".111"
br label %"B0"
B0:
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.strides", {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
%".14" = load {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".15"
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %".14", {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".15"
%".19" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".15", i32 0, i32 5
%".20" = getelementptr inbounds [1 x i64], [1 x i64]* %".19", i32 0, i32 0
%".21" = load i64, i64* %".20", !range !1
%".22" = insertvalue [1 x i64] undef, i64 %".21", 0
%".23" = extractvalue [1 x i64] %".22", 0
%".24" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".15", i32 0, i32 6
%".25" = load [1 x i64], [1 x i64]* %".24"
%".26" = extractvalue [1 x i64] %".25", 0
%".27" = icmp slt i64 0, 0
%".28" = add i64 0, %".23"
%".29" = select i1 %".27", i64 %".28", i64 0
%".30" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".15", i32 0, i32 5
%".31" = getelementptr inbounds [1 x i64], [1 x i64]* %".30", i32 0, i32 0
%".32" = load i64, i64* %".31", !range !1
%".33" = insertvalue [1 x i64] undef, i64 %".32", 0
%".34" = extractvalue [1 x i64] %".33", 0
%".35" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".15", i32 0, i32 6
%".36" = load [1 x i64], [1 x i64]* %".35"
%".37" = extractvalue [1 x i64] %".36", 0
%".38" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".15", i32 0, i32 4
%".39" = load double*, double** %".38"
%".40" = mul i64 %".37", %".29"
%".41" = ptrtoint double* %".39" to i64
%".42" = add i64 %".41", %".40"
%".43" = inttoptr i64 %".42" to double*
%".44" = load double, double* %".43"
%".45" = insertvalue {i64, i64} undef, i64 1, 0
%".46" = insertvalue {i64, i64} %".45", i64 2, 1
%".47" = extractvalue {i64, i64} %".46", 0
%".48" = extractvalue {i64, i64} %".46", 1
%".49" = insertvalue [2 x i64] undef, i64 1, 0
%".50" = insertvalue [2 x i64] %".49", i64 2, 1
store i8 0, i8* %".51"
%".54" = extractvalue [2 x i64] %".50", 0
%".55" = extractvalue [2 x i64] %".50", 1
%".56" = call i32 @"_ZN5numba7cpython7listobj6in_seq12_3clocals_3e17seq_contains_implB2v2B40c8tJTC_2fWQI8IW1CiAAYKPM6RBFDjESyhCQA_3dE8UniTupleIxLi2EEd"(i8* %".51", i64 %".54", i64 %".55", double %".44")
%".57" = icmp eq i32 %".56", 0
%".58" = icmp eq i32 %".56", -2
%".59" = or i1 %".57", %".58"
%".60" = xor i1 %".59", -1
%".61" = icmp eq i32 %".56", -1
%".62" = icmp eq i32 %".56", -3
%".63" = icmp sge i32 %".56", 1
%".64" = load i8, i8* %".51"
%".67" = icmp eq i8 %".64", 0
br i1 %".67", label %"B0.if", label %"B0.else"
B18:
%".77" = load {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".78"
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %".77", {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".78"
%".82" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".78", i32 0, i32 5
%".83" = getelementptr inbounds [1 x i64], [1 x i64]* %".82", i32 0, i32 0
%".84" = load i64, i64* %".83", !range !1
%".85" = insertvalue [1 x i64] undef, i64 %".84", 0
%".86" = extractvalue [1 x i64] %".85", 0
%".87" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".78", i32 0, i32 6
%".88" = load [1 x i64], [1 x i64]* %".87"
%".89" = extractvalue [1 x i64] %".88", 0
%".90" = icmp slt i64 0, 0
%".91" = add i64 0, %".86"
%".92" = select i1 %".90", i64 %".91", i64 0
%".93" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".78", i32 0, i32 5
%".94" = getelementptr inbounds [1 x i64], [1 x i64]* %".93", i32 0, i32 0
%".95" = load i64, i64* %".94", !range !1
%".96" = insertvalue [1 x i64] undef, i64 %".95", 0
%".97" = extractvalue [1 x i64] %".96", 0
%".98" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".78", i32 0, i32 6
%".99" = load [1 x i64], [1 x i64]* %".98"
%".100" = extractvalue [1 x i64] %".99", 0
%".101" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".78", i32 0, i32 4
%".102" = load double*, double** %".101"
%".103" = mul i64 %".100", %".92"
%".104" = ptrtoint double* %".102" to i64
%".105" = add i64 %".104", %".103"
%".106" = inttoptr i64 %".105" to double*
%".107" = load double, double* %".106"
%".108" = sitofp i64 1 to double
%".109" = fadd double %".107", %".108"
%".110" = load {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".111"
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %".110", {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".111"
%".115" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".111", i32 0, i32 5
%".116" = getelementptr inbounds [1 x i64], [1 x i64]* %".115", i32 0, i32 0
%".117" = load i64, i64* %".116", !range !1
%".118" = insertvalue [1 x i64] undef, i64 %".117", 0
%".119" = extractvalue [1 x i64] %".118", 0
%".120" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".111", i32 0, i32 6
%".121" = load [1 x i64], [1 x i64]* %".120"
%".122" = extractvalue [1 x i64] %".121", 0
%".123" = icmp slt i64 0, 0
%".124" = add i64 0, %".119"
%".125" = select i1 %".123", i64 %".124", i64 0
%".126" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".111", i32 0, i32 5
%".127" = getelementptr inbounds [1 x i64], [1 x i64]* %".126", i32 0, i32 0
%".128" = load i64, i64* %".127", !range !1
%".129" = insertvalue [1 x i64] undef, i64 %".128", 0
%".130" = extractvalue [1 x i64] %".129", 0
%".131" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".111", i32 0, i32 6
%".132" = load [1 x i64], [1 x i64]* %".131"
%".133" = extractvalue [1 x i64] %".132", 0
%".134" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".111", i32 0, i32 4
%".135" = load double*, double** %".134"
%".136" = mul i64 %".133", %".125"
%".137" = ptrtoint double* %".135" to i64
%".138" = add i64 %".137", %".136"
%".139" = inttoptr i64 %".138" to double*
store double %".109", double* %".139"
%".141" = load {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
br label %"B34"
B34:
%".144" = load {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
store i8* null, i8** %".ret"
ret i32 0
B0.if:
store i1 0, i1* %".65"
br label %"B0.endif"
B0.else:
store i1 1, i1* %".65"
br label %"B0.endif"
B0.endif:
%".73" = load i1, i1* %".65"
br i1 %".60", label %"B0.endif.if", label %"B0.endif.endif", !prof !2
B0.endif.if:
ret i32 %".56"
B0.endif.endif:
br i1 %".73", label %"B18", label %"B34"
}
declare i32 @"_ZN5numba7cpython7listobj6in_seq12_3clocals_3e17seq_contains_implB2v2B40c8tJTC_2fWQI8IW1CiAAYKPM6RBFDjESyhCQA_3dE8UniTupleIxLi2EEd"(i8* %".ret", i64 %"arg.lst.0", i64 %"arg.lst.1", double %"arg.value")
!nvvmir.version = !{ !0 }
!0 = !{ i32 1, i32 9, i32 3, i32 1 }
!1 = !{ i64 0, i64 9223372036854775807 }
!2 = !{ !"branch_weights", i32 1, i32 99 }
; ModuleID = "in_seq.<locals>.seq_contains_impl$2"
target triple = "nvptx64-nvidia-cuda"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
@"_ZN08NumbaEnv5numba7cpython7listobj6in_seq12_3clocals_3e17seq_contains_implB2v2B40c8tJTC_2fWQI8IW1CiAAYKPM6RBFDjESyhCQA_3dE8UniTupleIxLi2EEd" = common global i8* null
define linkonce_odr i32 @"_ZN5numba7cpython7listobj6in_seq12_3clocals_3e17seq_contains_implB2v2B40c8tJTC_2fWQI8IW1CiAAYKPM6RBFDjESyhCQA_3dE8UniTupleIxLi2EEd"(i8* %".ret", i64 %"arg.lst.0", i64 %"arg.lst.1", double %"arg.value")
{
entry:
%".6" = insertvalue [2 x i64] undef, i64 %"arg.lst.0", 0
%".7" = insertvalue [2 x i64] %".6", i64 %"arg.lst.1", 1
%"value" = alloca double
store double 0.0, double* %"value"
%".10" = alloca {i64*, [2 x i64]}
store {i64*, [2 x i64]} zeroinitializer, {i64*, [2 x i64]}* %".10"
%".13" = alloca i64
store i64 0, i64* %".13"
%"$phi6.0" = alloca {i64*, [2 x i64]}
store {i64*, [2 x i64]} zeroinitializer, {i64*, [2 x i64]}* %"$phi6.0"
%"$16pred" = alloca i1
store i1 0, i1* %"$16pred"
%".29" = alloca {i64, i1}
store {i64, i1} zeroinitializer, {i64, i1}* %".29"
%".32" = alloca {i64*, [2 x i64]}
store {i64*, [2 x i64]} zeroinitializer, {i64*, [2 x i64]}* %".32"
%".60" = alloca {i64, i1}
store {i64, i1} zeroinitializer, {i64, i1}* %".60"
%".66" = alloca {i64, i1}
store {i64, i1} zeroinitializer, {i64, i1}* %".66"
%"$phi8.1" = alloca i64
store i64 0, i64* %"$phi8.1"
br label %"B0"
B0:
store double %"arg.value", double* %"value"
store {i64*, [2 x i64]} zeroinitializer, {i64*, [2 x i64]}* %".10"
store i64 0, i64* %".13"
%".16" = getelementptr inbounds {i64*, [2 x i64]}, {i64*, [2 x i64]}* %".10", i32 0, i32 0
store i64* %".13", i64** %".16"
%".18" = getelementptr inbounds {i64*, [2 x i64]}, {i64*, [2 x i64]}* %".10", i32 0, i32 1
store [2 x i64] %".7", [2 x i64]* %".18"
%".20" = load {i64*, [2 x i64]}, {i64*, [2 x i64]}* %".10"
%".22" = load {i64*, [2 x i64]}, {i64*, [2 x i64]}* %"$phi6.0"
store {i64*, [2 x i64]} %".20", {i64*, [2 x i64]}* %"$phi6.0"
br label %"B6"
B6:
%".26" = load i1, i1* %"$16pred"
store i1 0, i1* %"$16pred"
%".28" = load {i64*, [2 x i64]}, {i64*, [2 x i64]}* %"$phi6.0"
store {i64, i1} zeroinitializer, {i64, i1}* %".29"
store {i64*, [2 x i64]} zeroinitializer, {i64*, [2 x i64]}* %".32"
store {i64*, [2 x i64]} %".28", {i64*, [2 x i64]}* %".32"
%".36" = getelementptr inbounds {i64*, [2 x i64]}, {i64*, [2 x i64]}* %".32", i32 0, i32 1
%".37" = load [2 x i64], [2 x i64]* %".36"
%".38" = getelementptr inbounds {i64*, [2 x i64]}, {i64*, [2 x i64]}* %".32", i32 0, i32 0
%".39" = load i64*, i64** %".38"
%".40" = load i64, i64* %".39"
%".41" = icmp slt i64 %".40", 2
%".42" = getelementptr inbounds {i64, i1}, {i64, i1}* %".29", i32 0, i32 1
store i1 %".41", i1* %".42"
br i1 %".41", label %"B6.if", label %"B6.endif"
B8:
%".76" = load i64, i64* %"$phi8.1"
%".77" = load i64, i64* %"$phi8.1"
store i64 0, i64* %"$phi8.1"
%".79" = load double, double* %"value"
%".80" = sitofp i64 %".76" to double
%".81" = fcmp oeq double %".80", %".79"
%".82" = load i1, i1* %"$16pred"
store i1 %".81", i1* %"$16pred"
%".84" = load i1, i1* %"$16pred"
br i1 %".84", label %"B18", label %"B6"
B18:
%".86" = load double, double* %"value"
store double 0.0, double* %"value"
%".88" = load {i64*, [2 x i64]}, {i64*, [2 x i64]}* %"$phi6.0"
store {i64*, [2 x i64]} zeroinitializer, {i64*, [2 x i64]}* %"$phi6.0"
%".90" = load i1, i1* %"$16pred"
store i1 0, i1* %"$16pred"
%".92" = zext i1 true to i8
store i8 %".92", i8* %".ret"
ret i32 0
B26:
%".95" = load double, double* %"value"
store double 0.0, double* %"value"
%".97" = load i64, i64* %"$phi8.1"
store i64 0, i64* %"$phi8.1"
%".99" = load {i64*, [2 x i64]}, {i64*, [2 x i64]}* %"$phi6.0"
store {i64*, [2 x i64]} zeroinitializer, {i64*, [2 x i64]}* %"$phi6.0"
%".101" = zext i1 false to i8
store i8 %".101", i8* %".ret"
ret i32 0
B6.if:
switch i64 %".40", label %"switch.else" [i64 0, label %"switch.0" i64 -2, label %"switch.0" i64 1, label %"switch.1" i64 -1, label %"switch.1"]
B6.endif:
%".59" = load {i64, i1}, {i64, i1}* %".29"
store {i64, i1} zeroinitializer, {i64, i1}* %".60"
store {i64, i1} %".59", {i64, i1}* %".60"
%".64" = getelementptr inbounds {i64, i1}, {i64, i1}* %".60", i32 0, i32 0
%".65" = load i64, i64* %".64"
store {i64, i1} zeroinitializer, {i64, i1}* %".66"
store {i64, i1} %".59", {i64, i1}* %".66"
%".70" = getelementptr inbounds {i64, i1}, {i64, i1}* %".66", i32 0, i32 1
%".71" = load i1, i1* %".70"
%".73" = load i64, i64* %"$phi8.1"
store i64 %".65", i64* %"$phi8.1"
br i1 %".71", label %"B8", label %"B26"
switch.else:
ret i32 1
switch.end:
%".47" = phi i64 [%".48", %"switch.0"], [%".50", %"switch.1"]
%".52" = getelementptr inbounds {i64, i1}, {i64, i1}* %".29", i32 0, i32 0
store i64 %".47", i64* %".52"
%".54" = add i64 %".40", 1
%".55" = getelementptr inbounds {i64*, [2 x i64]}, {i64*, [2 x i64]}* %".32", i32 0, i32 0
%".56" = load i64*, i64** %".55"
store i64 %".54", i64* %".56"
br label %"B6.endif"
switch.0:
%".48" = extractvalue [2 x i64] %".37", 0
br label %"switch.end"
switch.1:
%".50" = extractvalue [2 x i64] %".37", 1
br label %"switch.end"
}
!nvvmir.version = !{ !0 }
!0 = !{ i32 1, i32 9, i32 3, i32 1 }
#########################
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-31294372
// Cuda compilation tools, release 11.7, V11.7.64
// Based on NVVM 7.0.1
//
.version 7.7
.target sm_75
.address_size 64
// .globl _ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE
.visible .global .align 4 .u32 _ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__errcode__;
.visible .global .align 4 .u32 _ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__tidx__;
.visible .global .align 4 .u32 _ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__ctaidx__;
.visible .global .align 4 .u32 _ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__tidy__;
.visible .global .align 4 .u32 _ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__ctaidy__;
.visible .global .align 4 .u32 _ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__tidz__;
.visible .global .align 4 .u32 _ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__ctaidz__;
.common .global .align 8 .u64 _ZN08NumbaEnv8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE;
.common .global .align 8 .u64 _ZN08NumbaEnv5numba7cpython7listobj6in_seq12_3clocals_3e17seq_contains_implB2v2B40c8tJTC_2fWQI8IW1CiAAYKPM6RBFDjESyhCQA_3dE8UniTupleIxLi2EEd;
.visible .entry _ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE(
.param .u64 _ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE_param_0,
.param .u64 _ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE_param_1,
.param .u64 _ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE_param_2,
.param .u64 _ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE_param_3,
.param .u64 _ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE_param_4,
.param .u64 _ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE_param_5,
.param .u64 _ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE_param_6
)
{
.reg .pred %p<20>;
.reg .f64 %fd<4>;
.reg .b64 %rd<16>;
ld.param.u64 %rd8, [_ZN6cudapy8__main__4fn_aB2v1B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE_param_4];
cvta.to.global.u64 %rd1, %rd8;
ld.global.f64 %fd1, [%rd1];
mov.u64 %rd7, 0;
mov.u64 %rd10, 1;
mov.pred %p5, -1;
mov.pred %p12, 0;
mov.u64 %rd14, %rd7;
$L__BB0_1:
setp.gt.s64 %p3, %rd14, 1;
mov.u64 %rd13, %rd7;
@%p3 bra $L__BB0_9;
setp.gt.s64 %p6, %rd14, -1;
@%p6 bra $L__BB0_5;
setp.eq.s64 %p9, %rd14, -2;
mov.u64 %rd13, %rd10;
@%p9 bra $L__BB0_8;
setp.eq.s64 %p10, %rd14, -1;
mov.pred %p18, %p5;
mov.pred %p19, %p5;
@%p10 bra $L__BB0_7;
bra.uni $L__BB0_11;
$L__BB0_5:
setp.eq.s64 %p7, %rd14, 0;
mov.u64 %rd13, %rd10;
@%p7 bra $L__BB0_8;
setp.ne.s64 %p8, %rd14, 1;
mov.pred %p18, %p5;
mov.pred %p19, %p5;
@%p8 bra $L__BB0_11;
$L__BB0_7:
mov.u64 %rd13, 2;
$L__BB0_8:
add.s64 %rd14, %rd14, 1;
$L__BB0_9:
mov.pred %p18, %p5;
mov.pred %p19, %p12;
@%p3 bra $L__BB0_11;
cvt.rn.f64.s64 %fd2, %rd13;
setp.neu.f64 %p16, %fd1, %fd2;
mov.pred %p18, %p12;
mov.pred %p19, %p12;
@%p16 bra $L__BB0_1;
$L__BB0_11:
or.pred %p17, %p18, %p19;
@%p17 bra $L__BB0_13;
add.f64 %fd3, %fd1, 0d3FF0000000000000;
st.global.f64 [%rd1], %fd3;
$L__BB0_13:
ret;
}
*************************
; ModuleID = "cuda.kernel.wrapper"
target triple = "nvptx64-nvidia-cuda"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
declare i32 @"_ZN8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE"(i8** %".1", i8* %".2", i8* %".3", i64 %".4", i64 %".5", double* %".6", i64 %".7", i64 %".8")
define void @"_ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE"(i8* %".1", i8* %".2", i64 %".3", i64 %".4", double* %".5", i64 %".6", i64 %".7")
{
.9:
%"inserted.meminfo" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} undef, i8* %".1", 0
%"inserted.parent" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.meminfo", i8* %".2", 1
%"inserted.nitems" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.parent", i64 %".3", 2
%"inserted.itemsize" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.nitems", i64 %".4", 3
%"inserted.data" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.itemsize", double* %".5", 4
%".10" = insertvalue [1 x i64] undef, i64 %".6", 0
%"inserted.shape" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.data", [1 x i64] %".10", 5
%".11" = insertvalue [1 x i64] undef, i64 %".7", 0
%"inserted.strides" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.shape", [1 x i64] %".11", 6
%".12" = alloca i8*
store i8* null, i8** %".12"
store i8* null, i8** %".12"
%"extracted.meminfo" = extractvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.strides", 0
%"extracted.parent" = extractvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.strides", 1
%"extracted.nitems" = extractvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.strides", 2
%"extracted.itemsize" = extractvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.strides", 3
%"extracted.data" = extractvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.strides", 4
%"extracted.shape" = extractvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.strides", 5
%".15" = extractvalue [1 x i64] %"extracted.shape", 0
%"extracted.strides" = extractvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.strides", 6
%".16" = extractvalue [1 x i64] %"extracted.strides", 0
%".17" = call i32 @"_ZN8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE"(i8** %".12", i8* %"extracted.meminfo", i8* %"extracted.parent", i64 %"extracted.nitems", i64 %"extracted.itemsize", double* %"extracted.data", i64 %".15", i64 %".16")
%".18" = icmp eq i32 %".17", 0
%".19" = icmp eq i32 %".17", -2
%".20" = or i1 %".18", %".19"
%".21" = xor i1 %".20", -1
%".22" = icmp eq i32 %".17", -1
%".23" = icmp eq i32 %".17", -3
%".24" = icmp sge i32 %".17", 1
%".25" = load i8*, i8** %".12"
ret void
}
@"_ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__errcode__" = global i32 0
@"_ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__tidx__" = global i32 0
@"_ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__ctaidx__" = global i32 0
@"_ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__tidy__" = global i32 0
@"_ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__ctaidy__" = global i32 0
@"_ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__tidz__" = global i32 0
@"_ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__ctaidz__" = global i32 0
@"llvm.used" = appending global [1 x i8*] [i8* bitcast (void (i8*, i8*, i64, i64, double*, i64, i64)* @"_ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE" to i8*)], section "llvm.metadata"
!nvvmir.version = !{ !0 }
!nvvm.annotations = !{ !1 }
!0 = !{ i32 1, i32 9, i32 3, i32 1 }
!1 = !{ void (i8*, i8*, i64, i64, double*, i64, i64)* @"_ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE", !"kernel", i32 1 }
; ModuleID = "fn_b$3"
target triple = "nvptx64-nvidia-cuda"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
@"_ZN08NumbaEnv8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE" = common global i8* null
define linkonce_odr i32 @"_ZN8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE"(i8** %".ret", i8* %"arg.arg.0", i8* %"arg.arg.1", i64 %"arg.arg.2", i64 %"arg.arg.3", double* %"arg.arg.4", i64 %"arg.arg.5.0", i64 %"arg.arg.6.0")
{
entry:
%"inserted.meminfo" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} undef, i8* %"arg.arg.0", 0
%"inserted.parent" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.meminfo", i8* %"arg.arg.1", 1
%"inserted.nitems" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.parent", i64 %"arg.arg.2", 2
%"inserted.itemsize" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.nitems", i64 %"arg.arg.3", 3
%"inserted.data" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.itemsize", double* %"arg.arg.4", 4
%".10" = insertvalue [1 x i64] undef, i64 %"arg.arg.5.0", 0
%"inserted.shape" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.data", [1 x i64] %".10", 5
%".11" = insertvalue [1 x i64] undef, i64 %"arg.arg.6.0", 0
%"inserted.strides" = insertvalue {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.shape", [1 x i64] %".11", 6
%"arg" = alloca {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
%".15" = alloca {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".15"
%".49" = alloca {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".49"
%".83" = alloca {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".83"
%".116" = alloca {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".116"
br label %"B0"
B0:
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %"inserted.strides", {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
%".14" = load {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".15"
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %".14", {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".15"
%".19" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".15", i32 0, i32 5
%".20" = getelementptr inbounds [1 x i64], [1 x i64]* %".19", i32 0, i32 0
%".21" = load i64, i64* %".20", !range !1
%".22" = insertvalue [1 x i64] undef, i64 %".21", 0
%".23" = extractvalue [1 x i64] %".22", 0
%".24" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".15", i32 0, i32 6
%".25" = load [1 x i64], [1 x i64]* %".24"
%".26" = extractvalue [1 x i64] %".25", 0
%".27" = icmp slt i64 0, 0
%".28" = add i64 0, %".23"
%".29" = select i1 %".27", i64 %".28", i64 0
%".30" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".15", i32 0, i32 5
%".31" = getelementptr inbounds [1 x i64], [1 x i64]* %".30", i32 0, i32 0
%".32" = load i64, i64* %".31", !range !1
%".33" = insertvalue [1 x i64] undef, i64 %".32", 0
%".34" = extractvalue [1 x i64] %".33", 0
%".35" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".15", i32 0, i32 6
%".36" = load [1 x i64], [1 x i64]* %".35"
%".37" = extractvalue [1 x i64] %".36", 0
%".38" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".15", i32 0, i32 4
%".39" = load double*, double** %".38"
%".40" = mul i64 %".37", %".29"
%".41" = ptrtoint double* %".39" to i64
%".42" = add i64 %".41", %".40"
%".43" = inttoptr i64 %".42" to double*
%".44" = load double, double* %".43"
%".45" = sitofp i64 1 to double
%".46" = fcmp oeq double %".44", %".45"
br i1 %".46", label %"B26", label %"B14"
B14:
%".48" = load {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".49"
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %".48", {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".49"
%".53" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".49", i32 0, i32 5
%".54" = getelementptr inbounds [1 x i64], [1 x i64]* %".53", i32 0, i32 0
%".55" = load i64, i64* %".54", !range !1
%".56" = insertvalue [1 x i64] undef, i64 %".55", 0
%".57" = extractvalue [1 x i64] %".56", 0
%".58" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".49", i32 0, i32 6
%".59" = load [1 x i64], [1 x i64]* %".58"
%".60" = extractvalue [1 x i64] %".59", 0
%".61" = icmp slt i64 0, 0
%".62" = add i64 0, %".57"
%".63" = select i1 %".61", i64 %".62", i64 0
%".64" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".49", i32 0, i32 5
%".65" = getelementptr inbounds [1 x i64], [1 x i64]* %".64", i32 0, i32 0
%".66" = load i64, i64* %".65", !range !1
%".67" = insertvalue [1 x i64] undef, i64 %".66", 0
%".68" = extractvalue [1 x i64] %".67", 0
%".69" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".49", i32 0, i32 6
%".70" = load [1 x i64], [1 x i64]* %".69"
%".71" = extractvalue [1 x i64] %".70", 0
%".72" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".49", i32 0, i32 4
%".73" = load double*, double** %".72"
%".74" = mul i64 %".71", %".63"
%".75" = ptrtoint double* %".73" to i64
%".76" = add i64 %".75", %".74"
%".77" = inttoptr i64 %".76" to double*
%".78" = load double, double* %".77"
%".79" = sitofp i64 2 to double
%".80" = fcmp oeq double %".78", %".79"
br i1 %".80", label %"B26", label %"B42"
B26:
%".82" = load {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".83"
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %".82", {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".83"
%".87" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".83", i32 0, i32 5
%".88" = getelementptr inbounds [1 x i64], [1 x i64]* %".87", i32 0, i32 0
%".89" = load i64, i64* %".88", !range !1
%".90" = insertvalue [1 x i64] undef, i64 %".89", 0
%".91" = extractvalue [1 x i64] %".90", 0
%".92" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".83", i32 0, i32 6
%".93" = load [1 x i64], [1 x i64]* %".92"
%".94" = extractvalue [1 x i64] %".93", 0
%".95" = icmp slt i64 0, 0
%".96" = add i64 0, %".91"
%".97" = select i1 %".95", i64 %".96", i64 0
%".98" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".83", i32 0, i32 5
%".99" = getelementptr inbounds [1 x i64], [1 x i64]* %".98", i32 0, i32 0
%".100" = load i64, i64* %".99", !range !1
%".101" = insertvalue [1 x i64] undef, i64 %".100", 0
%".102" = extractvalue [1 x i64] %".101", 0
%".103" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".83", i32 0, i32 6
%".104" = load [1 x i64], [1 x i64]* %".103"
%".105" = extractvalue [1 x i64] %".104", 0
%".106" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".83", i32 0, i32 4
%".107" = load double*, double** %".106"
%".108" = mul i64 %".105", %".97"
%".109" = ptrtoint double* %".107" to i64
%".110" = add i64 %".109", %".108"
%".111" = inttoptr i64 %".110" to double*
%".112" = load double, double* %".111"
%".113" = sitofp i64 1 to double
%".114" = fadd double %".112", %".113"
%".115" = load {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".116"
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} %".115", {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".116"
%".120" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".116", i32 0, i32 5
%".121" = getelementptr inbounds [1 x i64], [1 x i64]* %".120", i32 0, i32 0
%".122" = load i64, i64* %".121", !range !1
%".123" = insertvalue [1 x i64] undef, i64 %".122", 0
%".124" = extractvalue [1 x i64] %".123", 0
%".125" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".116", i32 0, i32 6
%".126" = load [1 x i64], [1 x i64]* %".125"
%".127" = extractvalue [1 x i64] %".126", 0
%".128" = icmp slt i64 0, 0
%".129" = add i64 0, %".124"
%".130" = select i1 %".128", i64 %".129", i64 0
%".131" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".116", i32 0, i32 5
%".132" = getelementptr inbounds [1 x i64], [1 x i64]* %".131", i32 0, i32 0
%".133" = load i64, i64* %".132", !range !1
%".134" = insertvalue [1 x i64] undef, i64 %".133", 0
%".135" = extractvalue [1 x i64] %".134", 0
%".136" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".116", i32 0, i32 6
%".137" = load [1 x i64], [1 x i64]* %".136"
%".138" = extractvalue [1 x i64] %".137", 0
%".139" = getelementptr inbounds {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %".116", i32 0, i32 4
%".140" = load double*, double** %".139"
%".141" = mul i64 %".138", %".130"
%".142" = ptrtoint double* %".140" to i64
%".143" = add i64 %".142", %".141"
%".144" = inttoptr i64 %".143" to double*
store double %".114", double* %".144"
%".146" = load {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
br label %"B42"
B42:
%".149" = load {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
store {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]} zeroinitializer, {i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}* %"arg"
store i8* null, i8** %".ret"
ret i32 0
}
!nvvmir.version = !{ !0 }
!0 = !{ i32 1, i32 9, i32 3, i32 1 }
!1 = !{ i64 0, i64 9223372036854775807 }
#########################
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-31294372
// Cuda compilation tools, release 11.7, V11.7.64
// Based on NVVM 7.0.1
//
.version 7.7
.target sm_75
.address_size 64
// .globl _ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE
.visible .global .align 4 .u32 _ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__errcode__;
.visible .global .align 4 .u32 _ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__tidx__;
.visible .global .align 4 .u32 _ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__ctaidx__;
.visible .global .align 4 .u32 _ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__tidy__;
.visible .global .align 4 .u32 _ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__ctaidy__;
.visible .global .align 4 .u32 _ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__tidz__;
.visible .global .align 4 .u32 _ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE__ctaidz__;
.common .global .align 8 .u64 _ZN08NumbaEnv8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE;
.visible .entry _ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE(
.param .u64 _ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE_param_0,
.param .u64 _ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE_param_1,
.param .u64 _ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE_param_2,
.param .u64 _ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE_param_3,
.param .u64 _ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE_param_4,
.param .u64 _ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE_param_5,
.param .u64 _ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE_param_6
)
{
.reg .pred %p<4>;
.reg .f64 %fd<3>;
.reg .b64 %rd<3>;
ld.param.u64 %rd2, [_ZN6cudapy8__main__4fn_bB2v3B94cw51cXTLSUwv1sCUt9Uw11Ew1dRRQPKzLTg4gaGKFsG2oMQGEYakJSQB1PQBk0Bynm21OiwU1a0UoLGhDpQE8oxrNQE_3dE5ArrayIdLi1E1A7mutable7alignedE_param_4];
cvta.to.global.u64 %rd1, %rd2;
ld.global.f64 %fd1, [%rd1];
setp.neu.f64 %p1, %fd1, 0d3FF0000000000000;
setp.neu.f64 %p2, %fd1, 0d4000000000000000;
and.pred %p3, %p1, %p2;
@%p3 bra $L__BB0_2;
add.f64 %fd2, %fd1, 0d3FF0000000000000;
st.global.f64 [%rd1], %fd2;
$L__BB0_2:
ret;
}
*************************
The text was updated successfully, but these errors were encountered:
Feature request
We are seeing (tiny) differences in performance between `x in (1,2)` and `x == 1 or x == 2`, favoring the latter. A bit of asm inspection shows that a subfunction is being created and not inlined in the first case on the CUDA target. Is it possible to automatically inline this or add support for this case to `literal_unroll`? It makes the code a bit prettier and more pythonic if you can use the `in` statement.
Yields
The text was updated successfully, but these errors were encountered: