; RUN: llvm-as %s -o %t.bc
; RUN: llvm-spirv %t.bc -spirv-text -o %t.txt
; RUN: FileCheck < %t.txt %s --check-prefix=CHECK-SPIRV
; RUN: llvm-spirv %t.bc -o %t.spv
; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM

; Round-trip test for the OpenCL built-in async_work_group_copy.
;
; The commands above assemble this file, translate it to textual SPIR-V and
; verify that text, then translate it to binary SPIR-V, reverse-translate the
; binary back to LLVM bitcode, disassemble it and verify the resulting IR.

; Expected IR after the round trip. The input below calls the non-strided
; async_work_group_copy on a <2 x i8> pointer mangled as char2 (Dv2_c); the
; regenerated call is expected to be the strided variant, mangled as uchar2
; (Dv2_h) with one additional unsigned int ('j') parameter.
; NOTE(review): presumably this is because OpGroupAsyncCopy always carries a
; Stride operand and the element signedness is not preserved - confirm against
; the translator.
; CHECK-LLVM: call spir_func %opencl.event_t{{.*}}* @_Z29async_work_group_strided_copyPU3AS1Dv2_hPKU3AS3S_jj9ocl_event(

; Expected textual SPIR-V. The third number following GroupAsyncCopy is
; captured as Scope and must also be the second number on some Constant line,
; i.e. that operand has to be an id produced by a Constant instruction. The
; two lines may match in either order.
; CHECK-SPIRV-DAG: GroupAsyncCopy {{[0-9]+}} {{[0-9]+}} [[Scope:[0-9]+]]
; CHECK-SPIRV-DAG: Constant {{[0-9]+}} [[Scope]]

target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir-unknown-unknown"

%opencl.event_t = type opaque

; Kernel under test.
;   %src                - addrspace(1) pointer read by the second loop
;   %dst                - addrspace(1) pointer used as the async copy destination
;   %localBuffer        - addrspace(3) pointer used as scratch and as copy source
;   %copiesPerWorkgroup - element count passed to the async copy; also scales
;                         the destination offset by the group id
;   %copiesPerWorkItem  - trip count of both loops
; Function Attrs: nounwind
define spir_kernel void @test_fn(<2 x i8> addrspace(1)* %src, <2 x i8> addrspace(1)* %dst, <2 x i8> addrspace(3)* %localBuffer, i32 %copiesPerWorkgroup, i32 %copiesPerWorkItem) #0 {
entry:
  ; Spill every argument to a stack slot and initialise the loop counter %i.
  %src.addr = alloca <2 x i8> addrspace(1)*, align 4
  %dst.addr = alloca <2 x i8> addrspace(1)*, align 4
  %localBuffer.addr = alloca <2 x i8> addrspace(3)*, align 4
  %copiesPerWorkgroup.addr = alloca i32, align 4
  %copiesPerWorkItem.addr = alloca i32, align 4
  %i = alloca i32, align 4
  %event = alloca %opencl.event_t*, align 4
  store <2 x i8> addrspace(1)* %src, <2 x i8> addrspace(1)** %src.addr, align 4
  store <2 x i8> addrspace(1)* %dst, <2 x i8> addrspace(1)** %dst.addr, align 4
  store <2 x i8> addrspace(3)* %localBuffer, <2 x i8> addrspace(3)** %localBuffer.addr, align 4
  store i32 %copiesPerWorkgroup, i32* %copiesPerWorkgroup.addr, align 4
  store i32 %copiesPerWorkItem, i32* %copiesPerWorkItem.addr, align 4
  store i32 0, i32* %i, align 4
  br label %for.cond

; First loop: for (i = 0; i < copiesPerWorkItem; ++i)
for.cond:                                         ; preds = %for.inc, %entry
  %0 = load i32, i32* %i, align 4
  %1 = load i32, i32* %copiesPerWorkItem.addr, align 4
  %cmp = icmp slt i32 %0, %1
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  ; localBuffer[get_local_id(0) * copiesPerWorkItem + i] = zeroinitializer
  %call = call spir_func i32 @_Z12get_local_idj(i32 0)
  %2 = load i32, i32* %copiesPerWorkItem.addr, align 4
  %mul = mul i32 %call, %2
  %3 = load i32, i32* %i, align 4
  %add = add i32 %mul, %3
  %4 = load <2 x i8> addrspace(3)*, <2 x i8> addrspace(3)** %localBuffer.addr, align 4
  %arrayidx = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %4, i32 %add
  store <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* %arrayidx, align 2
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %5 = load i32, i32* %i, align 4
  %inc = add nsw i32 %5, 1
  store i32 %inc, i32* %i, align 4
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ; barrier(1), then reset the counter for the second loop.
  call spir_func void @_Z7barrierj(i32 1)
  store i32 0, i32* %i, align 4
  br label %for.cond1

; Second loop: for (i = 0; i < copiesPerWorkItem; ++i)
for.cond1:                                        ; preds = %for.inc12, %for.end
  %6 = load i32, i32* %i, align 4
  %7 = load i32, i32* %copiesPerWorkItem.addr, align 4
  %cmp2 = icmp slt i32 %6, %7
  br i1 %cmp2, label %for.body3, label %for.end14

for.body3:                                        ; preds = %for.cond1
  ; Load src[get_global_id(0) * copiesPerWorkItem + i] ...
  %call4 = call spir_func i32 @_Z13get_global_idj(i32 0)
  %8 = load i32, i32* %copiesPerWorkItem.addr, align 4
  %mul5 = mul i32 %call4, %8
  %9 = load i32, i32* %i, align 4
  %add6 = add i32 %mul5, %9
  %10 = load <2 x i8> addrspace(1)*, <2 x i8> addrspace(1)** %src.addr, align 4
  %arrayidx7 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %10, i32 %add6
  %11 = load <2 x i8>, <2 x i8> addrspace(1)* %arrayidx7, align 2
  ; ... and store it to localBuffer[get_local_id(0) * copiesPerWorkItem + i].
  %call8 = call spir_func i32 @_Z12get_local_idj(i32 0)
  %12 = load i32, i32* %copiesPerWorkItem.addr, align 4
  %mul9 = mul i32 %call8, %12
  %13 = load i32, i32* %i, align 4
  %add10 = add i32 %mul9, %13
  %14 = load <2 x i8> addrspace(3)*, <2 x i8> addrspace(3)** %localBuffer.addr, align 4
  %arrayidx11 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %14, i32 %add10
  store <2 x i8> %11, <2 x i8> addrspace(3)* %arrayidx11, align 2
  br label %for.inc12

for.inc12:                                        ; preds = %for.body3
  %15 = load i32, i32* %i, align 4
  %inc13 = add nsw i32 %15, 1
  store i32 %inc13, i32* %i, align 4
  br label %for.cond1

for.end14:                                        ; preds = %for.cond1
  call spir_func void @_Z7barrierj(i32 1)
  ; Destination pointer: dst + copiesPerWorkgroup * get_group_id(0).
  %16 = load <2 x i8> addrspace(1)*, <2 x i8> addrspace(1)** %dst.addr, align 4
  %17 = load i32, i32* %copiesPerWorkgroup.addr, align 4
  %call15 = call spir_func i32 @_Z12get_group_idj(i32 0)
  %mul16 = mul i32 %17, %call15
  %add.ptr = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %16, i32 %mul16
  %18 = load <2 x i8> addrspace(3)*, <2 x i8> addrspace(3)** %localBuffer.addr, align 4
  %19 = load i32, i32* %copiesPerWorkgroup.addr, align 4
  ; The call this test is about: copy copiesPerWorkgroup elements from
  ; localBuffer to the destination pointer, passing a null input event.
  %call17 = call spir_func %opencl.event_t* @_Z21async_work_group_copyPU3AS1Dv2_cPKU3AS3S_j9ocl_event(<2 x i8> addrspace(1)* %add.ptr, <2 x i8> addrspace(3)* %18, i32 %19, %opencl.event_t* null)
  store %opencl.event_t* %call17, %opencl.event_t** %event, align 4
  ; wait_group_events takes an addrspace(4) pointer, so the event slot's
  ; address is cast before the call.
  %20 = addrspacecast %opencl.event_t** %event to %opencl.event_t* addrspace(4)*
  call spir_func void @_Z17wait_group_eventsiPU3AS49ocl_event(i32 1, %opencl.event_t* addrspace(4)* %20)
  ret void
}

; OpenCL built-ins referenced by the kernel (Itanium-mangled names).
declare spir_func i32 @_Z12get_local_idj(i32) #1

declare spir_func void @_Z7barrierj(i32) #1

declare spir_func i32 @_Z13get_global_idj(i32) #1

declare spir_func %opencl.event_t* @_Z21async_work_group_copyPU3AS1Dv2_cPKU3AS3S_j9ocl_event(<2 x i8> addrspace(1)*, <2 x i8> addrspace(3)*, i32, %opencl.event_t*) #1

declare spir_func i32 @_Z12get_group_idj(i32) #1

declare spir_func void @_Z17wait_group_eventsiPU3AS49ocl_event(i32, %opencl.event_t* addrspace(4)*) #1

attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

; Module-level OpenCL / SPIR metadata: !0 describes the kernel and its
; arguments, !6 and !7 are the SPIR and OpenCL version tuples, and !8 is the
; empty node shared by the three option/extension lists.
!opencl.kernels = !{!0}
!opencl.enable.FP_CONTRACT = !{}
!opencl.spir.version = !{!6}
!opencl.ocl.version = !{!7}
!opencl.used.extensions = !{!8}
!opencl.used.optional.core.features = !{!8}
!opencl.compiler.options = !{!8}

!0 = !{void (<2 x i8> addrspace(1)*, <2 x i8> addrspace(1)*, <2 x i8> addrspace(3)*, i32, i32)* @test_fn, !1, !2, !3, !4, !5}
!1 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 3, i32 0, i32 0}
!2 = !{!"kernel_arg_access_qual", !"none", !"none", !"none", !"none", !"none"}
!3 = !{!"kernel_arg_type", !"char2*", !"char2*", !"char2*", !"int", !"int"}
!4 = !{!"kernel_arg_base_type", !"char2*", !"char2*", !"char2*", !"int", !"int"}
!5 = !{!"kernel_arg_type_qual", !"const", !"", !"", !"", !""}
!6 = !{i32 1, i32 2}
!7 = !{i32 2, i32 0}
!8 = !{}