; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s

; Codegen test for the llvm.nvvm.shfl.sync.* intrinsics on NVPTX.
;
; Every function below calls @llvm.nvvm.shfl.sync.down.i32 with a different
; mix of register and immediate operands. The three-letter suffix of the
; function name describes, in order, the mask operand, the third intrinsic
; argument (%b position) and the fourth intrinsic argument (%c position):
;   r = value comes from a function parameter (register)
;   i = value is an immediate constant
; The shuffled value (%a) is always a register.
;
; The intrinsic takes (mask, a, b, c); the expected PTX instruction lists the
; operands as "out, a, b, c, mask", i.e. the mask is emitted last.

; Intrinsic declarations. Only the down.i32 variant is called in the functions
; visible here; the remaining variants are declared but not exercised.
declare i32 @llvm.nvvm.shfl.sync.down.i32(i32, i32, i32, i32)
declare float @llvm.nvvm.shfl.sync.down.f32(float, i32, i32, i32)
declare i32 @llvm.nvvm.shfl.sync.up.i32(i32, i32, i32, i32)
declare float @llvm.nvvm.shfl.sync.up.f32(float, i32, i32, i32)
declare i32 @llvm.nvvm.shfl.sync.bfly.i32(i32, i32, i32, i32)
declare float @llvm.nvvm.shfl.sync.bfly.f32(float, i32, i32, i32)
declare i32 @llvm.nvvm.shfl.sync.idx.i32(i32, i32, i32, i32)
declare float @llvm.nvvm.shfl.sync.idx.f32(float, i32, i32, i32)

; mask = register, b = register, c = register.
; CHECK-LABEL: .func{{.*}}shfl.sync.rrr
define i32 @shfl.sync.rrr(i32 %mask, i32 %a, i32 %b, i32 %c) {
  ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]]
  ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
  ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
  ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
  ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], [[B]], [[C]], [[MASK]];
  ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
  %val = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 %mask, i32 %a, i32 %b, i32 %c)
  ret i32 %val
}

; mask = immediate 1, b = register, c = register.
; CHECK-LABEL: .func{{.*}}shfl.sync.irr
define i32 @shfl.sync.irr(i32 %a, i32 %b, i32 %c) {
  ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
  ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
  ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
  ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], [[B]], [[C]], 1;
  ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
  %val = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 1, i32 %a, i32 %b, i32 %c)
  ret i32 %val
}

; mask = register, b = register, c = immediate 1.
; CHECK-LABEL: .func{{.*}}shfl.sync.rri
define i32 @shfl.sync.rri(i32 %mask, i32 %a, i32 %b) {
  ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]]
  ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
  ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
  ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], [[B]], 1, [[MASK]];
  ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
  %val = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 %mask, i32 %a, i32 %b, i32 1)
  ret i32 %val
}

; mask = immediate 1, b = register, c = immediate 2.
; CHECK-LABEL: .func{{.*}}shfl.sync.iri
define i32 @shfl.sync.iri(i32 %a, i32 %b) {
  ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
  ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
  ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], [[B]], 2, 1;
  ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
  %val = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 1, i32 %a, i32 %b, i32 2)
  ret i32 %val
}

; mask = register, b = immediate 1, c = register.
; CHECK-LABEL: .func{{.*}}shfl.sync.rir
define i32 @shfl.sync.rir(i32 %mask, i32 %a, i32 %c) {
  ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]]
  ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
  ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
  ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], 1, [[C]], [[MASK]];
  ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
  %val = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 %mask, i32 %a, i32 1, i32 %c)
  ret i32 %val
}

; mask = immediate 1, b = immediate 2, c = register.
; CHECK-LABEL: .func{{.*}}shfl.sync.iir
define i32 @shfl.sync.iir(i32 %a, i32 %c) {
  ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
  ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
  ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], 2, [[C]], 1;
  ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
  %val = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 1, i32 %a, i32 2, i32 %c)
  ret i32 %val
}

; mask = register, b = immediate 1, c = immediate 2.
; CHECK-LABEL: .func{{.*}}shfl.sync.rii
define i32 @shfl.sync.rii(i32 %mask, i32 %a) {
  ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]]
  ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
  ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], 1, 2, [[MASK]];
  ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
  %val = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 %mask, i32 %a, i32 1, i32 2)
  ret i32 %val
}

; mask = immediate 1, b = immediate 2, c = immediate 3.
; NOTE(review): the %b parameter is never used in this function; it looks like
; a leftover from a sibling test and could probably be dropped - confirm.
; CHECK-LABEL: .func{{.*}}shfl.sync.iii
define i32 @shfl.sync.iii(i32 %a, i32 %b) {
  ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
  ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], 2, 3, 1;
  ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
  %val = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 1, i32 %a, i32 2, i32 3)
  ret i32 %val
}