; Verifies that the llvm.amdgcn.wavefrontsize intrinsic is replaced by a
; constant whenever the wave size is known, both in generated code (llc) and in
; optimized IR (opt), and that the call survives when the wave size is unknown.

; Code generation runs. The fiji run and the gfx1010 run with +wavefrontsize64
; share the 64-lane expectations; gfx1010 with +wavefrontsize32 uses the
; 32-lane expectations.
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W32 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s

; IR optimizer runs. The first two give neither a CPU nor a wave-size feature,
; so they use the "unknown wave size" expectations (the call is kept). The
; remaining runs pin the wave size through -mcpu and/or -mattr.
; RUN: opt -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-WXX %s
; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-WXX %s
; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s
; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s
; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s

; Case 1 - the intrinsic result is stored directly. A literal 32 or 64 is
; expected when the wave size is known; otherwise the call must remain.

; GCN-LABEL: {{^}}fold_wavefrontsize:
; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize(

; W32: v_mov_b32_e32 [[V:v[0-9]+]], 32
; W64: v_mov_b32_e32 [[V:v[0-9]+]], 64
; GCN: store_dword v{{.+}}, [[V]]

; OPT-W32: store i32 32, i32 addrspace(1)* %arg, align 4
; OPT-W64: store i32 64, i32 addrspace(1)* %arg, align 4
; OPT-WXX: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
; OPT-WXX: store i32 %tmp, i32 addrspace(1)* %arg, align 4
; OPT-NEXT: ret void

define amdgpu_kernel void @fold_wavefrontsize(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
  store i32 %tmp, i32 addrspace(1)* %arg, align 4
  ret void
}

; Case 2 - a compare-and-select on the wave size. With a known wave size the
; whole expression must collapse to the constant 1 (32 lanes) or 2 (64 lanes),
; and the generated code must contain no cndmask instruction.

; GCN-LABEL: {{^}}fold_and_optimize_wavefrontsize:
; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize(

; W32: v_mov_b32_e32 [[V:v[0-9]+]], 1{{$}}
; W64: v_mov_b32_e32 [[V:v[0-9]+]], 2{{$}}
; GCN-NOT: cndmask
; GCN: store_dword v{{.+}}, [[V]]

; OPT-W32: store i32 1, i32 addrspace(1)* %arg, align 4
; OPT-W64: store i32 2, i32 addrspace(1)* %arg, align 4
; OPT-WXX: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
; OPT-WXX: %tmp1 = icmp ugt i32 %tmp, 32
; OPT-WXX: %tmp2 = select i1 %tmp1, i32 2, i32 1
; OPT-WXX: store i32 %tmp2, i32 addrspace(1)* %arg
; OPT-NEXT: ret void

define amdgpu_kernel void @fold_and_optimize_wavefrontsize(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
  %tmp1 = icmp ugt i32 %tmp, 32
  %tmp2 = select i1 %tmp1, i32 2, i32 1
  store i32 %tmp2, i32 addrspace(1)* %arg
  ret void
}

; Case 3 - a branch on the wave size guarding a store. With 64 lanes the store
; is expected right before the return; with 32 lanes the return must directly
; follow the entry label; with an unknown wave size the call, the compare and
; the bb3 label are all still expected.

; GCN-LABEL: {{^}}fold_and_optimize_if_wavefrontsize:
; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(

; OPT: bb:
; OPT-WXX: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
; OPT-WXX: %tmp1 = icmp ugt i32 %tmp, 32
; OPT-WXX: bb3:
; OPT-W64: store i32 1, i32 addrspace(1)* %arg, align 4
; OPT-NEXT: ret void

define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
  %tmp1 = icmp ugt i32 %tmp, 32
  br i1 %tmp1, label %bb2, label %bb3

bb2:                                              ; preds = %bb
  store i32 1, i32 addrspace(1)* %arg, align 4
  br label %bb3

bb3:                                              ; preds = %bb2, %bb
  ret void
}

; The intrinsic under test, declared with attribute group #0.
declare i32 @llvm.amdgcn.wavefrontsize() #0

attributes #0 = { nounwind readnone speculatable }