; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

; Codegen checks for 32-bit scalar adds of a constant: which constants are
; expected to be emitted as s_addk_i32 with the constant as its immediate
; operand, and which are expected to use another instruction instead. Both
; llc invocations above are verified against the same set of check lines.

; TODO: Some of these tests fail with OS == amdhsa due to unreasonable register
; allocation differences.

; Scalar add of 65 (0x41): the loaded kernel argument is expected to be updated
; in place by s_addk_i32, then copied to a VGPR and stored.
; SI-LABEL: {{^}}s_addk_i32_k0:
; SI: s_load_dword [[VAL:s[0-9]+]]
; SI: s_addk_i32 [[VAL]], 0x41
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VRESULT]]
; SI: s_endpgm
define amdgpu_kernel void @s_addk_i32_k0(i32 addrspace(1)* %out, i32 %b) {
  %add = add i32 %b, 65
  store i32 %add, i32 addrspace(1)* %out
  ret void
}

; The same constant (65) is added to two different values. The current
; expectation is one s_movk_i32 materializing the constant, reused by two
; s_add_i32 instructions, rather than two s_addk_i32.
; FIXME: This should be folded with any number of uses.
; SI-LABEL: {{^}}s_addk_i32_k0_x2:
; SI: s_movk_i32 [[K:s[0-9]+]], 0x41
; SI-DAG: s_add_i32 {{s[0-9]+}}, {{s[0-9]+}}, [[K]]
; SI-DAG: s_add_i32 {{s[0-9]+}}, {{s[0-9]+}}, [[K]]
; SI: s_endpgm
define amdgpu_kernel void @s_addk_i32_k0_x2(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %a, i32 %b) {
  %add0 = add i32 %a, 65
  %add1 = add i32 %b, 65
  store i32 %add0, i32 addrspace(1)* %out0
  store i32 %add1, i32 addrspace(1)* %out1
  ret void
}

; Upper boundary case: 32767 (0x7fff) is still expected to use s_addk_i32.
; SI-LABEL: {{^}}s_addk_i32_k1:
; SI: s_addk_i32 {{s[0-9]+}}, 0x7fff{{$}}
; SI: s_endpgm
define amdgpu_kernel void @s_addk_i32_k1(i32 addrspace(1)* %out, i32 %b) {
  %add = add i32 %b, 32767 ; (1 << 15) - 1
  store i32 %add, i32 addrspace(1)* %out
  ret void
}

; Adding -17 is expected to be emitted as a subtract of 17 (s_sub_i32), not as
; s_addk_i32.
; SI-LABEL: {{^}}s_addk_i32_k2:
; SI: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, 17
; SI: s_endpgm
define amdgpu_kernel void @s_addk_i32_k2(i32 addrspace(1)* %out, i32 %b) {
  %add = add i32 %b, -17
  store i32 %add, i32 addrspace(1)* %out
  ret void
}

; Adding -65 is expected to use s_addk_i32; 0xffbf is -65 written as a 16-bit
; two's-complement value.
; SI-LABEL: {{^}}s_addk_i32_k3:
; SI: s_addk_i32 {{s[0-9]+}}, 0xffbf{{$}}
; SI: s_endpgm
define amdgpu_kernel void @s_addk_i32_k3(i32 addrspace(1)* %out, i32 %b) {
  %add = add i32 %b, -65
  store i32 %add, i32 addrspace(1)* %out
  ret void
}

; Vector cases: one s_addk_i32 is expected per element, each with that
; element's own constant. The checks accept the adds in any order.
; SI-LABEL: {{^}}s_addk_v2i32_k0:
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x41
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x42
; SI: s_endpgm
define amdgpu_kernel void @s_addk_v2i32_k0(<2 x i32> addrspace(1)* %out, <2 x i32> %b) {
  %add = add <2 x i32> %b, <i32 65, i32 66>
  store <2 x i32> %add, <2 x i32> addrspace(1)* %out
  ret void
}

; Four-element variant of the vector case (constants 65 through 68).
; SI-LABEL: {{^}}s_addk_v4i32_k0:
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x41
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x42
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x43
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x44
; SI: s_endpgm
define amdgpu_kernel void @s_addk_v4i32_k0(<4 x i32> addrspace(1)* %out, <4 x i32> %b) {
  %add = add <4 x i32> %b, <i32 65, i32 66, i32 67, i32 68>
  store <4 x i32> %add, <4 x i32> addrspace(1)* %out
  ret void
}

; Eight-element variant of the vector case (constants 65 through 72).
; SI-LABEL: {{^}}s_addk_v8i32_k0:
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x41
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x42
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x43
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x44
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x45
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x46
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x47
; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x48
; SI: s_endpgm
define amdgpu_kernel void @s_addk_v8i32_k0(<8 x i32> addrspace(1)* %out, <8 x i32> %b) {
  %add = add <8 x i32> %b, <i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72>
  store <8 x i32> %add, <8 x i32> addrspace(1)* %out
  ret void
}

; Negative test: 32768 (0x8000) is one past the constant used in
; s_addk_i32_k1, and a plain s_add_i32 with a literal operand is expected
; instead of s_addk_i32.
; SI-LABEL: {{^}}no_s_addk_i32_k0:
; SI: s_add_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x8000{{$}}
; SI: s_endpgm
define amdgpu_kernel void @no_s_addk_i32_k0(i32 addrspace(1)* %out, i32 %b) {
  %add = add i32 %b, 32768 ; 1 << 15
  store i32 %add, i32 addrspace(1)* %out
  ret void
}

@lds = addrspace(3) global [512 x i32] undef, align 4

; Here the constant-like value is the first operand of the add and the kernel
; argument is the second, yet s_addk_i32 is still expected.
; NOTE(review): 0x800 presumably is the 2048-byte size of @lds (512 x i32) as
; returned by llvm.amdgcn.groupstaticsize; confirm against the intrinsic docs.
; SI-LABEL: {{^}}commute_s_addk_i32:
; SI: s_addk_i32 s{{[0-9]+}}, 0x800{{$}}
define amdgpu_kernel void @commute_s_addk_i32(i32 addrspace(1)* %out, i32 %b) #0 {
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %add = add i32 %size, %b
  ; The inline asm takes @lds and the sum as operands, so both are used.
  call void asm sideeffect "; foo $0, $1", "v,s"([512 x i32] addrspace(3)* @lds, i32 %add)
  ret void
}

declare i32 @llvm.amdgcn.groupstaticsize() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }