; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI %s
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI %s

; Exercises insertelement for the amdgcn target with constant and dynamic
; indices. Both llc invocations above (-mcpu=SI and -mcpu=tonga) are verified
; against the same "SI" FileCheck prefix. Lines tagged FIXMESI use a prefix
; that neither invocation enables: they record output that is wanted but not
; currently verified, and FileCheck never matches them.

; FIXME: Broken on evergreen
; FIXME: For some reason the 8 and 16 vectors are being stored as
; individual elements instead of 128-bit stores.


; FIXME: Why is the constant moved into the intermediate register and
; not just directly into the vector component?

; Constant index 0 into <4 x float>. The instruction expectations below are
; disabled (FIXMESI); only the function label is actually checked.
; SI-LABEL: {{^}}insertelement_v4f32_0:
; FIXMESI: s_load_dwordx4 s{{\[}}[[LOW_REG:[0-9]+]]:
; FIXMESI: v_mov_b32_e32
; FIXMESI: v_mov_b32_e32 [[CONSTREG:v[0-9]+]], 5.000000e+00
; FIXMESI: v_mov_b32_e32 v[[LOW_REG]], [[CONSTREG]]
; FIXMESI: buffer_store_dwordx4 v{{\[}}[[LOW_REG]]:
define void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 0
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; Constant index 1 into <4 x float>; only the function label is checked.
; SI-LABEL: {{^}}insertelement_v4f32_1:
define void @insertelement_v4f32_1(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 1
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; Constant index 2 into <4 x float>; only the function label is checked.
; SI-LABEL: {{^}}insertelement_v4f32_2:
define void @insertelement_v4f32_2(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 2
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; Constant index 3 into <4 x float>; only the function label is checked.
; SI-LABEL: {{^}}insertelement_v4f32_3:
define void @insertelement_v4f32_3(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 3
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; Constant index 0 into <4 x i32>; only the function label is checked.
; SI-LABEL: {{^}}insertelement_v4i32_0:
define void @insertelement_v4i32_0(<4 x i32> addrspace(1)* %out, <4 x i32> %a) nounwind {
  %vecins = insertelement <4 x i32> %a, i32 999, i32 0
  store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; Dynamic index (%b) into <2 x float>. Expects the constant 5.0 (0x40a00000)
; in a VGPR, a v_movreld_b32 write, and a single 64-bit store whose first
; register is the v_movreld destination.
; SI-LABEL: {{^}}dynamic_insertelement_v2f32:
; SI: v_mov_b32_e32 [[CONST:v[0-9]+]], 0x40a00000
; SI: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
; SI: buffer_store_dwordx2 {{v\[}}[[LOW_RESULT_REG]]:
define void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x float> %a, float 5.000000e+00, i32 %b
  store <2 x float> %vecins, <2 x float> addrspace(1)* %out, align 8
  ret void
}

; Dynamic index into <4 x float>; same pattern with a single 128-bit store.
; SI-LABEL: {{^}}dynamic_insertelement_v4f32:
; SI: v_mov_b32_e32 [[CONST:v[0-9]+]], 0x40a00000
; SI: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
; SI: buffer_store_dwordx4 {{v\[}}[[LOW_RESULT_REG]]:
define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 %b
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; Dynamic index into <8 x float>. The two 128-bit stores are a disabled
; expectation (see the FIXME at the top of the file).
; SI-LABEL: {{^}}dynamic_insertelement_v8f32:
; FIXMESI: buffer_store_dwordx4
; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b
  store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32
  ret void
}

; Dynamic index into <16 x float>. The four 128-bit stores are a disabled
; expectation.
; SI-LABEL: {{^}}dynamic_insertelement_v16f32:
; FIXMESI: buffer_store_dwordx4
; FIXMESI: buffer_store_dwordx4
; FIXMESI: buffer_store_dwordx4
; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <16 x float> %a, float 5.000000e+00, i32 %b
  store <16 x float> %vecins, <16 x float> addrspace(1)* %out, align 64
  ret void
}

; Dynamic index into <2 x i32>; expects one 64-bit store.
; SI-LABEL: {{^}}dynamic_insertelement_v2i32:
; SI: buffer_store_dwordx2
define void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x i32> %a, i32 5, i32 %b
  store <2 x i32> %vecins, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; Dynamic index into <4 x i32>; expects one 128-bit store.
; SI-LABEL: {{^}}dynamic_insertelement_v4i32:
; SI: buffer_store_dwordx4
define void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x i32> %a, i32 5, i32 %b
  store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; Dynamic index into <8 x i32>; store expectations are disabled.
; SI-LABEL: {{^}}dynamic_insertelement_v8i32:
; FIXMESI: buffer_store_dwordx4
; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <8 x i32> %a, i32 5, i32 %b
  store <8 x i32> %vecins, <8 x i32> addrspace(1)* %out, align 32
  ret void
}

; Dynamic index into <16 x i32>; store expectations are disabled.
; SI-LABEL: {{^}}dynamic_insertelement_v16i32:
; FIXMESI: buffer_store_dwordx4
; FIXMESI: buffer_store_dwordx4
; FIXMESI: buffer_store_dwordx4
; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <16 x i32> %a, i32 5, i32 %b
  store <16 x i32> %vecins, <16 x i32> addrspace(1)* %out, align 64
  ret void
}


; Dynamic index into <2 x i16>; the store expectation is disabled.
; SI-LABEL: {{^}}dynamic_insertelement_v2i16:
; FIXMESI: buffer_store_dwordx2
define void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x i16> %a, i16 5, i32 %b
  store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out, align 8
  ret void
}

; Dynamic index into <4 x i16>; the store expectation is disabled.
; SI-LABEL: {{^}}dynamic_insertelement_v4i16:
; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x i16> %a, i16 5, i32 %b
  store <4 x i16> %vecins, <4 x i16> addrspace(1)* %out, align 16
  ret void
}


; Dynamic index into <2 x i8>; the store expectation is disabled. The mnemonic
; is written in the same lower-case form as every other check in this file.
; SI-LABEL: {{^}}dynamic_insertelement_v2i8:
; FIXMESI: buffer_store_short
define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x i8> %a, i8 5, i32 %b
  store <2 x i8> %vecins, <2 x i8> addrspace(1)* %out, align 8
  ret void
}

; Dynamic index into <4 x i8>; the store expectation is disabled.
; SI-LABEL: {{^}}dynamic_insertelement_v4i8:
; FIXMESI: buffer_store_dword
define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x i8> %a, i8 5, i32 %b
  store <4 x i8> %vecins, <4 x i8> addrspace(1)* %out, align 16
  ret void
}

; Dynamic index into <8 x i8>; the store expectation is disabled.
; SI-LABEL: {{^}}dynamic_insertelement_v8i8:
; FIXMESI: buffer_store_dwordx2
define void @dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <8 x i8> %a, i8 5, i32 %b
  store <8 x i8> %vecins, <8 x i8> addrspace(1)* %out, align 16
  ret void
}

; Dynamic index into <16 x i8>; the store expectation is disabled.
; SI-LABEL: {{^}}dynamic_insertelement_v16i8:
; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <16 x i8> %a, i8 5, i32 %b
  store <16 x i8> %vecins, <16 x i8> addrspace(1)* %out, align 16
  ret void
}

; This test requires handling INSERT_SUBREG in SIFixSGPRCopies.  Check that
; the compiler doesn't crash.
; Element 0 of a <2 x i32> is written in the entry block. Element 1 is then
; filled with a value loaded on either side of a branch on %a == 0, and the
; two vectors are merged with a phi before the store. Only the function label
; is checked.
; SI-LABEL: {{^}}insert_split_bb:
define void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b) {
entry:
  %vec.lo = insertelement <2 x i32> undef, i32 %a, i32 0
  %a.is.zero = icmp eq i32 %a, 0
  br i1 %a.is.zero, label %if, label %else

if:
  ; Taken when %a == 0: element 1 comes from %in[0].
  %val.if = load i32, i32 addrspace(1)* %in
  %vec.if = insertelement <2 x i32> %vec.lo, i32 %val.if, i32 1
  br label %endif

else:
  ; Taken when %a != 0: element 1 comes from %in[1].
  %in.next = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %val.else = load i32, i32 addrspace(1)* %in.next
  %vec.else = insertelement <2 x i32> %vec.lo, i32 %val.else, i32 1
  br label %endif

endif:
  %vec.merged = phi <2 x i32> [%vec.if, %if], [%vec.else, %else]
  store <2 x i32> %vec.merged, <2 x i32> addrspace(1)* %out
  ret void
}

; Dynamic index into <2 x double>: four 64-bit buffer stores are expected
; before the program ends.
; SI-LABEL: {{^}}dynamic_insertelement_v2f64:
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, i32 %b) nounwind {
  %updated = insertelement <2 x double> %a, double 8.000000e+00, i32 %b
  store <2 x double> %updated, <2 x double> addrspace(1)* %out, align 16
  ret void
}

; Dynamic index into <2 x i64>: two 64-bit buffer stores are expected before
; the program ends.
; SI-LABEL: {{^}}dynamic_insertelement_v2i64:
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %a, i32 %b) nounwind {
  %updated = insertelement <2 x i64> %a, i64 5, i32 %b
  store <2 x i64> %updated, <2 x i64> addrspace(1)* %out, align 8
  ret void
}

; Dynamic index into <4 x double>: four 64-bit buffer stores are expected
; before the program ends.
; SI-LABEL: {{^}}dynamic_insertelement_v4f64:
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, i32 %b) nounwind {
  %updated = insertelement <4 x double> %a, double 8.000000e+00, i32 %b
  store <4 x double> %updated, <4 x double> addrspace(1)* %out, align 16
  ret void
}

; Dynamic index into <8 x double>: eight 64-bit buffer stores are expected
; before the program ends.
; SI-LABEL: {{^}}dynamic_insertelement_v8f64:
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, i32 %b) nounwind {
  %updated = insertelement <8 x double> %a, double 8.000000e+00, i32 %b
  store <8 x double> %updated, <8 x double> addrspace(1)* %out, align 16
  ret void
}