; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s

declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.r600.read.tidig.y() #0

; In this test both the pointer and the offset operands to the
; BUFFER_LOAD instructions end up being stored in VGPRs. This
; requires us to add the pointer and offset together, store the
; result in the offset operand (vaddr), and then store 0 in an
; SGPR register pair and use that for the pointer operand
; (low 64 bits of srsrc).

; GCN-LABEL: {{^}}mubuf:

; Make sure we aren't using VGPRs for the source operand of s_mov_b64
; GCN-NOT: s_mov_b64 s[{{[0-9]+:[0-9]+}}], v

; Make sure we aren't using VGPRs for the srsrc operand of BUFFER_LOAD_*
; instructions
; GCN: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
; GCN: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64

define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x()
  %tmp1 = call i32 @llvm.r600.read.tidig.y()
  %tmp2 = sext i32 %tmp to i64
  %tmp3 = sext i32 %tmp1 to i64
  br label %loop

loop:                                             ; preds = %loop, %entry
  %tmp4 = phi i64 [ 0, %entry ], [ %tmp5, %loop ]
  %tmp5 = add i64 %tmp2, %tmp4
  %tmp6 = getelementptr i8, i8 addrspace(1)* %in, i64 %tmp5
  %tmp7 = load i8, i8 addrspace(1)* %tmp6, align 1
  %tmp8 = or i64 %tmp5, 1
  %tmp9 = getelementptr i8, i8 addrspace(1)* %in, i64 %tmp8
  %tmp10 = load i8, i8 addrspace(1)* %tmp9, align 1
  %tmp11 = add i8 %tmp7, %tmp10
  %tmp12 = sext i8 %tmp11 to i32
  store i32 %tmp12, i32 addrspace(1)* %out
  %tmp13 = icmp slt i64 %tmp5, 10
  br i1 %tmp13, label %loop, label %done

done:                                             ; preds = %loop
  ret void
}

; Test moving an SMRD instruction to the VALU

; GCN-LABEL: {{^}}smrd_valu:
; GCN: buffer_load_dword [[OUT:v[0-9]+]]
; GCN: buffer_store_dword [[OUT]]
define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 {
entry:
  %tmp = icmp ne i32 %a, 0
  br i1 %tmp, label %if, label %else

if:                                               ; preds = %entry
  %tmp1 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
  br label %endif

else:                                             ; preds = %entry
  %tmp2 = getelementptr i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
  %tmp3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %tmp2
  br label %endif

endif:                                            ; preds = %else, %if
  %tmp4 = phi i32 addrspace(2)* [ %tmp1, %if ], [ %tmp3, %else ]
  %tmp5 = getelementptr i32, i32 addrspace(2)* %tmp4, i32 3000
  %tmp6 = load i32, i32 addrspace(2)* %tmp5
  store i32 %tmp6, i32 addrspace(1)* %out
  ret void
}

; Test moving an SMRD with an immediate offset to the VALU

; GCN-LABEL: {{^}}smrd_valu2:
; GCN-NOT: v_add
; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}}
define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x() #0
  %tmp1 = add i32 %tmp, 4
  %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4
  %tmp3 = load i32, i32 addrspace(2)* %tmp2
  store i32 %tmp3, i32 addrspace(1)* %out
  ret void
}

; Use a big offset that will use the SMRD literal offset on CI
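; Note: the offset below is 5000 i32 elements, i.e. 5000 * 4 = 20000 = 0x4e20
; bytes. That should be too large for SI's 8-bit, dword-scaled SMRD immediate,
; so once the load is moved to the VALU the offset is materialized in an SGPR
; and used as the MUBUF soffset. The x2/x4 variants below follow the same
; pattern (5000 * 8 = 0x9c40 and 1234 * 16 = 0x4d20 bytes).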
; GCN-LABEL: {{^}}smrd_valu_ci_offset:
; GCN-NOT: v_add
; GCN: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4e20{{$}}
; GCN-NOT: v_add
; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}}
; GCN: v_add_i32_e32
; GCN: buffer_store_dword
define void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x() #0
  %tmp2 = getelementptr i32, i32 addrspace(2)* %in, i32 %tmp
  %tmp3 = getelementptr i32, i32 addrspace(2)* %tmp2, i32 5000
  %tmp4 = load i32, i32 addrspace(2)* %tmp3
  %tmp5 = add i32 %tmp4, %c
  store i32 %tmp5, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}smrd_valu_ci_offset_x2:
; GCN-NOT: v_add
; GCN: s_mov_b32 [[OFFSET:s[0-9]+]], 0x9c40{{$}}
; GCN-NOT: v_add
; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: buffer_store_dwordx2
define void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x() #0
  %tmp2 = getelementptr i64, i64 addrspace(2)* %in, i32 %tmp
  %tmp3 = getelementptr i64, i64 addrspace(2)* %tmp2, i32 5000
  %tmp4 = load i64, i64 addrspace(2)* %tmp3
  %tmp5 = or i64 %tmp4, %c
  store i64 %tmp5, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}smrd_valu_ci_offset_x4:
; GCN-NOT: v_add
; GCN: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4d20{{$}}
; GCN-NOT: v_add
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: buffer_store_dwordx4
define void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x() #0
  %tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %in, i32 %tmp
  %tmp3 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %tmp2, i32 1234
  %tmp4 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp3
  %tmp5 = or <4 x i32> %tmp4, %c
  store <4 x i32> %tmp5, <4 x i32> addrspace(1)* %out
  ret void
}

; Original scalar load uses an SGPR offset on SI and a 32-bit literal on CI.
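; Note: 1234 * 32 = 39488 = 0x9a40 bytes reaches the first half of the
; <8 x i32>; the second dwordx4 load is 16 bytes further on at 0x9a50.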
; GCN-LABEL: {{^}}smrd_valu_ci_offset_x8:
; GCN-NOT: v_add
; GCN: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}}
; GCN-NOT: v_add
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
; GCN-NOT: v_add
; GCN: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}}
; GCN-NOT: v_add
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}

; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
define void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x() #0
  %tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %in, i32 %tmp
  %tmp3 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %tmp2, i32 1234
  %tmp4 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp3
  %tmp5 = or <8 x i32> %tmp4, %c
  store <8 x i32> %tmp5, <8 x i32> addrspace(1)* %out
  ret void
}
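; Note: 1234 * 64 = 78976 = 0x13480 bytes, and the four dwordx4 loads step by
; 16 bytes: 0x13480, 0x13490, 0x134a0, 0x134b0.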
; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16:

; GCN-NOT: v_add
; GCN: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}}
; GCN-NOT: v_add
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
; GCN-NOT: v_add
; GCN: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}}
; GCN-NOT: v_add
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
; GCN-NOT: v_add
; GCN: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}}
; GCN-NOT: v_add
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}}
; GCN-NOT: v_add
; GCN: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}}
; GCN-NOT: v_add
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}}

; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4

; GCN: s_endpgm
define void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x() #0
  %tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %in, i32 %tmp
  %tmp3 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %tmp2, i32 1234
  %tmp4 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp3
  %tmp5 = or <16 x i32> %tmp4, %c
  store <16 x i32> %tmp5, <16 x i32> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}smrd_valu2_salu_user:
; GCN: buffer_load_dword [[MOVED:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]]
; GCN: buffer_store_dword [[ADD]]
define void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x() #0
  %tmp1 = add i32 %tmp, 4
  %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4
  %tmp3 = load i32, i32 addrspace(2)* %tmp2
  %tmp4 = add i32 %tmp3, %a
  store i32 %tmp4, i32 addrspace(1)* %out
  ret void
}
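; Note: 255 * 4 = 1020 bytes should be the largest offset encodable in SI's
; 8-bit, dword-scaled SMRD immediate; the 256 * 4 = 1024 byte offset in
; smrd_valu2_mubuf_offset below exceeds it but still fits the 12-bit MUBUF
; offset field.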
; GCN-LABEL: {{^}}smrd_valu2_max_smrd_offset:
; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}}
define void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x() #0
  %tmp1 = add i32 %tmp, 4
  %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 255
  %tmp3 = load i32, i32 addrspace(2)* %tmp2
  store i32 %tmp3, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}smrd_valu2_mubuf_offset:
; GCN-NOT: v_add
; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}}
define void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x() #0
  %tmp1 = add i32 %tmp, 4
  %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 256
  %tmp3 = load i32, i32 addrspace(2)* %tmp2
  store i32 %tmp3, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}s_load_imm_v8i32:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
entry:
  %tmp0 = tail call i32 @llvm.r600.read.tidig.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
  %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
  %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4
  store <8 x i32> %tmp3, <8 x i32> addrspace(1)* %out, align 32
  ret void
}

; GCN-LABEL: {{^}}s_load_imm_v8i32_salu_user:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: buffer_store_dword
define void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
entry:
  %tmp0 = tail call i32 @llvm.r600.read.tidig.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
  %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
  %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4

  %elt0 = extractelement <8 x i32> %tmp3, i32 0
  %elt1 = extractelement <8 x i32> %tmp3, i32 1
  %elt2 = extractelement <8 x i32> %tmp3, i32 2
  %elt3 = extractelement <8 x i32> %tmp3, i32 3
  %elt4 = extractelement <8 x i32> %tmp3, i32 4
  %elt5 = extractelement <8 x i32> %tmp3, i32 5
  %elt6 = extractelement <8 x i32> %tmp3, i32 6
  %elt7 = extractelement <8 x i32> %tmp3, i32 7

  %add0 = add i32 %elt0, %elt1
  %add1 = add i32 %add0, %elt2
  %add2 = add i32 %add1, %elt3
  %add3 = add i32 %add2, %elt4
  %add4 = add i32 %add3, %elt5
  %add5 = add i32 %add4, %elt6
  %add6 = add i32 %add5, %elt7

  store i32 %add6, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}s_load_imm_v16i32:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
entry:
  %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
  %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
  %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
  %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4
  store <16 x i32> %tmp3, <16 x i32> addrspace(1)* %out, align 32
  ret void
}

; GCN-LABEL: {{^}}s_load_imm_v16i32_salu_user:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: buffer_store_dword
define void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
entry:
  %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
  %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
  %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
  %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4

  %elt0 = extractelement <16 x i32> %tmp3, i32 0
  %elt1 = extractelement <16 x i32> %tmp3, i32 1
  %elt2 = extractelement <16 x i32> %tmp3, i32 2
  %elt3 = extractelement <16 x i32> %tmp3, i32 3
  %elt4 = extractelement <16 x i32> %tmp3, i32 4
  %elt5 = extractelement <16 x i32> %tmp3, i32 5
  %elt6 = extractelement <16 x i32> %tmp3, i32 6
  %elt7 = extractelement <16 x i32> %tmp3, i32 7
  %elt8 = extractelement <16 x i32> %tmp3, i32 8
  %elt9 = extractelement <16 x i32> %tmp3, i32 9
  %elt10 = extractelement <16 x i32> %tmp3, i32 10
  %elt11 = extractelement <16 x i32> %tmp3, i32 11
  %elt12 = extractelement <16 x i32> %tmp3, i32 12
  %elt13 = extractelement <16 x i32> %tmp3, i32 13
  %elt14 = extractelement <16 x i32> %tmp3, i32 14
  %elt15 = extractelement <16 x i32> %tmp3, i32 15

  %add0 = add i32 %elt0, %elt1
  %add1 = add i32 %add0, %elt2
  %add2 = add i32 %add1, %elt3
  %add3 = add i32 %add2, %elt4
  %add4 = add i32 %add3, %elt5
  %add5 = add i32 %add4, %elt6
  %add6 = add i32 %add5, %elt7
  %add7 = add i32 %add6, %elt8
  %add8 = add i32 %add7, %elt9
  %add9 = add i32 %add8, %elt10
  %add10 = add i32 %add9, %elt11
  %add11 = add i32 %add10, %elt12
  %add12 = add i32 %add11, %elt13
  %add13 = add i32 %add12, %elt14
  %add14 = add i32 %add13, %elt15

  store i32 %add14, i32 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }