; RUN: opt -S -mtriple=amdgcn-- -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-promote-alloca -sroa -instcombine -amdgpu-promote-alloca-to-vector-limit=32 < %s | FileCheck -check-prefix=LIMIT32 %s

; Exercises the size limits of alloca-to-vector promotion in the AMDGPU
; promote-alloca pass.
;
; Every kernel below has the same shape: it allocates a private (addrspace 5)
; array, stores zero to element 0, loads the element selected by the
; runtime %index argument, and writes that value to %out. Only the array
; element type, the element count, and the attribute group differ.
;
; Two configurations are checked:
;   * the default run, where some arrays are expected to become vectors and
;     others are expected to stay as allocas;
;   * the run with -amdgpu-promote-alloca-to-vector-limit=32, where every
;     alloca is expected to survive and no vector type may appear.
;
; NOTE(review): the default expectations appear to depend on the maximum
; flat work-group size given by the attribute group - confirm against the
; pass implementation before changing any of the sizes below.
;
; Each function is anchored with a label directive for both prefixes so that
; a failure in one function cannot be masked by, or blamed on, a match in a
; later function.

target datalayout = "A5"

; [8 x i64], work-group size 1..1024.
; Default run: no alloca remains and an <8 x i64> vector is present.
; OPT-LABEL: @alloca_8xi64_max1024(
; OPT-NOT: alloca
; OPT: <8 x i64>
; LIMIT32-LABEL: @alloca_8xi64_max1024(
; LIMIT32: alloca
; LIMIT32-NOT: <8 x i64>
define amdgpu_kernel void @alloca_8xi64_max1024(i64 addrspace(1)* %out, i32 %index) #0 {
entry:
  %tmp = alloca [8 x i64], addrspace(5)
  %x = getelementptr [8 x i64], [8 x i64] addrspace(5)* %tmp, i32 0, i32 0
  store i64 0, i64 addrspace(5)* %x
  %tmp1 = getelementptr [8 x i64], [8 x i64] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i64, i64 addrspace(5)* %tmp1
  store i64 %tmp2, i64 addrspace(1)* %out
  ret void
}

; [9 x i64], work-group size 1..1024.
; Default run: the alloca is kept and no <9 x i64> vector is created.
; OPT-LABEL: @alloca_9xi64_max1024(
; OPT: alloca [9 x i64]
; OPT-NOT: <9 x i64>
; LIMIT32-LABEL: @alloca_9xi64_max1024(
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i64>
define amdgpu_kernel void @alloca_9xi64_max1024(i64 addrspace(1)* %out, i32 %index) #0 {
entry:
  %tmp = alloca [9 x i64], addrspace(5)
  %x = getelementptr [9 x i64], [9 x i64] addrspace(5)* %tmp, i32 0, i32 0
  store i64 0, i64 addrspace(5)* %x
  %tmp1 = getelementptr [9 x i64], [9 x i64] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i64, i64 addrspace(5)* %tmp1
  store i64 %tmp2, i64 addrspace(1)* %out
  ret void
}

; [16 x i64], work-group size 1..512.
; Default run: no alloca remains and a <16 x i64> vector is present.
; OPT-LABEL: @alloca_16xi64_max512(
; OPT-NOT: alloca
; OPT: <16 x i64>
; LIMIT32-LABEL: @alloca_16xi64_max512(
; LIMIT32: alloca
; LIMIT32-NOT: <16 x i64>
define amdgpu_kernel void @alloca_16xi64_max512(i64 addrspace(1)* %out, i32 %index) #1 {
entry:
  %tmp = alloca [16 x i64], addrspace(5)
  %x = getelementptr [16 x i64], [16 x i64] addrspace(5)* %tmp, i32 0, i32 0
  store i64 0, i64 addrspace(5)* %x
  %tmp1 = getelementptr [16 x i64], [16 x i64] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i64, i64 addrspace(5)* %tmp1
  store i64 %tmp2, i64 addrspace(1)* %out
  ret void
}

; [17 x i64], work-group size 1..512.
; Default run: the alloca is kept and no <17 x i64> vector is created.
; OPT-LABEL: @alloca_17xi64_max512(
; OPT: alloca [17 x i64]
; OPT-NOT: <17 x i64>
; LIMIT32-LABEL: @alloca_17xi64_max512(
; LIMIT32: alloca
; LIMIT32-NOT: <17 x i64>
define amdgpu_kernel void @alloca_17xi64_max512(i64 addrspace(1)* %out, i32 %index) #1 {
entry:
  %tmp = alloca [17 x i64], addrspace(5)
  %x = getelementptr [17 x i64], [17 x i64] addrspace(5)* %tmp, i32 0, i32 0
  store i64 0, i64 addrspace(5)* %x
  %tmp1 = getelementptr [17 x i64], [17 x i64] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i64, i64 addrspace(5)* %tmp1
  store i64 %tmp2, i64 addrspace(1)* %out
  ret void
}

; [9 x i128], work-group size 1..512.
; Default run: the alloca is kept and no <9 x i128> vector is created.
; OPT-LABEL: @alloca_9xi128_max512(
; OPT: alloca [9 x i128]
; OPT-NOT: <9 x i128>
; LIMIT32-LABEL: @alloca_9xi128_max512(
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i128>
define amdgpu_kernel void @alloca_9xi128_max512(i128 addrspace(1)* %out, i32 %index) #1 {
entry:
  %tmp = alloca [9 x i128], addrspace(5)
  %x = getelementptr [9 x i128], [9 x i128] addrspace(5)* %tmp, i32 0, i32 0
  store i128 0, i128 addrspace(5)* %x
  %tmp1 = getelementptr [9 x i128], [9 x i128] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i128, i128 addrspace(5)* %tmp1
  store i128 %tmp2, i128 addrspace(1)* %out
  ret void
}

; [9 x i128], work-group size 1..256.
; Same array as the previous function but with the smaller work-group bound.
; Default run: no alloca remains and a <9 x i128> vector is present.
; OPT-LABEL: @alloca_9xi128_max256(
; OPT-NOT: alloca
; OPT: <9 x i128>
; LIMIT32-LABEL: @alloca_9xi128_max256(
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i128>
define amdgpu_kernel void @alloca_9xi128_max256(i128 addrspace(1)* %out, i32 %index) #2 {
entry:
  %tmp = alloca [9 x i128], addrspace(5)
  %x = getelementptr [9 x i128], [9 x i128] addrspace(5)* %tmp, i32 0, i32 0
  store i128 0, i128 addrspace(5)* %x
  %tmp1 = getelementptr [9 x i128], [9 x i128] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i128, i128 addrspace(5)* %tmp1
  store i128 %tmp2, i128 addrspace(1)* %out
  ret void
}

; [16 x i128], work-group size 1..256.
; Default run: no alloca remains and a <16 x i128> vector is present.
; OPT-LABEL: @alloca_16xi128_max256(
; OPT-NOT: alloca
; OPT: <16 x i128>
; LIMIT32-LABEL: @alloca_16xi128_max256(
; LIMIT32: alloca
; LIMIT32-NOT: <16 x i128>
define amdgpu_kernel void @alloca_16xi128_max256(i128 addrspace(1)* %out, i32 %index) #2 {
entry:
  %tmp = alloca [16 x i128], addrspace(5)
  %x = getelementptr [16 x i128], [16 x i128] addrspace(5)* %tmp, i32 0, i32 0
  store i128 0, i128 addrspace(5)* %x
  %tmp1 = getelementptr [16 x i128], [16 x i128] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i128, i128 addrspace(5)* %tmp1
  store i128 %tmp2, i128 addrspace(1)* %out
  ret void
}

; [9 x i256], work-group size 1..256.
; Default run: the alloca is kept and no <9 x i256> vector is created.
; OPT-LABEL: @alloca_9xi256_max256(
; OPT: alloca [9 x i256]
; OPT-NOT: <9 x i256>
; LIMIT32-LABEL: @alloca_9xi256_max256(
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i256>
define amdgpu_kernel void @alloca_9xi256_max256(i256 addrspace(1)* %out, i32 %index) #2 {
entry:
  %tmp = alloca [9 x i256], addrspace(5)
  %x = getelementptr [9 x i256], [9 x i256] addrspace(5)* %tmp, i32 0, i32 0
  store i256 0, i256 addrspace(5)* %x
  %tmp1 = getelementptr [9 x i256], [9 x i256] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i256, i256 addrspace(5)* %tmp1
  store i256 %tmp2, i256 addrspace(1)* %out
  ret void
}

; Attribute groups: each fixes the flat work-group size range (min,max) used
; by the kernels above.
attributes #0 = { "amdgpu-flat-work-group-size"="1,1024" }
attributes #1 = { "amdgpu-flat-work-group-size"="1,512" }
attributes #2 = { "amdgpu-flat-work-group-size"="1,256" }