; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-promote-alloca < %s | FileCheck --check-prefix=OPT %s

; Make sure that array allocas which are loaded and stored as multi-element
; aggregates are handled correctly.
; Strictly speaking, the promote-alloca pass shouldn't have to deal with this
; case as it is non-canonical, but the pass should handle it gracefully if it
; does occur.
; The checks look for lines that previously caused issues in PromoteAlloca
; (non-canonical). Opt should now leave these unchanged.
;
; NOTE(review): every function below references attribute group #0, but no
; definition of that group is visible in this view of the file -- confirm that
; this is intended.

; Test 1: a whole [1 x float] aggregate is loaded from @block and written into
; the alloca %f1 with a single store. %f1 is then read through a GEP whose
; index is a value loaded from %i. The check below requires the aggregate
; store to appear unchanged in the output.
; OPT-LABEL: @promote_1d_aggr(
; OPT: store [1 x float] %tmp3, [1 x float]* %f1

%Block = type { [1 x float], i32 }
%gl_PerVertex = type { <4 x float>, float, [1 x float], [1 x float] }
%struct = type { i32, i32 }

@block = external addrspace(1) global %Block
@pv = external addrspace(1) global %gl_PerVertex

define amdgpu_vs void @promote_1d_aggr() #0 {
  %i = alloca i32
  %f1 = alloca [1 x float]
  ; Copy the i32 member (field 1) of @block into %i.
  %tmp = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 1
  %tmp1 = load i32, i32 addrspace(1)* %tmp
  store i32 %tmp1, i32* %i
  ; Copy the array member (field 0) of @block into %f1 as one aggregate value.
  %tmp2 = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 0
  %tmp3 = load [1 x float], [1 x float] addrspace(1)* %tmp2
  store [1 x float] %tmp3, [1 x float]* %f1
  ; Read one element of %f1 using the index held in %i.
  %tmp4 = load i32, i32* %i
  %tmp5 = getelementptr [1 x float], [1 x float]* %f1, i32 0, i32 %tmp4
  %tmp6 = load float, float* %tmp5
  ; %tmp7 is loaded without any prior store in this function; all four lanes
  ; of the loaded vector are then overwritten with %tmp6.
  %tmp7 = alloca <4 x float>
  %tmp8 = load <4 x float>, <4 x float>* %tmp7
  %tmp9 = insertelement <4 x float> %tmp8, float %tmp6, i32 0
  %tmp10 = insertelement <4 x float> %tmp9, float %tmp6, i32 1
  %tmp11 = insertelement <4 x float> %tmp10, float %tmp6, i32 2
  %tmp12 = insertelement <4 x float> %tmp11, float %tmp6, i32 3
  ; Write the resulting vector to field 0 of @pv.
  %tmp13 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
  store <4 x float> %tmp12, <4 x float> addrspace(1)* %tmp13
  ret void
}


; Test 2: %f1 is filled with two scalar float stores (elements 0 and 1) and is
; then read back as a whole [2 x float] aggregate. The check below requires
; that aggregate load to appear unchanged in the output.
; OPT-LABEL: @promote_store_aggr(
; OPT: %tmp6 = load [2 x float], [2 x float]* %f1

%Block2 = type { i32, [2 x float] }
@block2 = external addrspace(1) global %Block2

define amdgpu_vs void @promote_store_aggr() #0 {
  %i = alloca i32
  %f1 = alloca [2 x float]
  ; Copy the i32 member (field 0) of @block2 into %i.
  %tmp = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 0
  %tmp1 = load i32, i32 addrspace(1)* %tmp
  store i32 %tmp1, i32* %i
  ; Element 0 of %f1 receives the value of %i converted to float.
  %tmp2 = load i32, i32* %i
  %tmp3 = sitofp i32 %tmp2 to float
  %tmp4 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 0
  store float %tmp3, float* %tmp4
  ; Element 1 of %f1 receives the constant 2.0.
  %tmp5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 1
  store float 2.000000e+00, float* %tmp5
  ; Read %f1 back as one aggregate and store it to field 1 of @block2.
  %tmp6 = load [2 x float], [2 x float]* %f1
  %tmp7 = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 1
  store [2 x float] %tmp6, [2 x float] addrspace(1)* %tmp7
  ; Store a constant all-ones vector to field 0 of @pv.
  %tmp8 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> addrspace(1)* %tmp8
  ret void
}

; Test 3: same instruction sequence as @promote_1d_aggr, but with a
; two-element array: a [2 x float] aggregate is stored into %f1 in one
; instruction and a single element is then loaded through a GEP indexed by a
; value loaded from %i. The check below requires the aggregate store to appear
; unchanged in the output.
; OPT-LABEL: @promote_load_from_store_aggr(
; OPT: store [2 x float] %tmp3, [2 x float]* %f1

%Block3 = type { [2 x float], i32 }
@block3 = external addrspace(1) global %Block3

define amdgpu_vs void @promote_load_from_store_aggr() #0 {
  %i = alloca i32
  %f1 = alloca [2 x float]
  ; Copy the i32 member (field 1) of @block3 into %i.
  %tmp = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 1
  %tmp1 = load i32, i32 addrspace(1)* %tmp
  store i32 %tmp1, i32* %i
  ; Copy the array member (field 0) of @block3 into %f1 as one aggregate value.
  %tmp2 = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 0
  %tmp3 = load [2 x float], [2 x float] addrspace(1)* %tmp2
  store [2 x float] %tmp3, [2 x float]* %f1
  ; Read one element of %f1 using the index held in %i.
  %tmp4 = load i32, i32* %i
  %tmp5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 %tmp4
  %tmp6 = load float, float* %tmp5
  ; %tmp7 is loaded without any prior store in this function; all four lanes
  ; of the loaded vector are then overwritten with %tmp6.
  %tmp7 = alloca <4 x float>
  %tmp8 = load <4 x float>, <4 x float>* %tmp7
  %tmp9 = insertelement <4 x float> %tmp8, float %tmp6, i32 0
  %tmp10 = insertelement <4 x float> %tmp9, float %tmp6, i32 1
  %tmp11 = insertelement <4 x float> %tmp10, float %tmp6, i32 2
  %tmp12 = insertelement <4 x float> %tmp11, float %tmp6, i32 3
  ; Write the resulting vector to field 0 of @pv.
  %tmp13 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
  store <4 x float> %tmp12, <4 x float> addrspace(1)* %tmp13
  ret void
}

; Test 4: a [2 x double] aggregate is assembled with insertvalue and written
; into the alloca %s with a single store; %s is afterwards accessed only
; element-wise through constant-index GEPs. The check below requires the
; aggregate store to appear unchanged in the output.
; OPT-LABEL: @promote_double_aggr(
; OPT: store [2 x double] %tmp5, [2 x double]* %s

@tmp_g = external addrspace(1) global { [4 x double], <2 x double>, <3 x double>, <4 x double> }
@frag_color = external addrspace(1) global <4 x float>

define amdgpu_ps void @promote_double_aggr() #0 {
  %s = alloca [2 x double]
  ; Load elements 0 and 1 of the [4 x double] member (field 0) of @tmp_g.
  %tmp = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 0
  %tmp1 = load double, double addrspace(1)* %tmp
  %tmp2 = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 1
  %tmp3 = load double, double addrspace(1)* %tmp2
  ; Pack the two doubles into one aggregate value and store it to %s.
  %tmp4 = insertvalue [2 x double] undef, double %tmp1, 0
  %tmp5 = insertvalue [2 x double] %tmp4, double %tmp3, 1
  store [2 x double] %tmp5, [2 x double]* %s
  ; Both operands of this fadd are loaded from element 1 of %s (via two
  ; separate but identical GEPs); the sum is written to element 0.
  %tmp6 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
  %tmp7 = load double, double* %tmp6
  %tmp8 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
  %tmp9 = load double, double* %tmp8
  %tmp10 = fadd double %tmp7, %tmp9
  %tmp11 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0
  store double %tmp10, double* %tmp11
  ; Re-load elements 0 and 1 of %s, add them, and truncate the sum to float.
  %tmp12 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0
  %tmp13 = load double, double* %tmp12
  %tmp14 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
  %tmp15 = load double, double* %tmp14
  %tmp16 = fadd double %tmp13, %tmp15
  %tmp17 = fptrunc double %tmp16 to float
  ; Place the float in all four lanes of a vector and store it to @frag_color.
  %tmp18 = insertelement <4 x float> undef, float %tmp17, i32 0
  %tmp19 = insertelement <4 x float> %tmp18, float %tmp17, i32 1
  %tmp20 = insertelement <4 x float> %tmp19, float %tmp17, i32 2
  %tmp21 = insertelement <4 x float> %tmp20, float %tmp17, i32 3
  store <4 x float> %tmp21, <4 x float> addrspace(1)* @frag_color
  ret void
}

; Don't crash on a type that isn't a valid vector element.
; The alloca below is an array of %struct ({ i32, i32 }) and has no users.
; Only the function label is matched for this test; no instruction-level
; expectations are placed on the output.
; OPT-LABEL: @alloca_struct(
define amdgpu_kernel void @alloca_struct() #0 {
entry:
  %alloca = alloca [2 x %struct], align 4
  ret void
}