; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
;
; Regression tests for SLP vectorization of scalar sequences whose lanes
; alternate between two opcodes (add/sub or fadd/fsub). For the positive cases
; the expected output is one vector instruction per opcode followed by a
; shufflevector with mask <0, 5, 2, 7>, which takes elements 0 and 2 from its
; first operand and elements 1 and 3 from its second operand. The negative
; cases require that none of these vector instructions appear.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Four-element global arrays that the test functions load from and store to.
@b = common global [4 x i32] zeroinitializer, align 16
@c = common global [4 x i32] zeroinitializer, align 16
@d = common global [4 x i32] zeroinitializer, align 16
@e = common global [4 x i32] zeroinitializer, align 16
@a = common global [4 x i32] zeroinitializer, align 16
@fb = common global [4 x float] zeroinitializer, align 16
@fc = common global [4 x float] zeroinitializer, align 16
@fa = common global [4 x float] zeroinitializer, align 16
@fd = common global [4 x float] zeroinitializer, align 16

; Integer add/sub alternation, starting with add:
; a[0] = (b[0]+c[0]) + (d[0]+e[0]);
; a[1] = (b[1]+c[1]) - (d[1]+e[1]);
; a[2] = (b[2]+c[2]) + (d[2]+e[2]);
; a[3] = (b[3]+c[3]) - (d[3]+e[3]);

; CHECK-LABEL: @addsub
; CHECK: %5 = add nsw <4 x i32> %3, %4
; CHECK: %6 = add nsw <4 x i32> %2, %5
; CHECK: %7 = sub nsw <4 x i32> %2, %5
; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 2, i32 7>

; Function Attrs: nounwind uwtable
define void @addsub() #0 {
entry:
  %0 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 0), align 4
  %1 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 0), align 4
  %add = add nsw i32 %0, %1
  %2 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 0), align 4
  %3 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 0), align 4
  %add1 = add nsw i32 %2, %3
  %add2 = add nsw i32 %add, %add1
  store i32 %add2, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 0), align 4
  %4 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 1), align 4
  %5 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 1), align 4
  %add3 = add nsw i32 %4, %5
  %6 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 1), align 4
  %7 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 1), align 4
  %add4 = add nsw i32 %6, %7
  %sub = sub nsw i32 %add3, %add4
  store i32 %sub, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 1), align 4
  %8 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 2), align 4
  %9 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 2), align 4
  %add5 = add nsw i32 %8, %9
  %10 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 2), align 4
  %11 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 2), align 4
  %add6 = add nsw i32 %10, %11
  %add7 = add nsw i32 %add5, %add6
  store i32 %add7, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 2), align 4
  %12 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 3), align 4
  %13 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 3), align 4
  %add8 = add nsw i32 %12, %13
  %14 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 3), align 4
  %15 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 3), align 4
  %add9 = add nsw i32 %14, %15
  %sub10 = sub nsw i32 %add8, %add9
  store i32 %sub10, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 3), align 4
  ret void
}

; Integer sub/add alternation, starting with sub:
; a[0] = (b[0]+c[0]) - (d[0]+e[0]);
; a[1] = (b[1]+c[1]) + (d[1]+e[1]);
; a[2] = (b[2]+c[2]) - (d[2]+e[2]);
; a[3] = (b[3]+c[3]) + (d[3]+e[3]);

; CHECK-LABEL: @subadd
; CHECK: %5 = add nsw <4 x i32> %3, %4
; CHECK: %6 = sub nsw <4 x i32> %2, %5
; CHECK: %7 = add nsw <4 x i32> %2, %5
; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 2, i32 7>

; Function Attrs: nounwind uwtable
define void @subadd() #0 {
entry:
  %0 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 0), align 4
  %1 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 0), align 4
  %add = add nsw i32 %0, %1
  %2 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 0), align 4
  %3 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 0), align 4
  %add1 = add nsw i32 %2, %3
  %sub = sub nsw i32 %add, %add1
  store i32 %sub, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 0), align 4
  %4 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 1), align 4
  %5 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 1), align 4
  %add2 = add nsw i32 %4, %5
  %6 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 1), align 4
  %7 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 1), align 4
  %add3 = add nsw i32 %6, %7
  %add4 = add nsw i32 %add2, %add3
  store i32 %add4, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 1), align 4
  %8 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 2), align 4
  %9 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 2), align 4
  %add5 = add nsw i32 %8, %9
  %10 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 2), align 4
  %11 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 2), align 4
  %add6 = add nsw i32 %10, %11
  %sub7 = sub nsw i32 %add5, %add6
  store i32 %sub7, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 2), align 4
  %12 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 3), align 4
  %13 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 3), align 4
  %add8 = add nsw i32 %12, %13
  %14 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 3), align 4
  %15 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 3), align 4
  %add9 = add nsw i32 %14, %15
  %add10 = add nsw i32 %add8, %add9
  store i32 %add10, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 3), align 4
  ret void
}

; Float fadd/fsub alternation, starting with fadd:
; fa[0] = fb[0]+fc[0];
; fa[1] = fb[1]-fc[1];
; fa[2] = fb[2]+fc[2];
; fa[3] = fb[3]-fc[3];

; CHECK-LABEL: @faddfsub
; CHECK: %2 = fadd <4 x float> %0, %1
; CHECK: %3 = fsub <4 x float> %0, %1
; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; Function Attrs: nounwind uwtable
define void @faddfsub() #0 {
entry:
  %0 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
  %add = fadd float %0, %1
  store float %add, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
  %3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
  %sub = fsub float %2, %3
  store float %sub, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
  %add1 = fadd float %4, %5
  store float %add1, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
  %6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
  %sub2 = fsub float %6, %7
  store float %sub2, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
  ret void
}

; Float fsub/fadd alternation, starting with fsub:
; fa[0] = fb[0]-fc[0];
; fa[1] = fb[1]+fc[1];
; fa[2] = fb[2]-fc[2];
; fa[3] = fb[3]+fc[3];

; CHECK-LABEL: @fsubfadd
; CHECK: %2 = fsub <4 x float> %0, %1
; CHECK: %3 = fadd <4 x float> %0, %1
; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; Function Attrs: nounwind uwtable
define void @fsubfadd() #0 {
entry:
  %0 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
  %sub = fsub float %0, %1
  store float %sub, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
  %3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
  %add = fadd float %2, %3
  store float %add, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
  %sub1 = fsub float %4, %5
  store float %sub1, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
  %6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
  %add2 = fadd float %6, %7
  store float %add2, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
  ret void
}

; Negative case: the opcodes do not alternate (three fadds, then one fsub),
; so no <4 x float> fadd/fsub and no shufflevector may be produced:
; fa[0] = fb[0]+fc[0];
; fa[1] = fb[1]+fc[1];
; fa[2] = fb[2]+fc[2];
; fa[3] = fb[3]-fc[3];

; CHECK-LABEL: @No_faddfsub
; CHECK-NOT: fadd <4 x float>
; CHECK-NOT: fsub <4 x float>
; CHECK-NOT: shufflevector
; Function Attrs: nounwind uwtable
define void @No_faddfsub() #0 {
entry:
  %0 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
  %add = fadd float %0, %1
  store float %add, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
  %3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
  %add1 = fadd float %2, %3
  store float %add1, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
  %add2 = fadd float %4, %5
  store float %add2, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
  %6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
  %sub = fsub float %6, %7
  store float %sub, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
  ret void
}

; Check vectorization of the following code for the float data type. Lane 0
; has its fadd operands in the opposite order (fb, fa) from lane 2 (fa, fb):
; fc[0] = fb[0]+fa[0]; //swapped fb and fa
; fc[1] = fa[1]-fb[1];
; fc[2] = fa[2]+fb[2];
; fc[3] = fa[3]-fb[3];

; CHECK-LABEL: @reorder_alt
; CHECK: %3 = fadd <4 x float> %1, %2
; CHECK: %4 = fsub <4 x float> %1, %2
; CHECK: %5 = shufflevector <4 x float> %3, <4 x float> %4, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
define void @reorder_alt() #0 {
  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
  %3 = fadd float %1, %2
  store float %3, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
  %6 = fsub float %4, %5
  store float %6, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
  %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
  %9 = fadd float %7, %8
  store float %9, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
  %10 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
  %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
  %12 = fsub float %10, %11
  store float %12, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
  ret void
}

; Check vectorization of the following code for the float data type. The
; operand swap is in the inner (sub-tree) fadd of lane 3:
; fc[0] = fa[0]+(fb[0]-fd[0]);
; fc[1] = fa[1]-(fb[1]+fd[1]);
; fc[2] = fa[2]+(fb[2]-fd[2]);
; fc[3] = fa[3]-(fd[3]+fb[3]); //swapped fd and fb

; CHECK-LABEL: @reorder_alt_subTree
; CHECK: %4 = fsub <4 x float> %3, %2
; CHECK: %5 = fadd <4 x float> %3, %2
; CHECK: %6 = shufflevector <4 x float> %4, <4 x float> %5, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK: %7 = fadd <4 x float> %1, %6
; CHECK: %8 = fsub <4 x float> %1, %6
; CHECK: %9 = shufflevector <4 x float> %7, <4 x float> %8, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
define void @reorder_alt_subTree() #0 {
  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
  %3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 0), align 4
  %4 = fsub float %2, %3
  %5 = fadd float %1, %4
  store float %5, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
  %6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
  %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 1), align 4
  %9 = fadd float %7, %8
  %10 = fsub float %6, %9
  store float %10, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
  %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
  %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
  %13 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 2), align 4
  %14 = fsub float %12, %13
  %15 = fadd float %11, %14
  store float %15, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
  %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
  %17 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 3), align 4
  %18 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
  %19 = fadd float %17, %18
  %20 = fsub float %16, %19
  store float %20, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
  ret void
}

; Check vectorization of the following code for the double data type. In
; lane 1 the (a+b) sub-expression is the right-hand operand of the outer fadd,
; whereas in lane 0 it is the left-hand operand of the outer fsub:
; c[0] = (a[0]+b[0])-d[0];
; c[1] = d[1]+(a[1]+b[1]); //swapped d[1] and (a[1]+b[1])

; CHECK-LABEL: @reorder_alt_rightsubTree
; CHECK: fadd <2 x double>
; CHECK: fsub <2 x double>
; CHECK: shufflevector <2 x double>
define void @reorder_alt_rightsubTree(double* nocapture %c, double* noalias nocapture readonly %a, double* noalias nocapture readonly %b, double* noalias nocapture readonly %d) {
  %1 = load double, double* %a
  %2 = load double, double* %b
  %3 = fadd double %1, %2
  %4 = load double, double* %d
  %5 = fsub double %3, %4
  store double %5, double* %c
  %6 = getelementptr inbounds double, double* %d, i64 1
  %7 = load double, double* %6
  %8 = getelementptr inbounds double, double* %a, i64 1
  %9 = load double, double* %8
  %10 = getelementptr inbounds double, double* %b, i64 1
  %11 = load double, double* %10
  %12 = fadd double %9, %11
  %13 = fadd double %7, %12
  %14 = getelementptr inbounds double, double* %c, i64 1
  store double %13, double* %14
  ret void
}

; Do not vectorize the following code for the float data type, because fsub is
; not commutative-
; fc[0] = fb[0]+fa[0];
; fc[1] = fa[1]-fb[1];
; fc[2] = fa[2]+fb[2];
; fc[3] = fb[3]-fa[3];
; In the above code the operands of the 1st and 3rd operations can be swapped
; because fadd is commutative, but the operands of the 2nd and 4th operations
; cannot, because fsub is not commutative.

; Negative case: lane 1 computes fa[1]-fb[1] while lane 3 computes
; fb[3]-fa[3], i.e. the two fsub lanes use opposite operand orders. The
; directives below require that no <4 x float> fadd/fsub and no shufflevector
; appear in the output for this function.
; CHECK-LABEL: @no_vec_shuff_reorder
; CHECK-NOT: fadd <4 x float>
; CHECK-NOT: fsub <4 x float>
; CHECK-NOT: shufflevector
define void @no_vec_shuff_reorder() #0 {
  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
  %3 = fadd float %1, %2
  store float %3, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
  %6 = fsub float %4, %5
  store float %6, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
  %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
  %9 = fadd float %7, %8
  store float %9, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
  %10 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
  %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
  %12 = fsub float %10, %11
  store float %12, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
  ret void
}


; Attribute group referenced as #0 by the function definitions in this file.
attributes #0 = { nounwind }
