target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s

; Each function below builds two parallel scalar chains of depth three
; (sub -> intrinsic call -> add) whose results are multiplied together.
; The positive tests expect the two chains to be fused into <2 x T> vector
; operations, including a call to the vector form of the intrinsic.
; The negative tests (@test4, @testctlzneg, @testcttzneg) pass a different
; scalar operand to each call and expect the code to stay scalar.

declare double @llvm.fma.f64(double, double, double)
declare double @llvm.fmuladd.f64(double, double, double)
declare double @llvm.cos.f64(double)
declare double @llvm.powi.f64(double, i32)
declare double @llvm.round.f64(double)
declare double @llvm.copysign.f64(double, double)
declare double @llvm.ceil.f64(double)
declare double @llvm.nearbyint.f64(double)
declare double @llvm.rint.f64(double)
declare double @llvm.trunc.f64(double)
declare double @llvm.floor.f64(double)
declare double @llvm.fabs.f64(double)
declare i64 @llvm.bswap.i64(i64)
declare i64 @llvm.ctpop.i64(i64)
declare i64 @llvm.ctlz.i64(i64, i1)
declare i64 @llvm.cttz.i64(i64, i1)

; Basic depth-3 chain with fma
define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
  %Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @test1(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
; CHECK: %Y1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with fmuladd
define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1)
  %Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @test1a(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
; CHECK: %Y1 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with cos
define double @test2(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.cos.f64(double %X1)
  %Y2 = call double @llvm.cos.f64(double %X2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @test2(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.cos.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with powi (both calls share the same power %P)
define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {

  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
  %Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @test3(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.powi.v2f64(<2 x double> %X1, i32 %P)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R
}

; Basic depth-3 chain with powi (different powers: should not vectorize)
define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {

  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %P2 = add i32 %P, 1
  %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
  %Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @test4(
; CHECK-NOT: <2 x double>
; CHECK: ret double %R
}

; Basic depth-3 chain with round
define double @testround(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.round.f64(double %X1)
  %Y2 = call double @llvm.round.f64(double %X2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @testround(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.round.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R

}

; Basic depth-3 chain with copysign.
; Both calls take %A1 as the sign operand, so the expected sign vector is
; built by inserting %A1 into lane 1 of %X1.v.i0.1 (which already holds %A1
; in lane 0).
define double @testcopysign(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.copysign.f64(double %X1, double %A1)
  %Y2 = call double @llvm.copysign.f64(double %X2, double %A1)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @testcopysign(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1.v.i1.2 = insertelement <2 x double> %X1.v.i0.1, double %A1, i32 1
; CHECK: %Y1 = call <2 x double> @llvm.copysign.v2f64(<2 x double> %X1, <2 x double> %Y1.v.i1.2)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R

}

; Basic depth-3 chain with ceil
define double @testceil(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.ceil.f64(double %X1)
  %Y2 = call double @llvm.ceil.f64(double %X2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @testceil(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R

}

; Basic depth-3 chain with nearbyint
define double @testnearbyint(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.nearbyint.f64(double %X1)
  %Y2 = call double @llvm.nearbyint.f64(double %X2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @testnearbyint(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R

}

; Basic depth-3 chain with rint
define double @testrint(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.rint.f64(double %X1)
  %Y2 = call double @llvm.rint.f64(double %X2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @testrint(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.rint.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R

}

; Basic depth-3 chain with trunc
define double @testtrunc(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.trunc.f64(double %X1)
  %Y2 = call double @llvm.trunc.f64(double %X2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @testtrunc(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R

}

; Basic depth-3 chain with floor
define double @testfloor(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.floor.f64(double %X1)
  %Y2 = call double @llvm.floor.f64(double %X2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @testfloor(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R

}

; Basic depth-3 chain with fabs
define double @testfabs(double %A1, double %A2, double %B1, double %B2) {
  %X1 = fsub double %A1, %B1
  %X2 = fsub double %A2, %B2
  %Y1 = call double @llvm.fabs.f64(double %X1)
  %Y2 = call double @llvm.fabs.f64(double %X2)
  %Z1 = fadd double %Y1, %B1
  %Z2 = fadd double %Y2, %B2
  %R = fmul double %Z1, %Z2
  ret double %R
; CHECK-LABEL: @testfabs(
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.fabs.v2f64(<2 x double> %X1)
; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
; CHECK: ret double %R

}

; Basic depth-3 chain with bswap
define i64 @testbswap(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
  %X1 = sub i64 %A1, %B1
  %X2 = sub i64 %A2, %B2
  %Y1 = call i64 @llvm.bswap.i64(i64 %X1)
  %Y2 = call i64 @llvm.bswap.i64(i64 %X2)
  %Z1 = add i64 %Y1, %B1
  %Z2 = add i64 %Y2, %B2
  %R = mul i64 %Z1, %Z2
  ret i64 %R

; CHECK-LABEL: @testbswap(
; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %X1)
; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
; CHECK: ret i64 %R

}

; Basic depth-3 chain with ctpop
define i64 @testctpop(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
  %X1 = sub i64 %A1, %B1
  %X2 = sub i64 %A2, %B2
  %Y1 = call i64 @llvm.ctpop.i64(i64 %X1)
  %Y2 = call i64 @llvm.ctpop.i64(i64 %X2)
  %Z1 = add i64 %Y1, %B1
  %Z2 = add i64 %Y2, %B2
  %R = mul i64 %Z1, %Z2
  ret i64 %R

; CHECK-LABEL: @testctpop(
; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %X1)
; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
; CHECK: ret i64 %R

}

; Basic depth-3 chain with ctlz (both calls pass the same i1 flag)
define i64 @testctlz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
  %X1 = sub i64 %A1, %B1
  %X2 = sub i64 %A2, %B2
  %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
  %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 true)
  %Z1 = add i64 %Y1, %B1
  %Z2 = add i64 %Y2, %B2
  %R = mul i64 %Z1, %Z2
  ret i64 %R

; CHECK-LABEL: @testctlz(
; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %X1, i1 true)
; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
; CHECK: ret i64 %R

}

; Depth-3 chain with ctlz where the two calls pass different i1 flags
; (true vs. false): should not vectorize
define i64 @testctlzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
  %X1 = sub i64 %A1, %B1
  %X2 = sub i64 %A2, %B2
  %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
  %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false)
  %Z1 = add i64 %Y1, %B1
  %Z2 = add i64 %Y2, %B2
  %R = mul i64 %Z1, %Z2
  ret i64 %R

; CHECK-LABEL: @testctlzneg(
; CHECK: %X1 = sub i64 %A1, %B1
; CHECK: %X2 = sub i64 %A2, %B2
; CHECK: %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
; CHECK: %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false)
; CHECK: %Z1 = add i64 %Y1, %B1
; CHECK: %Z2 = add i64 %Y2, %B2
; CHECK: %R = mul i64 %Z1, %Z2
; CHECK: ret i64 %R
}

; Basic depth-3 chain with cttz (both calls pass the same i1 flag)
define i64 @testcttz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
  %X1 = sub i64 %A1, %B1
  %X2 = sub i64 %A2, %B2
  %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
  %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 true)
  %Z1 = add i64 %Y1, %B1
  %Z2 = add i64 %Y2, %B2
  %R = mul i64 %Z1, %Z2
  ret i64 %R

; CHECK-LABEL: @testcttz(
; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %X1, i1 true)
; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
; CHECK: ret i64 %R

}

; Depth-3 chain with cttz where the two calls pass different i1 flags
; (true vs. false): should not vectorize
define i64 @testcttzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
  %X1 = sub i64 %A1, %B1
  %X2 = sub i64 %A2, %B2
  %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
  %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false)
  %Z1 = add i64 %Y1, %B1
  %Z2 = add i64 %Y2, %B2
  %R = mul i64 %Z1, %Z2
  ret i64 %R

; CHECK-LABEL: @testcttzneg(
; CHECK: %X1 = sub i64 %A1, %B1
; CHECK: %X2 = sub i64 %A2, %B2
; CHECK: %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
; CHECK: %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false)
; CHECK: %Z1 = add i64 %Y1, %B1
; CHECK: %Z2 = add i64 %Y2, %B2
; CHECK: %R = mul i64 %Z1, %Z2
; CHECK: ret i64 %R
}

; Vector intrinsic declarations expected in the output module, all sharing
; attribute group #0.

; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
; CHECK: declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) #0
; CHECK: declare <2 x double> @llvm.round.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) #0
; CHECK: declare <2 x double> @llvm.ceil.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.rint.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.trunc.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.floor.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0
; CHECK: declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) #0
; CHECK: declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) #0
; CHECK: declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) #0
; CHECK: declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) #0
; CHECK: attributes #0 = { nounwind readnone }