; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2

; Check constant loads of every 128-bit and 256-bit vector type
; for size optimization using splat ops available with AVX and AVX2.
;
; Each function below adds a splat-of-1 constant to its argument. The functions
; reference attribute group #0 or #1, which are defined at the end of this file
; as optsize and minsize respectively. Assertions using the shared prefix apply
; to both llc invocations above; the other two prefixes are specific to one
; -mattr setting each.

; There is no AVX broadcast from double to 128-bit vector because movddup has been around since SSE3 (grrr).
; Both configurations are therefore expected to splat the constant with vmovddup.
define <2 x double> @splat_v2f64(<2 x double> %x) #0 {
; CHECK-LABEL: splat_v2f64:
; CHECK: # BB#0:
; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %add = fadd <2 x double> %x, <double 1.0, double 1.0>
  ret <2 x double> %add
}

; 256-bit double splat: both configurations are expected to load the constant
; with a single vbroadcastsd from a RIP-relative address into a ymm register.
define <4 x double> @splat_v4f64(<4 x double> %x) #1 {
; CHECK-LABEL: splat_v4f64:
; CHECK: # BB#0:
; CHECK-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %add = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
  ret <4 x double> %add
}

; 128-bit float splat: both configurations are expected to load the constant
; with vbroadcastss from a RIP-relative address into an xmm register.
define <4 x float> @splat_v4f32(<4 x float> %x) #0 {
; CHECK-LABEL: splat_v4f32:
; CHECK: # BB#0:
; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  ret <4 x float> %add
}

; 256-bit float splat: both configurations are expected to load the constant
; with vbroadcastss from a RIP-relative address into a ymm register.
define <8 x float> @splat_v8f32(<8 x float> %x) #1 {
; CHECK-LABEL: splat_v8f32:
; CHECK: # BB#0:
; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %add = fadd <8 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  ret <8 x float> %add
}

; AVX can't do integer splats, so fake it: use vmovddup to splat 64-bit value.
; We also generate vmovddup for AVX2 because it's one byte smaller than vpbroadcastq.
; The assertions below use the shared prefix, so the same vmovddup + vpaddq
; sequence is expected for both -mattr settings.
define <2 x i64> @splat_v2i64(<2 x i64> %x) #1 {
; CHECK-LABEL: splat_v2i64:
; CHECK: # BB#0:
; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %add = add <2 x i64> %x, <i64 1, i64 1>
  ret <2 x i64> %add
}

; AVX can't do 256-bit integer ops, so we split this into two 128-bit vectors,
; and then we fake it: use vmovddup to splat 64-bit value.
; With -mattr=+avx2 the expected code is instead a single vpbroadcastq into a
; ymm register followed by one 256-bit vpaddq.
define <4 x i64> @splat_v4i64(<4 x i64> %x) #0 {
; AVX-LABEL: splat_v4i64:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; AVX-NEXT: vpaddq %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX2-LABEL: splat_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
  %add = add <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %add
}

; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
; With -mattr=+avx2 the integer broadcast vpbroadcastd is expected instead.
define <4 x i32> @splat_v4i32(<4 x i32> %x) #1 {
; AVX-LABEL: splat_v4i32:
; AVX: # BB#0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX2-LABEL: splat_v4i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
  %add = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %add
}

; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
; The 256-bit integer add is additionally split into two 128-bit halves for AVX;
; with -mattr=+avx2 a single vpbroadcastd into a ymm register is expected.
define <8 x i32> @splat_v8i32(<8 x i32> %x) #0 {
; AVX-LABEL: splat_v8i32:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX2-LABEL: splat_v8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
  %add = add <8 x i32> %x, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
; For AVX the constant is therefore expected to be folded into vpaddw as a
; RIP-relative memory operand; with -mattr=+avx2 vpbroadcastw is expected.
define <8 x i16> @splat_v8i16(<8 x i16> %x) #1 {
; AVX-LABEL: splat_v8i16:
; AVX: # BB#0:
; AVX-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX2-LABEL: splat_v8i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastw {{.*}}(%rip), %xmm1
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
  %add = add <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
; For AVX the 256-bit add is expected to be split into two 128-bit halves that
; share one vmovdqa-loaded constant; with -mattr=+avx2 vpbroadcastw into a ymm
; register is expected.
define <16 x i16> @splat_v16i16(<16 x i16> %x) #0 {
; AVX-LABEL: splat_v16i16:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX2-LABEL: splat_v16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastw {{.*}}(%rip), %ymm1
; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
  %add = add <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
; For AVX the constant is therefore expected to be folded into vpaddb as a
; RIP-relative memory operand; with -mattr=+avx2 vpbroadcastb is expected.
define <16 x i8> @splat_v16i8(<16 x i8> %x) #1 {
; AVX-LABEL: splat_v16i8:
; AVX: # BB#0:
; AVX-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX2-LABEL: splat_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastb {{.*}}(%rip), %xmm1
; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
  %add = add <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
; For AVX the 256-bit add is expected to be split into two 128-bit halves that
; share one vmovdqa-loaded constant; with -mattr=+avx2 vpbroadcastb into a ymm
; register is expected.
define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
; AVX-LABEL: splat_v32i8:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX2-LABEL: splat_v32i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastb {{.*}}(%rip), %ymm1
; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
  %add = add <32 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <32 x i8> %add
}

; PR23259: Verify that ISel doesn't crash with a 'fatal error in backend'
; due to a missing AVX pattern to select a v2i64 X86ISD::BROADCAST of a
; loadi64 with multiple uses.
;
; This function carries no FileCheck assertions: it only has to make it through
; llc without a backend error.

; Global that pr23259 loads from; declared as <3 x i64> but read through a
; bitcast as <4 x i64>.
@A = common global <3 x i64> zeroinitializer, align 32

define <8 x i64> @pr23259() #1 {
entry:
  ; Load @A as a 4-element vector, keep only element 2 (the other lanes are
  ; undef), then build the 8-element result: lane 0 takes that loaded element
  ; (mask index 5 selects element 2 of the second operand) and lanes 1-7 take
  ; the constant i64 1 (mask index 0).
  %0 = load <4 x i64>, <4 x i64>* bitcast (<3 x i64>* @A to <4 x i64>*), align 32
  %1 = shufflevector <4 x i64> %0, <4 x i64> undef, <3 x i32> <i32 undef, i32 undef, i32 2>
  %shuffle = shufflevector <3 x i64> <i64 1, i64 undef, i64 undef>, <3 x i64> %1, <8 x i32> <i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i64> %shuffle
}

; Attribute groups referenced by the functions in this file: #0 marks a
; function optsize, #1 marks it minsize.
attributes #0 = { optsize }
attributes #1 = { minsize }