; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s -check-prefix=CHECK --check-prefix=AVX
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx2 < %s | FileCheck %s -check-prefix=CHECK --check-prefix=AVX2

; Check constant loads of every 128-bit and 256-bit vector type
; for size optimization using splat ops available with AVX and AVX2.

; There is no AVX broadcast from double to 128-bit vector because movddup has been around since SSE3 (grrr).
define <2 x double> @splat_v2f64(<2 x double> %x) #0 {
  %add = fadd <2 x double> %x, <double 1.0, double 1.0>
  ret <2 x double> %add
; CHECK-LABEL: splat_v2f64:
; CHECK: vmovddup
; CHECK: vaddpd
; CHECK-NEXT: retq
}

; 256-bit double splat: both feature levels are expected to broadcast the
; constant with vbroadcastsd and feed it straight into the add.
define <4 x double> @splat_v4f64(<4 x double> %x) #0 {
  %add = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
  ret <4 x double> %add
; CHECK-LABEL: splat_v4f64:
; CHECK: vbroadcastsd
; CHECK-NEXT: vaddpd
; CHECK-NEXT: retq
}

; 128-bit float splat: both feature levels are expected to use vbroadcastss.
define <4 x float> @splat_v4f32(<4 x float> %x) #0 {
  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  ret <4 x float> %add
; CHECK-LABEL: splat_v4f32:
; CHECK: vbroadcastss
; CHECK-NEXT: vaddps
; CHECK-NEXT: retq
}

; 256-bit float splat: both feature levels are expected to use vbroadcastss.
define <8 x float> @splat_v8f32(<8 x float> %x) #0 {
  %add = fadd <8 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  ret <8 x float> %add
; CHECK-LABEL: splat_v8f32:
; CHECK: vbroadcastss
; CHECK-NEXT: vaddps
; CHECK-NEXT: retq
}

; AVX can't do integer splats, so fake it: use vmovddup to splat 64-bit value.
; We also generate vmovddup for AVX2 because it's one byte smaller than vpbroadcastq.
define <2 x i64> @splat_v2i64(<2 x i64> %x) #0 {
  %add = add <2 x i64> %x, <i64 1, i64 1>
  ret <2 x i64> %add
; CHECK-LABEL: splat_v2i64:
; CHECK: vmovddup
; CHECK: vpaddq
; CHECK-NEXT: retq
}

; AVX can't do 256-bit integer ops, so we split this into two 128-bit vectors,
; and then we fake it: use vmovddup to splat 64-bit value.
; With AVX2 a single vpbroadcastq and a single vpaddq are expected instead.
define <4 x i64> @splat_v4i64(<4 x i64> %x) #0 {
  %add = add <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %add
; CHECK-LABEL: splat_v4i64:
; AVX: vmovddup
; AVX: vpaddq
; AVX: vpaddq
; AVX2: vpbroadcastq
; AVX2: vpaddq
; CHECK: retq
}

; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
; With AVX2 the integer broadcast vpbroadcastd is expected instead.
define <4 x i32> @splat_v4i32(<4 x i32> %x) #0 {
  %add = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %add
; CHECK-LABEL: splat_v4i32:
; AVX: vbroadcastss
; AVX2: vpbroadcastd
; CHECK-NEXT: vpaddd
; CHECK-NEXT: retq
}

; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
; The plain-AVX run expects two vpaddd instructions (one per 128-bit half);
; the AVX2 run expects a single vpbroadcastd and a single vpaddd.
define <8 x i32> @splat_v8i32(<8 x i32> %x) #0 {
  %add = add <8 x i32> %x, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %add
; CHECK-LABEL: splat_v8i32:
; AVX: vbroadcastss
; AVX: vpaddd
; AVX: vpaddd
; AVX2: vpbroadcastd
; AVX2: vpaddd
; CHECK: retq
}

; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
define <8 x i16> @splat_v8i16(<8 x i16> %x) #0 {
  %add = add <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %add
; CHECK-LABEL: splat_v8i16:
; AVX-NOT: broadcast
; AVX2: vpbroadcastw
; CHECK: vpaddw
; CHECK-NEXT: retq
}

; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
define <16 x i16> @splat_v16i16(<16 x i16> %x) #0 {
  %add = add <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %add
; CHECK-LABEL: splat_v16i16:
; AVX-NOT: broadcast
; AVX: vpaddw
; AVX: vpaddw
; AVX2: vpbroadcastw
; AVX2: vpaddw
; CHECK: retq
}

; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
define <16 x i8> @splat_v16i8(<16 x i8> %x) #0 {
  %add = add <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %add
; CHECK-LABEL: splat_v16i8:
; AVX-NOT: broadcast
; AVX2: vpbroadcastb
; CHECK: vpaddb
; CHECK-NEXT: retq
}

; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
  %add = add <32 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <32 x i8> %add
; CHECK-LABEL: splat_v32i8:
; AVX-NOT: broadcast
; AVX: vpaddb
; AVX: vpaddb
; AVX2: vpbroadcastb
; AVX2: vpaddb
; CHECK: retq
}

; Functions tagged #0 are compiled with the optsize attribute.
attributes #0 = { optsize }