; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512

; Test ADDSUB ISel patterns.

; Functions test1-test4 below are obtained from the following source:
;
; typedef double double2 __attribute__((ext_vector_type(2)));
; typedef double double4 __attribute__((ext_vector_type(4)));
; typedef float float4 __attribute__((ext_vector_type(4)));
; typedef float float8 __attribute__((ext_vector_type(8)));
;
; float4 test1(float4 A, float4 B) {
;   float4 X = A - B;
;   float4 Y = A + B;
;   return (float4){X[0], Y[1], X[2], Y[3]};
; }
;
; float8 test2(float8 A, float8 B) {
;   float8 X = A - B;
;   float8 Y = A + B;
;   return (float8){X[0], Y[1], X[2], Y[3], X[4], Y[5], X[6], Y[7]};
; }
;
; double4 test3(double4 A, double4 B) {
;   double4 X = A - B;
;   double4 Y = A + B;
;   return (double4){X[0], Y[1], X[2], Y[3]};
; }
;
; double2 test4(double2 A, double2 B) {
;   double2 X = A - B;
;   double2 Y = A + B;
;   return (double2){X[0], Y[1]};
; }
;
; The remaining functions are variations on the same pattern:
;   - test5 and test6 use 512-bit vectors (<16 x float> and <8 x double>).
;   - the "b" functions take B through a pointer and load it.
;   - the "c" functions also load B, and pass %add as the first shuffle
;     operand with a mask that still selects even lanes from %sub and odd
;     lanes from %add.

; <4 x float>: even result lanes come from A - B, odd lanes from A + B.
define <4 x float> @test1(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: test1:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sub = fsub <4 x float> %A, %B
  %add = fadd <4 x float> %A, %B
  %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %vecinit6
}

; <8 x float>: the SSE3 run expects two 128-bit addsubps, the AVX runs a single
; 256-bit vaddsubps.
define <8 x float> @test2(<8 x float> %A, <8 x float> %B) {
; SSE-LABEL: test2:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps %xmm2, %xmm0
; SSE-NEXT:    addsubps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %sub = fsub <8 x float> %A, %B
  %add = fadd <8 x float> %A, %B
  %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %vecinit14
}

; <4 x double>: the SSE3 run expects two 128-bit addsubpd, the AVX runs a single
; 256-bit vaddsubpd.
define <4 x double> @test3(<4 x double> %A, <4 x double> %B) {
; SSE-LABEL: test3:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd %xmm2, %xmm0
; SSE-NEXT:    addsubpd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %sub = fsub <4 x double> %A, %B
  %add = fadd <4 x double> %A, %B
  %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %vecinit6
}

; <2 x double>. Here the fadd is emitted before the fsub in the IR; the
; expected code is the same single addsubpd.
define <2 x double> @test4(<2 x double> %A, <2 x double> %B) {
; SSE-LABEL: test4:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %add = fadd <2 x double> %A, %B
  %sub = fsub <2 x double> %A, %B
  %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %vecinit2
}

; <16 x float>: the SSE3 run expects four addsubps and the plain AVX run two
; 256-bit vaddsubps. The AVX512F run expects separate 512-bit vaddps/vsubps
; whose results are recombined with shuffles.
define <16 x float> @test5(<16 x float> %A, <16 x float> %B) {
; SSE-LABEL: test5:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps %xmm4, %xmm0
; SSE-NEXT:    addsubps %xmm5, %xmm1
; SSE-NEXT:    addsubps %xmm6, %xmm2
; SSE-NEXT:    addsubps %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: test5:
; AVX1:       # BB#0:
; AVX1-NEXT:    vaddsubps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vaddsubps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: test5:
; AVX512:       # BB#0:
; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vsubps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vshufps {{.*#+}} zmm0 = zmm0[0,2],zmm2[1,3],zmm0[4,6],zmm2[5,7],zmm0[8,10],zmm2[9,11],zmm0[12,14],zmm2[13,15]
; AVX512-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[0,2,1,3,4,6,5,7,8,10,9,11,12,14,13,15]
; AVX512-NEXT:    retq
  %add = fadd <16 x float> %A, %B
  %sub = fsub <16 x float> %A, %B
  %vecinit2 = shufflevector <16 x float> %sub, <16 x float> %add, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x float> %vecinit2
}

; <8 x double>: the SSE3 run expects four addsubpd and the plain AVX run two
; 256-bit vaddsubpd. The AVX512F run expects separate 512-bit vaddpd/vsubpd
; recombined with a single vshufpd.
define <8 x double> @test6(<8 x double> %A, <8 x double> %B) {
; SSE-LABEL: test6:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd %xmm4, %xmm0
; SSE-NEXT:    addsubpd %xmm5, %xmm1
; SSE-NEXT:    addsubpd %xmm6, %xmm2
; SSE-NEXT:    addsubpd %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: test6:
; AVX1:       # BB#0:
; AVX1-NEXT:    vaddsubpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vaddsubpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: test6:
; AVX512:       # BB#0:
; AVX512-NEXT:    vaddpd %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vsubpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],zmm2[1],zmm0[2],zmm2[3],zmm0[4],zmm2[5],zmm0[6],zmm2[7]
; AVX512-NEXT:    retq
  %add = fadd <8 x double> %A, %B
  %sub = fsub <8 x double> %A, %B
  %vecinit2 = shufflevector <8 x double> %sub, <8 x double> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x double> %vecinit2
}

; Same pattern as test1, but B is loaded through a pointer; the expected code
; uses the memory operand directly in the addsubps.
define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
; SSE-LABEL: test1b:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1b:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %B
  %add = fadd <4 x float> %A, %1
  %sub = fsub <4 x float> %A, %1
  %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %vecinit6
}

; <8 x float> with B loaded through a pointer. The SSE3 run expects the two
; 128-bit halves to be read at offsets 0 and 16 from the pointer; the AVX runs
; expect one 256-bit vaddsubps with a memory operand.
define <8 x float> @test2b(<8 x float> %A, <8 x float>* %B) {
; SSE-LABEL: test2b:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    addsubps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2b:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %rhs = load <8 x float>, <8 x float>* %B
  %sum = fadd <8 x float> %A, %rhs
  %diff = fsub <8 x float> %A, %rhs
  %blend = shufflevector <8 x float> %diff, <8 x float> %sum, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %blend
}

; <4 x double> with B loaded through a pointer.
define <4 x double> @test3b(<4 x double> %A, <4 x double>* %B) {
; SSE-LABEL: test3b:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3b:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %rhs = load <4 x double>, <4 x double>* %B
  %sum = fadd <4 x double> %A, %rhs
  %diff = fsub <4 x double> %A, %rhs
  %blend = shufflevector <4 x double> %diff, <4 x double> %sum, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %blend
}

; <2 x double> with B loaded through a pointer. The fsub precedes the fadd in
; this function.
define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) {
; SSE-LABEL: test4b:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4b:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %rhs = load <2 x double>, <2 x double>* %B
  %diff = fsub <2 x double> %A, %rhs
  %sum = fadd <2 x double> %A, %rhs
  %blend = shufflevector <2 x double> %diff, <2 x double> %sum, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %blend
}

; Commuted shuffle, <4 x float>: the sum is the first shuffle operand and the
; difference the second. Mask indices 4 and 6 pick lanes 0 and 2 of the
; difference, indices 1 and 3 pick lanes 1 and 3 of the sum.
define <4 x float> @test1c(<4 x float> %A, <4 x float>* %B) {
; SSE-LABEL: test1c:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1c:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %rhs = load <4 x float>, <4 x float>* %B
  %sum = fadd <4 x float> %A, %rhs
  %diff = fsub <4 x float> %A, %rhs
  %blend = shufflevector <4 x float> %sum, <4 x float> %diff, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x float> %blend
}

; Commuted shuffle, <8 x float>: even result lanes come from the difference
; (mask indices 8, 10, 12, 14), odd lanes from the sum.
define <8 x float> @test2c(<8 x float> %A, <8 x float>* %B) {
; SSE-LABEL: test2c:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    addsubps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2c:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %rhs = load <8 x float>, <8 x float>* %B
  %sum = fadd <8 x float> %A, %rhs
  %diff = fsub <8 x float> %A, %rhs
  %blend = shufflevector <8 x float> %sum, <8 x float> %diff, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x float> %blend
}

; Commuted shuffle, <4 x double>.
define <4 x double> @test3c(<4 x double> %A, <4 x double>* %B) {
; SSE-LABEL: test3c:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3c:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %rhs = load <4 x double>, <4 x double>* %B
  %sum = fadd <4 x double> %A, %rhs
  %diff = fsub <4 x double> %A, %rhs
  %blend = shufflevector <4 x double> %sum, <4 x double> %diff, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x double> %blend
}

; Commuted shuffle, <2 x double>: mask index 2 picks lane 0 of the difference,
; index 1 picks lane 1 of the sum. The fsub precedes the fadd in this function.
define <2 x double> @test4c(<2 x double> %A, <2 x double>* %B) {
; SSE-LABEL: test4c:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4c:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %rhs = load <2 x double>, <2 x double>* %B
  %diff = fsub <2 x double> %A, %rhs
  %sum = fadd <2 x double> %A, %rhs
  %blend = shufflevector <2 x double> %sum, <2 x double> %diff, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %blend
}