; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW

; Each function below spells out a pairwise ("horizontal") add in scalar form:
; extract adjacent lanes of %a and %b, add each pair, and rebuild the result
; vector with insertelement. The expectations record, per RUN configuration,
; whether the output is two shufflevectors feeding one vector add, two
; half-width adds that are concatenated, or the untouched scalar sequence.
; The scalar operand order is part of the test input; do not reorder it.

;
; 128-bit vectors
;

; result = <a0+a1, b0+b1> on double lanes.
; The -mcpu=slm expectations keep the scalar form for this function.
define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: @test_v2f64(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; SSE-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; SSE-NEXT:    ret <2 x double> [[TMP3]]
;
; SLM-LABEL: @test_v2f64(
; SLM-NEXT:    [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
; SLM-NEXT:    [[A1:%.*]] = extractelement <2 x double> [[A]], i32 1
; SLM-NEXT:    [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
; SLM-NEXT:    [[B1:%.*]] = extractelement <2 x double> [[B]], i32 1
; SLM-NEXT:    [[R0:%.*]] = fadd double [[A0]], [[A1]]
; SLM-NEXT:    [[R1:%.*]] = fadd double [[B0]], [[B1]]
; SLM-NEXT:    [[R00:%.*]] = insertelement <2 x double> undef, double [[R0]], i32 0
; SLM-NEXT:    [[R01:%.*]] = insertelement <2 x double> [[R00]], double [[R1]], i32 1
; SLM-NEXT:    ret <2 x double> [[R01]]
;
; AVX-LABEL: @test_v2f64(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; AVX-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <2 x double> [[TMP3]]
;
; AVX512-LABEL: @test_v2f64(
; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; AVX512-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; AVX512-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; AVX512-NEXT:    ret <2 x double> [[TMP3]]
;
  %a0 = extractelement <2 x double> %a, i32 0
  %a1 = extractelement <2 x double> %a, i32 1
  %b0 = extractelement <2 x double> %b, i32 0
  %b1 = extractelement <2 x double> %b, i32 1
  %r0 = fadd double %a0, %a1
  %r1 = fadd double %b0, %b1
  %r00 = insertelement <2 x double> undef, double %r0, i32 0
  %r01 = insertelement <2 x double> %r00, double %r1, i32 1
  ret <2 x double> %r01
}

; result = <a0+a1, a2+a3, b0+b1, b2+b3> on float lanes.
; All RUN configurations share one expectation.
define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_v4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <4 x float> [[TMP3]]
;
  %a0 = extractelement <4 x float> %a, i32 0
  %a1 = extractelement <4 x float> %a, i32 1
  %a2 = extractelement <4 x float> %a, i32 2
  %a3 = extractelement <4 x float> %a, i32 3
  %b0 = extractelement <4 x float> %b, i32 0
  %b1 = extractelement <4 x float> %b, i32 1
  %b2 = extractelement <4 x float> %b, i32 2
  %b3 = extractelement <4 x float> %b, i32 3
  %r0 = fadd float %a0, %a1
  %r1 = fadd float %a2, %a3
  %r2 = fadd float %b0, %b1
  %r3 = fadd float %b2, %b3
  %r00 = insertelement <4 x float> undef, float %r0, i32 0
  %r01 = insertelement <4 x float> %r00, float %r1, i32 1
  %r02 = insertelement <4 x float> %r01, float %r2, i32 2
  %r03 = insertelement <4 x float> %r02, float %r3, i32 3
  ret <4 x float> %r03
}

; result = <a0+a1, b0+b1> on i64 lanes.
; The -mcpu=slm expectations keep the scalar form for this function.
define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: @test_v2i64(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
; SSE-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
; SSE-NEXT:    ret <2 x i64> [[TMP3]]
;
; SLM-LABEL: @test_v2i64(
; SLM-NEXT:    [[A0:%.*]] = extractelement <2 x i64> [[A:%.*]], i32 0
; SLM-NEXT:    [[A1:%.*]] = extractelement <2 x i64> [[A]], i32 1
; SLM-NEXT:    [[B0:%.*]] = extractelement <2 x i64> [[B:%.*]], i32 0
; SLM-NEXT:    [[B1:%.*]] = extractelement <2 x i64> [[B]], i32 1
; SLM-NEXT:    [[R0:%.*]] = add i64 [[A0]], [[A1]]
; SLM-NEXT:    [[R1:%.*]] = add i64 [[B0]], [[B1]]
; SLM-NEXT:    [[R00:%.*]] = insertelement <2 x i64> undef, i64 [[R0]], i32 0
; SLM-NEXT:    [[R01:%.*]] = insertelement <2 x i64> [[R00]], i64 [[R1]], i32 1
; SLM-NEXT:    ret <2 x i64> [[R01]]
;
; AVX-LABEL: @test_v2i64(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
; AVX-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <2 x i64> [[TMP3]]
;
; AVX512-LABEL: @test_v2i64(
; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; AVX512-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
; AVX512-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
; AVX512-NEXT:    ret <2 x i64> [[TMP3]]
;
  %a0 = extractelement <2 x i64> %a, i32 0
  %a1 = extractelement <2 x i64> %a, i32 1
  %b0 = extractelement <2 x i64> %b, i32 0
  %b1 = extractelement <2 x i64> %b, i32 1
  %r0 = add i64 %a0, %a1
  %r1 = add i64 %b0, %b1
  %r00 = insertelement <2 x i64> undef, i64 %r0, i32 0
  %r01 = insertelement <2 x i64> %r00, i64 %r1, i32 1
  ret <2 x i64> %r01
}

; result = <a0+a1, a2+a3, b0+b1, b2+b3> on i32 lanes.
; All RUN configurations share one expectation.
define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @test_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %a0 = extractelement <4 x i32> %a, i32 0
  %a1 = extractelement <4 x i32> %a, i32 1
  %a2 = extractelement <4 x i32> %a, i32 2
  %a3 = extractelement <4 x i32> %a, i32 3
  %b0 = extractelement <4 x i32> %b, i32 0
  %b1 = extractelement <4 x i32> %b, i32 1
  %b2 = extractelement <4 x i32> %b, i32 2
  %b3 = extractelement <4 x i32> %b, i32 3
  %r0 = add i32 %a0, %a1
  %r1 = add i32 %a2, %a3
  %r2 = add i32 %b0, %b1
  %r3 = add i32 %b2, %b3
  %r00 = insertelement <4 x i32> undef, i32 %r0, i32 0
  %r01 = insertelement <4 x i32> %r00, i32 %r1, i32 1
  %r02 = insertelement <4 x i32> %r01, i32 %r2, i32 2
  %r03 = insertelement <4 x i32> %r02, i32 %r3, i32 3
  ret <4 x i32> %r03
}

; result = <a0+a1, a2+a3, a4+a5, a6+a7, b0+b1, b2+b3, b4+b5, b6+b7> on i16 lanes.
; All RUN configurations share one expectation.
define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @test_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %a0 = extractelement <8 x i16> %a, i32 0
  %a1 = extractelement <8 x i16> %a, i32 1
  %a2 = extractelement <8 x i16> %a, i32 2
  %a3 = extractelement <8 x i16> %a, i32 3
  %a4 = extractelement <8 x i16> %a, i32 4
  %a5 = extractelement <8 x i16> %a, i32 5
  %a6 = extractelement <8 x i16> %a, i32 6
  %a7 = extractelement <8 x i16> %a, i32 7
  %b0 = extractelement <8 x i16> %b, i32 0
  %b1 = extractelement <8 x i16> %b, i32 1
  %b2 = extractelement <8 x i16> %b, i32 2
  %b3 = extractelement <8 x i16> %b, i32 3
  %b4 = extractelement <8 x i16> %b, i32 4
  %b5 = extractelement <8 x i16> %b, i32 5
  %b6 = extractelement <8 x i16> %b, i32 6
  %b7 = extractelement <8 x i16> %b, i32 7
  %r0 = add i16 %a0, %a1
  %r1 = add i16 %a2, %a3
  %r2 = add i16 %a4, %a5
  %r3 = add i16 %a6, %a7
  %r4 = add i16 %b0, %b1
  %r5 = add i16 %b2, %b3
  %r6 = add i16 %b4, %b5
  %r7 = add i16 %b6, %b7
  %r00 = insertelement <8 x i16> undef, i16 %r0, i32 0
  %r01 = insertelement <8 x i16> %r00, i16 %r1, i32 1
  %r02 = insertelement <8 x i16> %r01, i16 %r2, i32 2
  %r03 = insertelement <8 x i16> %r02, i16 %r3, i32 3
  %r04 = insertelement <8 x i16> %r03, i16 %r4, i32 4
  %r05 = insertelement <8 x i16> %r04, i16 %r5, i32 5
  %r06 = insertelement <8 x i16> %r05, i16 %r6, i32 6
  %r07 = insertelement <8 x i16> %r06, i16 %r7, i32 7
  ret <8 x i16> %r07
}

;
; 256-bit vectors
;

; result = <a0+a1, b0+b1, a2+a3, b2+b3> on double lanes: the %a/%b pairs
; alternate within each half of the result. The default and -mcpu=slm
; expectations use two <2 x double> adds joined by a final shufflevector;
; the other configurations use a single <4 x double> add.
define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
; SSE-LABEL: @test_v4f64(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; SSE-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
; SSE-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
; SSE-NEXT:    [[R03:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT:    ret <4 x double> [[R03]]
;
; SLM-LABEL: @test_v4f64(
; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; SLM-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
; SLM-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
; SLM-NEXT:    [[R03:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT:    ret <4 x double> [[R03]]
;
; AVX-LABEL: @test_v4f64(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; AVX-NEXT:    [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <4 x double> [[TMP3]]
;
; AVX512-LABEL: @test_v4f64(
; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; AVX512-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; AVX512-NEXT:    [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
; AVX512-NEXT:    ret <4 x double> [[TMP3]]
;
  %a0 = extractelement <4 x double> %a, i32 0
  %a1 = extractelement <4 x double> %a, i32 1
  %a2 = extractelement <4 x double> %a, i32 2
  %a3 = extractelement <4 x double> %a, i32 3
  %b0 = extractelement <4 x double> %b, i32 0
  %b1 = extractelement <4 x double> %b, i32 1
  %b2 = extractelement <4 x double> %b, i32 2
  %b3 = extractelement <4 x double> %b, i32 3
  %r0 = fadd double %a0, %a1
  %r1 = fadd double %b0, %b1
  %r2 = fadd double %a2, %a3
  %r3 = fadd double %b2, %b3
  %r00 = insertelement <4 x double> undef, double %r0, i32 0
  %r01 = insertelement <4 x double> %r00, double %r1, i32 1
  %r02 = insertelement <4 x double> %r01, double %r2, i32 2
  %r03 = insertelement <4 x double> %r02, double %r3, i32 3
  ret <4 x double> %r03
}

; result = <a0+a1, a2+a3, b0+b1, b2+b3, a4+a5, a6+a7, b4+b5, b6+b7> on float
; lanes: each half of the result takes pairs from %a, then from %b. The
; default and -mcpu=slm expectations use two <4 x float> adds joined by a
; final shufflevector; the other configurations use a single <8 x float> add.
define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: @test_v8f32(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
; SSE-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SSE-NEXT:    [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]
; SSE-NEXT:    [[R07:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT:    ret <8 x float> [[R07]]
;
; SLM-LABEL: @test_v8f32(
; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
; SLM-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SLM-NEXT:    [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]
; SLM-NEXT:    [[R07:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT:    ret <8 x float> [[R07]]
;
; AVX-LABEL: @test_v8f32(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; AVX-NEXT:    [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <8 x float> [[TMP3]]
;
; AVX512-LABEL: @test_v8f32(
; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; AVX512-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; AVX512-NEXT:    [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
; AVX512-NEXT:    ret <8 x float> [[TMP3]]
;
  %a0 = extractelement <8 x float> %a, i32 0
  %a1 = extractelement <8 x float> %a, i32 1
  %a2 = extractelement <8 x float> %a, i32 2
  %a3 = extractelement <8 x float> %a, i32 3
  %a4 = extractelement <8 x float> %a, i32 4
  %a5 = extractelement <8 x float> %a, i32 5
  %a6 = extractelement <8 x float> %a, i32 6
  %a7 = extractelement <8 x float> %a, i32 7
  %b0 = extractelement <8 x float> %b, i32 0
  %b1 = extractelement <8 x float> %b, i32 1
  %b2 = extractelement <8 x float> %b, i32 2
  %b3 = extractelement <8 x float> %b, i32 3
  %b4 = extractelement <8 x float> %b, i32 4
  %b5 = extractelement <8 x float> %b, i32 5
  %b6 = extractelement <8 x float> %b, i32 6
  %b7 = extractelement <8 x float> %b, i32 7
  %r0 = fadd float %a0, %a1
  %r1 = fadd float %a2, %a3
  %r2 = fadd float %b0, %b1
  %r3 = fadd float %b2, %b3
  %r4 = fadd float %a4, %a5
  %r5 = fadd float %a6, %a7
  %r6 = fadd float %b4, %b5
  %r7 = fadd float %b6, %b7
  %r00 = insertelement <8 x float> undef, float %r0, i32 0
  %r01 = insertelement <8 x float> %r00, float %r1, i32 1
  %r02 = insertelement <8 x float> %r01, float %r2, i32 2
  %r03 = insertelement <8 x float> %r02, float %r3, i32 3
  %r04 = insertelement <8 x float> %r03, float %r4, i32 4
  %r05 = insertelement <8 x float> %r04, float %r5, i32 5
  %r06 = insertelement <8 x float> %r05, float %r6, i32 6
  %r07 = insertelement <8 x float> %r06, float %r7, i32 7
  ret <8 x float> %r07
}

; result = <a0+a1, b0+b1, a2+a3, b2+b3> on i64 lanes: the %a/%b pairs
; alternate within each half of the result. The default and -mcpu=slm
; expectations use two <2 x i64> adds joined by a final shufflevector;
; the other configurations use a single <4 x i64> add.
define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE-LABEL: @test_v4i64(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 1, i32 5>
; SSE-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 2, i32 6>
; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 3, i32 7>
; SSE-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP4]], [[TMP5]]
; SSE-NEXT:    [[R03:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT:    ret <4 x i64> [[R03]]
;
; SLM-LABEL: @test_v4i64(
; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 1, i32 5>
; SLM-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 2, i32 6>
; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 3, i32 7>
; SLM-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP4]], [[TMP5]]
; SLM-NEXT:    [[R03:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT:    ret <4 x i64> [[R03]]
;
; AVX-LABEL: @test_v4i64(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; AVX-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <4 x i64> [[TMP3]]
;
; AVX512-LABEL: @test_v4i64(
; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; AVX512-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; AVX512-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
; AVX512-NEXT:    ret <4 x i64> [[TMP3]]
;
  %a0 = extractelement <4 x i64> %a, i32 0
  %a1 = extractelement <4 x i64> %a, i32 1
  %a2 = extractelement <4 x i64> %a, i32 2
  %a3 = extractelement <4 x i64> %a, i32 3
  %b0 = extractelement <4 x i64> %b, i32 0
  %b1 = extractelement <4 x i64> %b, i32 1
  %b2 = extractelement <4 x i64> %b, i32 2
  %b3 = extractelement <4 x i64> %b, i32 3
  %r0 = add i64 %a0, %a1
  %r1 = add i64 %b0, %b1
  %r2 = add i64 %a2, %a3
  %r3 = add i64 %b2, %b3
  %r00 = insertelement <4 x i64> undef, i64 %r0, i32 0
  %r01 = insertelement <4 x i64> %r00, i64 %r1, i32 1
  %r02 = insertelement <4 x i64> %r01, i64 %r2, i32 2
  %r03 = insertelement <4 x i64> %r02, i64 %r3, i32 3
  ret <4 x i64> %r03
}

; result = <a0+a1, a2+a3, b0+b1, b2+b3, a4+a5, a6+a7, b4+b5, b6+b7> on i32
; lanes: each half of the result takes pairs from %a, then from %b. The
; default and -mcpu=slm expectations use two <4 x i32> adds joined by a
; final shufflevector; the other configurations use a single <8 x i32> add.
define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: @test_v8i32(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
; SSE-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SSE-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[TMP4]], [[TMP5]]
; SSE-NEXT:    [[R07:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT:    ret <8 x i32> [[R07]]
;
; SLM-LABEL: @test_v8i32(
; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
; SLM-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SLM-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[TMP4]], [[TMP5]]
; SLM-NEXT:    [[R07:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT:    ret <8 x i32> [[R07]]
;
; AVX-LABEL: @test_v8i32(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; AVX-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <8 x i32> [[TMP3]]
;
; AVX512-LABEL: @test_v8i32(
; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; AVX512-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; AVX512-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
; AVX512-NEXT:    ret <8 x i32> [[TMP3]]
;
  %a0 = extractelement <8 x i32> %a, i32 0
  %a1 = extractelement <8 x i32> %a, i32 1
  %a2 = extractelement <8 x i32> %a, i32 2
  %a3 = extractelement <8 x i32> %a, i32 3
  %a4 = extractelement <8 x i32> %a, i32 4
  %a5 = extractelement <8 x i32> %a, i32 5
  %a6 = extractelement <8 x i32> %a, i32 6
  %a7 = extractelement <8 x i32> %a, i32 7
  %b0 = extractelement <8 x i32> %b, i32 0
  %b1 = extractelement <8 x i32> %b, i32 1
  %b2 = extractelement <8 x i32> %b, i32 2
  %b3 = extractelement <8 x i32> %b, i32 3
  %b4 = extractelement <8 x i32> %b, i32 4
  %b5 = extractelement <8 x i32> %b, i32 5
  %b6 = extractelement <8 x i32> %b, i32 6
  %b7 = extractelement <8 x i32> %b, i32 7
  %r0 = add i32 %a0, %a1
  %r1 = add i32 %a2, %a3
  %r2 = add i32 %b0, %b1
  %r3 = add i32 %b2, %b3
  %r4 = add i32 %a4, %a5
  %r5 = add i32 %a6, %a7
  %r6 = add i32 %b4, %b5
  %r7 = add i32 %b6, %b7
  %r00 = insertelement <8 x i32> undef, i32 %r0, i32 0
  %r01 = insertelement <8 x i32> %r00, i32 %r1, i32 1
  %r02 = insertelement <8 x i32> %r01, i32 %r2, i32 2
  %r03 = insertelement <8 x i32> %r02, i32 %r3, i32 3
  %r04 = insertelement <8 x i32> %r03, i32 %r4, i32 4
  %r05 = insertelement <8 x i32> %r04, i32 %r5, i32 5
  %r06 = insertelement <8 x i32> %r05, i32 %r6, i32 6
  %r07 = insertelement <8 x i32> %r06, i32 %r7, i32 7
  ret <8 x i32> %r07
}

; Result lanes 0-7 hold the pair sums of %a[0..7] followed by %b[0..7];
; lanes 8-15 hold the pair sums of %a[8..15] followed by %b[8..15].
; Only the default (no -mcpu) expectations use two <8 x i16> adds joined by a
; final shufflevector; every other configuration, including -mcpu=slm, uses a
; single <16 x i16> add.
define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: @test_v16i16(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23>
; SSE-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]
; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; SSE-NEXT:    [[TMP6:%.*]] = add <8 x i16> [[TMP4]], [[TMP5]]
; SSE-NEXT:    [[RV15:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT:    ret <16 x i16> [[RV15]]
;
; SLM-LABEL: @test_v16i16(
; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; SLM-NEXT:    [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
; SLM-NEXT:    ret <16 x i16> [[TMP3]]
;
; AVX-LABEL: @test_v16i16(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; AVX-NEXT:    [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <16 x i16> [[TMP3]]
;
; AVX512-LABEL: @test_v16i16(
; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; AVX512-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; AVX512-NEXT:    [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
; AVX512-NEXT:    ret <16 x i16> [[TMP3]]
;
  %a0  = extractelement <16 x i16> %a, i32 0
  %a1  = extractelement <16 x i16> %a, i32 1
  %a2  = extractelement <16 x i16> %a, i32 2
  %a3  = extractelement <16 x i16> %a, i32 3
  %a4  = extractelement <16 x i16> %a, i32 4
  %a5  = extractelement <16 x i16> %a, i32 5
  %a6  = extractelement <16 x i16> %a, i32 6
  %a7  = extractelement <16 x i16> %a, i32 7
  %a8  = extractelement <16 x i16> %a, i32 8
  %a9  = extractelement <16 x i16> %a, i32 9
  %a10 = extractelement <16 x i16> %a, i32 10
  %a11 = extractelement <16 x i16> %a, i32 11
  %a12 = extractelement <16 x i16> %a, i32 12
  %a13 = extractelement <16 x i16> %a, i32 13
  %a14 = extractelement <16 x i16> %a, i32 14
  %a15 = extractelement <16 x i16> %a, i32 15
  %b0  = extractelement <16 x i16> %b, i32 0
  %b1  = extractelement <16 x i16> %b, i32 1
  %b2  = extractelement <16 x i16> %b, i32 2
  %b3  = extractelement <16 x i16> %b, i32 3
  %b4  = extractelement <16 x i16> %b, i32 4
  %b5  = extractelement <16 x i16> %b, i32 5
  %b6  = extractelement <16 x i16> %b, i32 6
  %b7  = extractelement <16 x i16> %b, i32 7
  %b8  = extractelement <16 x i16> %b, i32 8
  %b9  = extractelement <16 x i16> %b, i32 9
  %b10 = extractelement <16 x i16> %b, i32 10
  %b11 = extractelement <16 x i16> %b, i32 11
  %b12 = extractelement <16 x i16> %b, i32 12
  %b13 = extractelement <16 x i16> %b, i32 13
  %b14 = extractelement <16 x i16> %b, i32 14
  %b15 = extractelement <16 x i16> %b, i32 15
  %r0  = add i16 %a0 , %a1
  %r1  = add i16 %a2 , %a3
  %r2  = add i16 %a4 , %a5
  %r3  = add i16 %a6 , %a7
  %r4  = add i16 %b0 , %b1
  %r5  = add i16 %b2 , %b3
  %r6  = add i16 %b4 , %b5
  %r7  = add i16 %b6 , %b7
  %r8  = add i16 %a8 , %a9
  %r9  = add i16 %a10, %a11
  %r10 = add i16 %a12, %a13
  %r11 = add i16 %a14, %a15
  %r12 = add i16 %b8 , %b9
  %r13 = add i16 %b10, %b11
  %r14 = add i16 %b12, %b13
  %r15 = add i16 %b14, %b15
  %rv0  = insertelement <16 x i16> undef, i16 %r0 , i32 0
  %rv1  = insertelement <16 x i16> %rv0 , i16 %r1 , i32 1
  %rv2  = insertelement <16 x i16> %rv1 , i16 %r2 , i32 2
  %rv3  = insertelement <16 x i16> %rv2 , i16 %r3 , i32 3
  %rv4  = insertelement <16 x i16> %rv3 , i16 %r4 , i32 4
  %rv5  = insertelement <16 x i16> %rv4 , i16 %r5 , i32 5
  %rv6  = insertelement <16 x i16> %rv5 , i16 %r6 , i32 6
  %rv7  = insertelement <16 x i16> %rv6 , i16 %r7 , i32 7
  %rv8  = insertelement <16 x i16> %rv7 , i16 %r8 , i32 8
  %rv9  = insertelement <16 x i16> %rv8 , i16 %r9 , i32 9
  %rv10 = insertelement <16 x i16> %rv9 , i16 %r10, i32 10
  %rv11 = insertelement <16 x i16> %rv10, i16 %r11, i32 11
  %rv12 = insertelement <16 x i16> %rv11, i16 %r12, i32 12
  %rv13 = insertelement <16 x i16> %rv12, i16 %r13, i32 13
  %rv14 = insertelement <16 x i16> %rv13, i16 %r14, i32 14
  %rv15 = insertelement <16 x i16> %rv14, i16 %r15, i32 15
  ret <16 x i16> %rv15
}