; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX

; Every function below takes two vectors %a and %b, extracts their elements,
; subtracts each odd-indexed element from the even-indexed element that
; precedes it, and inserts the differences into a result vector of the same
; type. The check lines record what the pipeline above produces for each
; target configuration: either a pair of shufflevectors feeding one (or two)
; vector subtractions, or the untouched scalar sequence.

;
; 128-bit vectors
;
; Result layout: differences of %a's pairs first, then differences of %b's pairs.

define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: @test_v2f64(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; SSE-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; SSE-NEXT:    ret <2 x double> [[TMP3]]
;
; SLM-LABEL: @test_v2f64(
; SLM-NEXT:    [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
; SLM-NEXT:    [[A1:%.*]] = extractelement <2 x double> [[A]], i32 1
; SLM-NEXT:    [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
; SLM-NEXT:    [[B1:%.*]] = extractelement <2 x double> [[B]], i32 1
; SLM-NEXT:    [[R0:%.*]] = fsub double [[A0]], [[A1]]
; SLM-NEXT:    [[R1:%.*]] = fsub double [[B0]], [[B1]]
; SLM-NEXT:    [[R00:%.*]] = insertelement <2 x double> undef, double [[R0]], i32 0
; SLM-NEXT:    [[R01:%.*]] = insertelement <2 x double> [[R00]], double [[R1]], i32 1
; SLM-NEXT:    ret <2 x double> [[R01]]
;
; AVX-LABEL: @test_v2f64(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; AVX-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <2 x double> [[TMP3]]
;
  %a0 = extractelement <2 x double> %a, i32 0
  %a1 = extractelement <2 x double> %a, i32 1
  %b0 = extractelement <2 x double> %b, i32 0
  %b1 = extractelement <2 x double> %b, i32 1
  %r0 = fsub double %a0, %a1
  %r1 = fsub double %b0, %b1
  %r00 = insertelement <2 x double> undef, double %r0, i32 0
  %r01 = insertelement <2 x double> %r00, double %r1, i32 1
  ret <2 x double> %r01
}

define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_v4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <4 x float> [[TMP3]]
;
  %a0 = extractelement <4 x float> %a, i32 0
  %a1 = extractelement <4 x float> %a, i32 1
  %a2 = extractelement <4 x float> %a, i32 2
  %a3 = extractelement <4 x float> %a, i32 3
  %b0 = extractelement <4 x float> %b, i32 0
  %b1 = extractelement <4 x float> %b, i32 1
  %b2 = extractelement <4 x float> %b, i32 2
  %b3 = extractelement <4 x float> %b, i32 3
  %r0 = fsub float %a0, %a1
  %r1 = fsub float %a2, %a3
  %r2 = fsub float %b0, %b1
  %r3 = fsub float %b2, %b3
  %r00 = insertelement <4 x float> undef, float %r0, i32 0
  %r01 = insertelement <4 x float> %r00, float %r1, i32 1
  %r02 = insertelement <4 x float> %r01, float %r2, i32 2
  %r03 = insertelement <4 x float> %r02, float %r3, i32 3
  ret <4 x float> %r03
}

define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @test_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
; CHECK-NEXT:    [[TMP3:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %a0 = extractelement <2 x i64> %a, i32 0
  %a1 = extractelement <2 x i64> %a, i32 1
  %b0 = extractelement <2 x i64> %b, i32 0
  %b1 = extractelement <2 x i64> %b, i32 1
  %r0 = sub i64 %a0, %a1
  %r1 = sub i64 %b0, %b1
  %r00 = insertelement <2 x i64> undef, i64 %r0, i32 0
  %r01 = insertelement <2 x i64> %r00, i64 %r1, i32 1
  ret <2 x i64> %r01
}

define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @test_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %a0 = extractelement <4 x i32> %a, i32 0
  %a1 = extractelement <4 x i32> %a, i32 1
  %a2 = extractelement <4 x i32> %a, i32 2
  %a3 = extractelement <4 x i32> %a, i32 3
  %b0 = extractelement <4 x i32> %b, i32 0
  %b1 = extractelement <4 x i32> %b, i32 1
  %b2 = extractelement <4 x i32> %b, i32 2
  %b3 = extractelement <4 x i32> %b, i32 3
  %r0 = sub i32 %a0, %a1
  %r1 = sub i32 %a2, %a3
  %r2 = sub i32 %b0, %b1
  %r3 = sub i32 %b2, %b3
  %r00 = insertelement <4 x i32> undef, i32 %r0, i32 0
  %r01 = insertelement <4 x i32> %r00, i32 %r1, i32 1
  %r02 = insertelement <4 x i32> %r01, i32 %r2, i32 2
  %r03 = insertelement <4 x i32> %r02, i32 %r3, i32 3
  ret <4 x i32> %r03
}

define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @test_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; CHECK-NEXT:    [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %a0 = extractelement <8 x i16> %a, i32 0
  %a1 = extractelement <8 x i16> %a, i32 1
  %a2 = extractelement <8 x i16> %a, i32 2
  %a3 = extractelement <8 x i16> %a, i32 3
  %a4 = extractelement <8 x i16> %a, i32 4
  %a5 = extractelement <8 x i16> %a, i32 5
  %a6 = extractelement <8 x i16> %a, i32 6
  %a7 = extractelement <8 x i16> %a, i32 7
  %b0 = extractelement <8 x i16> %b, i32 0
  %b1 = extractelement <8 x i16> %b, i32 1
  %b2 = extractelement <8 x i16> %b, i32 2
  %b3 = extractelement <8 x i16> %b, i32 3
  %b4 = extractelement <8 x i16> %b, i32 4
  %b5 = extractelement <8 x i16> %b, i32 5
  %b6 = extractelement <8 x i16> %b, i32 6
  %b7 = extractelement <8 x i16> %b, i32 7
  %r0 = sub i16 %a0, %a1
  %r1 = sub i16 %a2, %a3
  %r2 = sub i16 %a4, %a5
  %r3 = sub i16 %a6, %a7
  %r4 = sub i16 %b0, %b1
  %r5 = sub i16 %b2, %b3
  %r6 = sub i16 %b4, %b5
  %r7 = sub i16 %b6, %b7
  %r00 = insertelement <8 x i16> undef, i16 %r0, i32 0
  %r01 = insertelement <8 x i16> %r00, i16 %r1, i32 1
  %r02 = insertelement <8 x i16> %r01, i16 %r2, i32 2
  %r03 = insertelement <8 x i16> %r02, i16 %r3, i32 3
  %r04 = insertelement <8 x i16> %r03, i16 %r4, i32 4
  %r05 = insertelement <8 x i16> %r04, i16 %r5, i32 5
  %r06 = insertelement <8 x i16> %r05, i16 %r6, i32 6
  %r07 = insertelement <8 x i16> %r06, i16 %r7, i32 7
  ret <8 x i16> %r07
}

;
; 256-bit vectors
;
; Result layout differs from the 128-bit cases: the differences are grouped
; per 128-bit half, i.e. low half of %a, low half of %b, high half of %a,
; high half of %b.

define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
; SSE-LABEL: @test_v4f64(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; SSE-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
; SSE-NEXT:    [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]]
; SSE-NEXT:    [[R03:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT:    ret <4 x double> [[R03]]
;
; SLM-LABEL: @test_v4f64(
; SLM-NEXT:    [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0
; SLM-NEXT:    [[A1:%.*]] = extractelement <4 x double> [[A]], i32 1
; SLM-NEXT:    [[A2:%.*]] = extractelement <4 x double> [[A]], i32 2
; SLM-NEXT:    [[A3:%.*]] = extractelement <4 x double> [[A]], i32 3
; SLM-NEXT:    [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
; SLM-NEXT:    [[B1:%.*]] = extractelement <4 x double> [[B]], i32 1
; SLM-NEXT:    [[B2:%.*]] = extractelement <4 x double> [[B]], i32 2
; SLM-NEXT:    [[B3:%.*]] = extractelement <4 x double> [[B]], i32 3
; SLM-NEXT:    [[R0:%.*]] = fsub double [[A0]], [[A1]]
; SLM-NEXT:    [[R1:%.*]] = fsub double [[B0]], [[B1]]
; SLM-NEXT:    [[R2:%.*]] = fsub double [[A2]], [[A3]]
; SLM-NEXT:    [[R3:%.*]] = fsub double [[B2]], [[B3]]
; SLM-NEXT:    [[R00:%.*]] = insertelement <4 x double> undef, double [[R0]], i32 0
; SLM-NEXT:    [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i32 1
; SLM-NEXT:    [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i32 2
; SLM-NEXT:    [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i32 3
; SLM-NEXT:    ret <4 x double> [[R03]]
;
; AVX-LABEL: @test_v4f64(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; AVX-NEXT:    [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <4 x double> [[TMP3]]
;
  %a0 = extractelement <4 x double> %a, i32 0
  %a1 = extractelement <4 x double> %a, i32 1
  %a2 = extractelement <4 x double> %a, i32 2
  %a3 = extractelement <4 x double> %a, i32 3
  %b0 = extractelement <4 x double> %b, i32 0
  %b1 = extractelement <4 x double> %b, i32 1
  %b2 = extractelement <4 x double> %b, i32 2
  %b3 = extractelement <4 x double> %b, i32 3
  %r0 = fsub double %a0, %a1
  %r1 = fsub double %b0, %b1
  %r2 = fsub double %a2, %a3
  %r3 = fsub double %b2, %b3
  %r00 = insertelement <4 x double> undef, double %r0, i32 0
  %r01 = insertelement <4 x double> %r00, double %r1, i32 1
  %r02 = insertelement <4 x double> %r01, double %r2, i32 2
  %r03 = insertelement <4 x double> %r02, double %r3, i32 3
  ret <4 x double> %r03
}

define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: @test_v8f32(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; SSE-NEXT:    [[TMP3:%.*]] = fsub <8 x float> [[TMP1]], [[TMP2]]
; SSE-NEXT:    ret <8 x float> [[TMP3]]
;
; SLM-LABEL: @test_v8f32(
; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
; SLM-NEXT:    [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SLM-NEXT:    [[TMP6:%.*]] = fsub <4 x float> [[TMP4]], [[TMP5]]
; SLM-NEXT:    [[R07:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT:    ret <8 x float> [[R07]]
;
; AVX-LABEL: @test_v8f32(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; AVX-NEXT:    [[TMP3:%.*]] = fsub <8 x float> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <8 x float> [[TMP3]]
;
  %a0 = extractelement <8 x float> %a, i32 0
  %a1 = extractelement <8 x float> %a, i32 1
  %a2 = extractelement <8 x float> %a, i32 2
  %a3 = extractelement <8 x float> %a, i32 3
  %a4 = extractelement <8 x float> %a, i32 4
  %a5 = extractelement <8 x float> %a, i32 5
  %a6 = extractelement <8 x float> %a, i32 6
  %a7 = extractelement <8 x float> %a, i32 7
  %b0 = extractelement <8 x float> %b, i32 0
  %b1 = extractelement <8 x float> %b, i32 1
  %b2 = extractelement <8 x float> %b, i32 2
  %b3 = extractelement <8 x float> %b, i32 3
  %b4 = extractelement <8 x float> %b, i32 4
  %b5 = extractelement <8 x float> %b, i32 5
  %b6 = extractelement <8 x float> %b, i32 6
  %b7 = extractelement <8 x float> %b, i32 7
  %r0 = fsub float %a0, %a1
  %r1 = fsub float %a2, %a3
  %r2 = fsub float %b0, %b1
  %r3 = fsub float %b2, %b3
  %r4 = fsub float %a4, %a5
  %r5 = fsub float %a6, %a7
  %r6 = fsub float %b4, %b5
  %r7 = fsub float %b6, %b7
  %r00 = insertelement <8 x float> undef, float %r0, i32 0
  %r01 = insertelement <8 x float> %r00, float %r1, i32 1
  %r02 = insertelement <8 x float> %r01, float %r2, i32 2
  %r03 = insertelement <8 x float> %r02, float %r3, i32 3
  %r04 = insertelement <8 x float> %r03, float %r4, i32 4
  %r05 = insertelement <8 x float> %r04, float %r5, i32 5
  %r06 = insertelement <8 x float> %r05, float %r6, i32 6
  %r07 = insertelement <8 x float> %r06, float %r7, i32 7
  ret <8 x float> %r07
}

define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: @test_v4i64(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = sub <4 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <4 x i64> [[TMP3]]
;
  %a0 = extractelement <4 x i64> %a, i32 0
  %a1 = extractelement <4 x i64> %a, i32 1
  %a2 = extractelement <4 x i64> %a, i32 2
  %a3 = extractelement <4 x i64> %a, i32 3
  %b0 = extractelement <4 x i64> %b, i32 0
  %b1 = extractelement <4 x i64> %b, i32 1
  %b2 = extractelement <4 x i64> %b, i32 2
  %b3 = extractelement <4 x i64> %b, i32 3
  %r0 = sub i64 %a0, %a1
  %r1 = sub i64 %b0, %b1
  %r2 = sub i64 %a2, %a3
  %r3 = sub i64 %b2, %b3
  %r00 = insertelement <4 x i64> undef, i64 %r0, i32 0
  %r01 = insertelement <4 x i64> %r00, i64 %r1, i32 1
  %r02 = insertelement <4 x i64> %r01, i64 %r2, i32 2
  %r03 = insertelement <4 x i64> %r02, i64 %r3, i32 3
  ret <4 x i64> %r03
}

define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: @test_v8i32(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; CHECK-NEXT:    [[TMP3:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <8 x i32> [[TMP3]]
;
  %a0 = extractelement <8 x i32> %a, i32 0
  %a1 = extractelement <8 x i32> %a, i32 1
  %a2 = extractelement <8 x i32> %a, i32 2
  %a3 = extractelement <8 x i32> %a, i32 3
  %a4 = extractelement <8 x i32> %a, i32 4
  %a5 = extractelement <8 x i32> %a, i32 5
  %a6 = extractelement <8 x i32> %a, i32 6
  %a7 = extractelement <8 x i32> %a, i32 7
  %b0 = extractelement <8 x i32> %b, i32 0
  %b1 = extractelement <8 x i32> %b, i32 1
  %b2 = extractelement <8 x i32> %b, i32 2
  %b3 = extractelement <8 x i32> %b, i32 3
  %b4 = extractelement <8 x i32> %b, i32 4
  %b5 = extractelement <8 x i32> %b, i32 5
  %b6 = extractelement <8 x i32> %b, i32 6
  %b7 = extractelement <8 x i32> %b, i32 7
  %r0 = sub i32 %a0, %a1
  %r1 = sub i32 %a2, %a3
  %r2 = sub i32 %b0, %b1
  %r3 = sub i32 %b2, %b3
  %r4 = sub i32 %a4, %a5
  %r5 = sub i32 %a6, %a7
  %r6 = sub i32 %b4, %b5
  %r7 = sub i32 %b6, %b7
  %r00 = insertelement <8 x i32> undef, i32 %r0, i32 0
  %r01 = insertelement <8 x i32> %r00, i32 %r1, i32 1
  %r02 = insertelement <8 x i32> %r01, i32 %r2, i32 2
  %r03 = insertelement <8 x i32> %r02, i32 %r3, i32 3
  %r04 = insertelement <8 x i32> %r03, i32 %r4, i32 4
  %r05 = insertelement <8 x i32> %r04, i32 %r5, i32 5
  %r06 = insertelement <8 x i32> %r05, i32 %r6, i32 6
  %r07 = insertelement <8 x i32> %r06, i32 %r7, i32 7
  ret <8 x i32> %r07
}

define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: @test_v16i16(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23>
; SSE-NEXT:    [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; SSE-NEXT:    [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]]
; SSE-NEXT:    [[RV15:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT:    ret <16 x i16> [[RV15]]
;
; SLM-LABEL: @test_v16i16(
; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; SLM-NEXT:    [[TMP3:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
; SLM-NEXT:    ret <16 x i16> [[TMP3]]
;
; AVX-LABEL: @test_v16i16(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; AVX-NEXT:    [[TMP3:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <16 x i16> [[TMP3]]
;
  %a0 = extractelement <16 x i16> %a, i32 0
  %a1 = extractelement <16 x i16> %a, i32 1
  %a2 = extractelement <16 x i16> %a, i32 2
  %a3 = extractelement <16 x i16> %a, i32 3
  %a4 = extractelement <16 x i16> %a, i32 4
  %a5 = extractelement <16 x i16> %a, i32 5
  %a6 = extractelement <16 x i16> %a, i32 6
  %a7 = extractelement <16 x i16> %a, i32 7
  %a8 = extractelement <16 x i16> %a, i32 8
  %a9 = extractelement <16 x i16> %a, i32 9
  %a10 = extractelement <16 x i16> %a, i32 10
  %a11 = extractelement <16 x i16> %a, i32 11
  %a12 = extractelement <16 x i16> %a, i32 12
  %a13 = extractelement <16 x i16> %a, i32 13
  %a14 = extractelement <16 x i16> %a, i32 14
  %a15 = extractelement <16 x i16> %a, i32 15
  %b0 = extractelement <16 x i16> %b, i32 0
  %b1 = extractelement <16 x i16> %b, i32 1
  %b2 = extractelement <16 x i16> %b, i32 2
  %b3 = extractelement <16 x i16> %b, i32 3
  %b4 = extractelement <16 x i16> %b, i32 4
  %b5 = extractelement <16 x i16> %b, i32 5
  %b6 = extractelement <16 x i16> %b, i32 6
  %b7 = extractelement <16 x i16> %b, i32 7
  %b8 = extractelement <16 x i16> %b, i32 8
  %b9 = extractelement <16 x i16> %b, i32 9
  %b10 = extractelement <16 x i16> %b, i32 10
  %b11 = extractelement <16 x i16> %b, i32 11
  %b12 = extractelement <16 x i16> %b, i32 12
  %b13 = extractelement <16 x i16> %b, i32 13
  %b14 = extractelement <16 x i16> %b, i32 14
  %b15 = extractelement <16 x i16> %b, i32 15
  %r0 = sub i16 %a0, %a1
  %r1 = sub i16 %a2, %a3
  %r2 = sub i16 %a4, %a5
  %r3 = sub i16 %a6, %a7
  %r4 = sub i16 %b0, %b1
  %r5 = sub i16 %b2, %b3
  %r6 = sub i16 %b4, %b5
  %r7 = sub i16 %b6, %b7
  %r8 = sub i16 %a8, %a9
  %r9 = sub i16 %a10, %a11
  %r10 = sub i16 %a12, %a13
  %r11 = sub i16 %a14, %a15
  %r12 = sub i16 %b8, %b9
  %r13 = sub i16 %b10, %b11
  %r14 = sub i16 %b12, %b13
  %r15 = sub i16 %b14, %b15
  %rv0 = insertelement <16 x i16> undef, i16 %r0, i32 0
  %rv1 = insertelement <16 x i16> %rv0, i16 %r1, i32 1
  %rv2 = insertelement <16 x i16> %rv1, i16 %r2, i32 2
  %rv3 = insertelement <16 x i16> %rv2, i16 %r3, i32 3
  %rv4 = insertelement <16 x i16> %rv3, i16 %r4, i32 4
  %rv5 = insertelement <16 x i16> %rv4, i16 %r5, i32 5
  %rv6 = insertelement <16 x i16> %rv5, i16 %r6, i32 6
  %rv7 = insertelement <16 x i16> %rv6, i16 %r7, i32 7
  %rv8 = insertelement <16 x i16> %rv7, i16 %r8, i32 8
  %rv9 = insertelement <16 x i16> %rv8, i16 %r9, i32 9
  %rv10 = insertelement <16 x i16> %rv9, i16 %r10, i32 10
  %rv11 = insertelement <16 x i16> %rv10, i16 %r11, i32 11
  %rv12 = insertelement <16 x i16> %rv11, i16 %r12, i32 12
  %rv13 = insertelement <16 x i16> %rv12, i16 %r13, i32 13
  %rv14 = insertelement <16 x i16> %rv13, i16 %r14, i32 14
  %rv15 = insertelement <16 x i16> %rv14, i16 %r15, i32 15
  ret <16 x i16> %rv15
}