Lines Matching refs:AVX512
6 …< %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512
7 …ple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512
33 ; AVX512-LABEL: test_v2f32:
34 ; AVX512: # %bb.0:
35 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
36 ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
37 ; AVX512-NEXT: retq
78 ; AVX512-LABEL: test_v4f32:
79 ; AVX512: # %bb.0:
80 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
81 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
82 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
83 ; AVX512-NEXT: vmaxss %xmm3, %xmm0, %xmm0
84 ; AVX512-NEXT: vmaxss %xmm2, %xmm0, %xmm0
85 ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
86 ; AVX512-NEXT: retq
140 ; AVX512-LABEL: test_v8f32:
141 ; AVX512: # %bb.0:
142 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
143 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,3,3,3]
144 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
145 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
146 ; AVX512-NEXT: vpermilps {{.*#+}} xmm5 = xmm0[3,3,3,3]
147 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0]
148 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3]
149 ; AVX512-NEXT: vmaxss %xmm7, %xmm0, %xmm0
150 ; AVX512-NEXT: vmaxss %xmm6, %xmm0, %xmm0
151 ; AVX512-NEXT: vmaxss %xmm5, %xmm0, %xmm0
152 ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
153 ; AVX512-NEXT: vmaxss %xmm4, %xmm0, %xmm0
154 ; AVX512-NEXT: vmaxss %xmm3, %xmm0, %xmm0
155 ; AVX512-NEXT: vmaxss %xmm2, %xmm0, %xmm0
156 ; AVX512-NEXT: vzeroupper
157 ; AVX512-NEXT: retq
216 ; AVX512-LABEL: test_v16f32:
217 ; AVX512: # %bb.0:
218 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm1
219 ; AVX512-NEXT: vpermilps {{.*#+}} xmm8 = xmm1[3,3,3,3]
220 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm9 = xmm1[1,0]
221 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm10 = xmm1[1,1,3,3]
222 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm5
223 ; AVX512-NEXT: vpermilps {{.*#+}} xmm11 = xmm5[3,3,3,3]
224 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm12 = xmm5[1,0]
225 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm13 = xmm5[1,1,3,3]
226 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3
227 ; AVX512-NEXT: vpermilps {{.*#+}} xmm14 = xmm3[3,3,3,3]
228 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm15 = xmm3[1,0]
229 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm7 = xmm3[1,1,3,3]
230 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
231 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0]
232 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm6 = xmm0[1,1,3,3]
233 ; AVX512-NEXT: vmaxss %xmm6, %xmm0, %xmm0
234 ; AVX512-NEXT: vmaxss %xmm4, %xmm0, %xmm0
235 ; AVX512-NEXT: vmaxss %xmm2, %xmm0, %xmm0
236 ; AVX512-NEXT: vmaxss %xmm3, %xmm0, %xmm0
237 ; AVX512-NEXT: vmaxss %xmm7, %xmm0, %xmm0
238 ; AVX512-NEXT: vmaxss %xmm15, %xmm0, %xmm0
239 ; AVX512-NEXT: vmaxss %xmm14, %xmm0, %xmm0
240 ; AVX512-NEXT: vmaxss %xmm5, %xmm0, %xmm0
241 ; AVX512-NEXT: vmaxss %xmm13, %xmm0, %xmm0
242 ; AVX512-NEXT: vmaxss %xmm12, %xmm0, %xmm0
243 ; AVX512-NEXT: vmaxss %xmm11, %xmm0, %xmm0
244 ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
245 ; AVX512-NEXT: vmaxss %xmm10, %xmm0, %xmm0
246 ; AVX512-NEXT: vmaxss %xmm9, %xmm0, %xmm0
247 ; AVX512-NEXT: vmaxss %xmm8, %xmm0, %xmm0
248 ; AVX512-NEXT: vzeroupper
249 ; AVX512-NEXT: retq
272 ; AVX512-LABEL: test_v2f64:
273 ; AVX512: # %bb.0:
274 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
275 ; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
276 ; AVX512-NEXT: retq
311 ; AVX512-LABEL: test_v3f64:
312 ; AVX512: # %bb.0:
313 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
314 ; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm1
315 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
316 ; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
317 ; AVX512-NEXT: vzeroupper
318 ; AVX512-NEXT: retq
343 ; AVX512-LABEL: test_v4f64:
344 ; AVX512: # %bb.0:
345 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
346 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
347 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
348 ; AVX512-NEXT: vmaxsd %xmm3, %xmm0, %xmm0
349 ; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
350 ; AVX512-NEXT: vmaxsd %xmm2, %xmm0, %xmm0
351 ; AVX512-NEXT: vzeroupper
352 ; AVX512-NEXT: retq
380 ; AVX512-LABEL: test_v8f64:
381 ; AVX512: # %bb.0:
382 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm1
383 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
384 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm3
385 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
386 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm5
387 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0]
388 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm7 = xmm0[1,0]
389 ; AVX512-NEXT: vmaxsd %xmm7, %xmm0, %xmm0
390 ; AVX512-NEXT: vmaxsd %xmm5, %xmm0, %xmm0
391 ; AVX512-NEXT: vmaxsd %xmm6, %xmm0, %xmm0
392 ; AVX512-NEXT: vmaxsd %xmm3, %xmm0, %xmm0
393 ; AVX512-NEXT: vmaxsd %xmm4, %xmm0, %xmm0
394 ; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
395 ; AVX512-NEXT: vmaxsd %xmm2, %xmm0, %xmm0
396 ; AVX512-NEXT: vzeroupper
397 ; AVX512-NEXT: retq
431 ; AVX512-LABEL: test_v16f64:
432 ; AVX512: # %bb.0:
433 ; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
434 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
435 ; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm1
436 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
437 ; AVX512-NEXT: vmaxsd %xmm2, %xmm1, %xmm1
438 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
439 ; AVX512-NEXT: vmaxsd %xmm2, %xmm1, %xmm1
440 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2
441 ; AVX512-NEXT: vmaxsd %xmm2, %xmm1, %xmm1
442 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
443 ; AVX512-NEXT: vmaxsd %xmm2, %xmm1, %xmm1
444 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
445 ; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm1
446 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
447 ; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
448 ; AVX512-NEXT: vzeroupper
449 ; AVX512-NEXT: retq
499 ; AVX512-LABEL: test_v2f16:
500 ; AVX512: # %bb.0:
501 ; AVX512-NEXT: movzwl %di, %eax
502 ; AVX512-NEXT: vmovd %eax, %xmm0
503 ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
504 ; AVX512-NEXT: movzwl %si, %eax
505 ; AVX512-NEXT: vmovd %eax, %xmm1
506 ; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
507 ; AVX512-NEXT: vmaxss %xmm0, %xmm1, %xmm2
508 ; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
509 ; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
510 ; AVX512-NEXT: vcvtps2ph $4, %xmm2, %xmm0
511 ; AVX512-NEXT: vmovd %xmm0, %eax
512 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
513 ; AVX512-NEXT: retq