; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; Verify that instcombine is able to fold identity shuffles.

define <8 x i32> @identity_test_vpermd(<8 x i32> %a0) {
; CHECK-LABEL: @identity_test_vpermd(
; CHECK-NEXT:    ret <8 x i32> %a0
;
  %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
  ret <8 x i32> %a
}

define <8 x float> @identity_test_vpermps(<8 x float> %a0) {
; CHECK-LABEL: @identity_test_vpermps(
; CHECK-NEXT:    ret <8 x float> %a0
;
  %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
  ret <8 x float> %a
}

; Instcombine should be able to fold the following shuffle to a builtin shufflevector
; with a shuffle mask of all zeroes.

define <8 x i32> @zero_test_vpermd(<8 x i32> %a0) {
; CHECK-LABEL: @zero_test_vpermd(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer)
  ret <8 x i32> %a
}

define <8 x float> @zero_test_vpermps(<8 x float> %a0) {
; CHECK-LABEL: @zero_test_vpermps(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> zeroinitializer
; CHECK-NEXT:    ret <8 x float> [[TMP1]]
;
  %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer)
  ret <8 x float> %a
}

; Verify that instcombine is able to fold constant shuffles.

define <8 x i32> @shuffle_test_vpermd(<8 x i32> %a0) {
; CHECK-LABEL: @shuffle_test_vpermd(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
  ret <8 x i32> %a
}

define <8 x float> @shuffle_test_vpermps(<8 x float> %a0) {
; CHECK-LABEL: @shuffle_test_vpermps(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    ret <8 x float> [[TMP1]]
;
  %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
  ret <8 x float> %a
}

; Verify that instcombine is able to fold constant shuffles with undef mask elements.

define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) {
; CHECK-LABEL: @undef_test_vpermd(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
  ret <8 x i32> %a
}

define <8 x float> @undef_test_vpermps(<8 x float> %a0) {
; CHECK-LABEL: @undef_test_vpermps(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    ret <8 x float> [[TMP1]]
;
  %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
  ret <8 x float> %a
}

declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
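
; Note: @llvm.x86.avx2.permd and @llvm.x86.avx2.permps model the AVX2
; VPERMD/VPERMPS instructions, which permute the eight 32-bit lanes of a
; 256-bit vector using the low 3 bits of each mask element as a lane index.
; When the mask operand is a compile-time constant (as in every test above),
; the permutation is statically known, so instcombine can replace the
; intrinsic call with an equivalent generic shufflevector instruction.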