1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
3
4; 256-bit
5
6define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
7; CHECK-LABEL: test_cmp_d_256:
8; CHECK:       ## BB#0:
9; CHECK-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x00]
10; CHECK-NEXT:    vpcmpltd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x01]
11; CHECK-NEXT:    vpcmpled %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x02]
12; CHECK-NEXT:    vpcmpunordd %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x03]
13; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xe1,0x04]
14; CHECK-NEXT:    vpcmpnltd %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xe9,0x05]
15; CHECK-NEXT:    vpcmpnled %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xf1,0x06]
16; CHECK-NEXT:    vpcmpordd %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xf9,0x07]
17; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
18; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
19; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
20; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
21; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
22; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
23; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
24; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
25; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
26; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
27; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
28; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
29; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
30; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
31; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
32; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
33; CHECK-NEXT:    retq ## encoding: [0xc3]
34  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
35  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
36  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
37  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
38  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
39  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
40  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
41  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
42  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
43  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
44  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
45  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
46  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
47  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
48  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
49  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
50  ret <8 x i8> %vec7
51}
52
53define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
54; CHECK-LABEL: test_mask_cmp_d_256:
55; CHECK:       ## BB#0:
56; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
57; CHECK-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x00]
58; CHECK-NEXT:    vpcmpltd %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd1,0x01]
59; CHECK-NEXT:    vpcmpled %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x02]
60; CHECK-NEXT:    vpcmpunordd %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x03]
61; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe9,0x04]
62; CHECK-NEXT:    vpcmpnltd %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xf1,0x05]
63; CHECK-NEXT:    vpcmpnled %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xf9,0x06]
64; CHECK-NEXT:    vpcmpordd %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc9,0x07]
65; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
66; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
67; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
68; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
69; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
70; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
71; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
72; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
73; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
74; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
75; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
76; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
77; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
78; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
79; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
80; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
81; CHECK-NEXT:    retq ## encoding: [0xc3]
82  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
83  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
84  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
85  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
86  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
87  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
88  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
89  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
90  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
91  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
92  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
93  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
94  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
95  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
96  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
97  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
98  ret <8 x i8> %vec7
99}
100
101declare i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone
102
103define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
104; CHECK-LABEL: test_ucmp_d_256:
105; CHECK:       ## BB#0:
106; CHECK-NEXT:    vpcmpequd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x00]
107; CHECK-NEXT:    vpcmpltud %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc9,0x01]
108; CHECK-NEXT:    vpcmpleud %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd1,0x02]
109; CHECK-NEXT:    vpcmpunordud %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd9,0x03]
110; CHECK-NEXT:    vpcmpnequd %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe1,0x04]
111; CHECK-NEXT:    vpcmpnltud %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe9,0x05]
112; CHECK-NEXT:    vpcmpnleud %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xf1,0x06]
113; CHECK-NEXT:    vpcmpordud %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xf9,0x07]
114; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
115; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
116; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
117; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
118; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
119; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
120; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
121; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
122; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
123; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
124; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
125; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
126; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
127; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
128; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
129; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
130; CHECK-NEXT:    retq ## encoding: [0xc3]
131  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
132  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
133  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
134  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
135  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
136  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
137  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
138  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
139  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
140  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
141  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
142  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
143  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
144  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
145  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
146  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
147  ret <8 x i8> %vec7
148}
149
150define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
151; CHECK-LABEL: test_mask_ucmp_d_256:
152; CHECK:       ## BB#0:
153; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
154; CHECK-NEXT:    vpcmpequd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x00]
155; CHECK-NEXT:    vpcmpltud %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x01]
156; CHECK-NEXT:    vpcmpleud %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd9,0x02]
157; CHECK-NEXT:    vpcmpunordud %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe1,0x03]
158; CHECK-NEXT:    vpcmpnequd %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe9,0x04]
159; CHECK-NEXT:    vpcmpnltud %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xf1,0x05]
160; CHECK-NEXT:    vpcmpnleud %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xf9,0x06]
161; CHECK-NEXT:    vpcmpordud %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x07]
162; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
163; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
164; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
165; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
166; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
167; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
168; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
169; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
170; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
171; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
172; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
173; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
174; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
175; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
176; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
177; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
178; CHECK-NEXT:    retq ## encoding: [0xc3]
179  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
180  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
181  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
182  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
183  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
184  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
185  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
186  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
187  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
188  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
189  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
190  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
191  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
192  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
193  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
194  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
195  ret <8 x i8> %vec7
196}
197
198declare i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone
199
200define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
201; CHECK-LABEL: test_cmp_q_256:
202; CHECK:       ## BB#0:
203; CHECK-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc1,0x00]
204; CHECK-NEXT:    vpcmpltq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x01]
205; CHECK-NEXT:    vpcmpleq %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd1,0x02]
206; CHECK-NEXT:    vpcmpunordq %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x03]
207; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe1,0x04]
208; CHECK-NEXT:    vpcmpnltq %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe9,0x05]
209; CHECK-NEXT:    vpcmpnleq %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xf1,0x06]
210; CHECK-NEXT:    vpcmpordq %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xf9,0x07]
211; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
212; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
213; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
214; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
215; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
216; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
217; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
218; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
219; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
220; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
221; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
222; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
223; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
224; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
225; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
226; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
227; CHECK-NEXT:    retq ## encoding: [0xc3]
228  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
229  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
230  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
231  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
232  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
233  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
234  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
235  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
236  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
237  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
238  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
239  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
240  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
241  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
242  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
243  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
244  ret <8 x i8> %vec7
245}
246
247define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
248; CHECK-LABEL: test_mask_cmp_q_256:
249; CHECK:       ## BB#0:
250; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
251; CHECK-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xc1,0x00]
252; CHECK-NEXT:    vpcmpltq %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xd1,0x01]
253; CHECK-NEXT:    vpcmpleq %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xd9,0x02]
254; CHECK-NEXT:    vpcmpunordq %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe1,0x03]
255; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe9,0x04]
256; CHECK-NEXT:    vpcmpnltq %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xf1,0x05]
257; CHECK-NEXT:    vpcmpnleq %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xf9,0x06]
258; CHECK-NEXT:    vpcmpordq %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xc9,0x07]
259; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
260; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
261; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
262; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
263; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
264; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
265; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
266; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
267; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
268; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
269; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
270; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
271; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
272; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
273; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
274; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
275; CHECK-NEXT:    retq ## encoding: [0xc3]
276  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
277  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
278  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
279  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
280  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
281  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
282  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
283  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
284  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
285  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
286  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
287  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
288  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
289  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
290  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
291  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
292  ret <8 x i8> %vec7
293}
294
295declare i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone
296
297define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
298; CHECK-LABEL: test_ucmp_q_256:
299; CHECK:       ## BB#0:
300; CHECK-NEXT:    vpcmpequq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc1,0x00]
301; CHECK-NEXT:    vpcmpltuq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc9,0x01]
302; CHECK-NEXT:    vpcmpleuq %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd1,0x02]
303; CHECK-NEXT:    vpcmpunorduq %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd9,0x03]
304; CHECK-NEXT:    vpcmpnequq %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe1,0x04]
305; CHECK-NEXT:    vpcmpnltuq %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe9,0x05]
306; CHECK-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xf1,0x06]
307; CHECK-NEXT:    vpcmporduq %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xf9,0x07]
308; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
309; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
310; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
311; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
312; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
313; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
314; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
315; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
316; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
317; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
318; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
319; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
320; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
321; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
322; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
323; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
324; CHECK-NEXT:    retq ## encoding: [0xc3]
325  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
326  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
327  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
328  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
329  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
330  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
331  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
332  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
333  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
334  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
335  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
336  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
337  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
338  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
339  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
340  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
341  ret <8 x i8> %vec7
342}
343
344define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
345; CHECK-LABEL: test_mask_ucmp_q_256:
346; CHECK:       ## BB#0:
347; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
348; CHECK-NEXT:    vpcmpequq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xc1,0x00]
349; CHECK-NEXT:    vpcmpltuq %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd1,0x01]
350; CHECK-NEXT:    vpcmpleuq %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd9,0x02]
351; CHECK-NEXT:    vpcmpunorduq %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xe1,0x03]
352; CHECK-NEXT:    vpcmpnequq %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xe9,0x04]
353; CHECK-NEXT:    vpcmpnltuq %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf1,0x05]
354; CHECK-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf9,0x06]
355; CHECK-NEXT:    vpcmporduq %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xc9,0x07]
356; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
357; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
358; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
359; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
360; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
361; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
362; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
363; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
364; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
365; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
366; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
367; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
368; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
369; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
370; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
371; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
372; CHECK-NEXT:    retq ## encoding: [0xc3]
373  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
374  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
375  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
376  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
377  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
378  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
379  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
380  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
381  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
382  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
383  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
384  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
385  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
386  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
387  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
388  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
389  ret <8 x i8> %vec7
390}
391
392declare i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone
393
394; 128-bit
395
396define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
397; CHECK-LABEL: test_cmp_d_128:
398; CHECK:       ## BB#0:
399; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc1,0x00]
400; CHECK-NEXT:    vpcmpltd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x01]
401; CHECK-NEXT:    vpcmpled %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd1,0x02]
402; CHECK-NEXT:    vpcmpunordd %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x03]
403; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe1,0x04]
404; CHECK-NEXT:    vpcmpnltd %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe9,0x05]
405; CHECK-NEXT:    vpcmpnled %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xf1,0x06]
406; CHECK-NEXT:    vpcmpordd %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xf9,0x07]
407; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
408; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
409; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
410; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
411; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
412; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
413; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
414; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
415; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
416; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
417; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
418; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
419; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
420; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
421; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
422; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
423; CHECK-NEXT:    retq ## encoding: [0xc3]
424  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
425  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
426  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
427  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
428  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
429  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
430  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
431  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
432  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
433  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
434  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
435  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
436  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
437  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
438  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
439  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
440  ret <8 x i8> %vec7
441}
442
443define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
444; CHECK-LABEL: test_mask_cmp_d_128:
445; CHECK:       ## BB#0:
446; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
447; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xc1,0x00]
448; CHECK-NEXT:    vpcmpltd %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xd1,0x01]
449; CHECK-NEXT:    vpcmpled %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xd9,0x02]
450; CHECK-NEXT:    vpcmpunordd %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe1,0x03]
451; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe9,0x04]
452; CHECK-NEXT:    vpcmpnltd %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xf1,0x05]
453; CHECK-NEXT:    vpcmpnled %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xf9,0x06]
454; CHECK-NEXT:    vpcmpordd %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xc9,0x07]
455; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
456; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
457; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
458; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
459; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
460; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
461; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
462; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
463; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
464; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
465; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
466; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
467; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
468; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
469; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
470; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
471; CHECK-NEXT:    retq ## encoding: [0xc3]
472  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
473  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
474  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
475  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
476  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
477  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
478  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
479  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
480  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
481  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
482  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
483  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
484  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
485  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
486  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
487  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
488  ret <8 x i8> %vec7
489}
490
491declare i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone
492
493define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
494; CHECK-LABEL: test_ucmp_d_128:
495; CHECK:       ## BB#0:
496; CHECK-NEXT:    vpcmpequd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc1,0x00]
497; CHECK-NEXT:    vpcmpltud %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc9,0x01]
498; CHECK-NEXT:    vpcmpleud %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd1,0x02]
499; CHECK-NEXT:    vpcmpunordud %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd9,0x03]
500; CHECK-NEXT:    vpcmpnequd %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe1,0x04]
501; CHECK-NEXT:    vpcmpnltud %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe9,0x05]
502; CHECK-NEXT:    vpcmpnleud %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xf1,0x06]
503; CHECK-NEXT:    vpcmpordud %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xf9,0x07]
504; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
505; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
506; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
507; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
508; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
509; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
510; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
511; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
512; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
513; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
514; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
515; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
516; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
517; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
518; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
519; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
520; CHECK-NEXT:    retq ## encoding: [0xc3]
521  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
522  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
523  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
524  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
525  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
526  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
527  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
528  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
529  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
530  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
531  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
532  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
533  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
534  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
535  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
536  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
537  ret <8 x i8> %vec7
538}
539
540define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
541; CHECK-LABEL: test_mask_ucmp_d_128:
542; CHECK:       ## BB#0:
543; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
544; CHECK-NEXT:    vpcmpequd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xc1,0x00]
545; CHECK-NEXT:    vpcmpltud %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd1,0x01]
546; CHECK-NEXT:    vpcmpleud %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd9,0x02]
547; CHECK-NEXT:    vpcmpunordud %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xe1,0x03]
548; CHECK-NEXT:    vpcmpnequd %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xe9,0x04]
549; CHECK-NEXT:    vpcmpnltud %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf1,0x05]
550; CHECK-NEXT:    vpcmpnleud %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf9,0x06]
551; CHECK-NEXT:    vpcmpordud %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xc9,0x07]
552; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
553; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
554; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
555; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
556; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
557; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
558; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
559; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
560; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
561; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
562; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
563; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
564; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
565; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
566; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
567; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
568; CHECK-NEXT:    retq ## encoding: [0xc3]
569  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
570  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
571  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
572  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
573  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
574  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
575  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
576  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
577  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
578  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
579  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
580  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
581  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
582  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
583  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
584  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
585  ret <8 x i8> %vec7
586}
587
588declare i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone
589
590define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
591; CHECK-LABEL: test_cmp_q_128:
592; CHECK:       ## BB#0:
593; CHECK-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc1,0x00]
594; CHECK-NEXT:    vpcmpltq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x01]
595; CHECK-NEXT:    vpcmpleq %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd1,0x02]
596; CHECK-NEXT:    vpcmpunordq %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x03]
597; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe1,0x04]
598; CHECK-NEXT:    vpcmpnltq %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe9,0x05]
599; CHECK-NEXT:    vpcmpnleq %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xf1,0x06]
600; CHECK-NEXT:    vpcmpordq %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xf9,0x07]
601; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
602; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
603; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
604; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
605; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
606; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
607; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
608; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
609; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
610; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
611; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
612; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
613; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
614; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
615; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
616; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
617; CHECK-NEXT:    retq ## encoding: [0xc3]
618  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
619  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
620  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1)
621  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
622  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1)
623  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
624  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1)
625  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
626  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1)
627  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
628  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1)
629  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
630  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1)
631  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
632  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1)
633  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
634  ret <8 x i8> %vec7
635}
636
637define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
638; CHECK-LABEL: test_mask_cmp_q_128:
639; CHECK:       ## BB#0:
640; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
641; CHECK-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xc1,0x00]
642; CHECK-NEXT:    vpcmpltq %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xd1,0x01]
643; CHECK-NEXT:    vpcmpleq %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xd9,0x02]
644; CHECK-NEXT:    vpcmpunordq %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe1,0x03]
645; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe9,0x04]
646; CHECK-NEXT:    vpcmpnltq %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xf1,0x05]
647; CHECK-NEXT:    vpcmpnleq %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xf9,0x06]
648; CHECK-NEXT:    vpcmpordq %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xc9,0x07]
649; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
650; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
651; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
652; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
653; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
654; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
655; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
656; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
657; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
658; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
659; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
660; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
661; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
662; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
663; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
664; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
665; CHECK-NEXT:    retq ## encoding: [0xc3]
666  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
667  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
668  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask)
669  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
670  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
671  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
672  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
673  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
674  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
675  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
676  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
677  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
678  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
679  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
680  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
681  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
682  ret <8 x i8> %vec7
683}
684
685declare i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone
686
687define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
688; CHECK-LABEL: test_ucmp_q_128:
689; CHECK:       ## BB#0:
690; CHECK-NEXT:    vpcmpequq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc1,0x00]
691; CHECK-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc9,0x01]
692; CHECK-NEXT:    vpcmpleuq %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd1,0x02]
693; CHECK-NEXT:    vpcmpunorduq %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd9,0x03]
694; CHECK-NEXT:    vpcmpnequq %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe1,0x04]
695; CHECK-NEXT:    vpcmpnltuq %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe9,0x05]
696; CHECK-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xf1,0x06]
697; CHECK-NEXT:    vpcmporduq %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xf9,0x07]
698; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
699; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
700; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
701; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
702; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
703; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
704; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
705; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
706; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
707; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
708; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
709; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
710; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
711; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
712; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
713; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
714; CHECK-NEXT:    retq ## encoding: [0xc3]
715  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
716  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
717  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1)
718  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
719  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1)
720  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
721  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1)
722  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
723  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1)
724  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
725  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1)
726  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
727  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1)
728  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
729  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1)
730  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
731  ret <8 x i8> %vec7
732}
733
734define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
735; CHECK-LABEL: test_mask_ucmp_q_128:
736; CHECK:       ## BB#0:
737; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
738; CHECK-NEXT:    vpcmpequq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xc1,0x00]
739; CHECK-NEXT:    vpcmpltuq %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd1,0x01]
740; CHECK-NEXT:    vpcmpleuq %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd9,0x02]
741; CHECK-NEXT:    vpcmpunorduq %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xe1,0x03]
742; CHECK-NEXT:    vpcmpnequq %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xe9,0x04]
743; CHECK-NEXT:    vpcmpnltuq %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf1,0x05]
744; CHECK-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf9,0x06]
745; CHECK-NEXT:    vpcmporduq %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xc9,0x07]
746; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
747; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
748; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
749; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
750; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
751; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
752; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
753; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
754; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
755; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
756; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
757; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
758; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
759; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
760; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
761; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
762; CHECK-NEXT:    retq ## encoding: [0xc3]
763  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
764  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
765  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask)
766  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
767  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
768  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
769  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
770  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
771  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
772  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
773  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
774  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
775  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
776  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
777  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
778  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
779  ret <8 x i8> %vec7
780}
781
782declare i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone
783
784define void @compr1(i8* %addr, <8 x double> %data, i8 %mask) {
785; CHECK-LABEL: compr1:
786; CHECK:       ## BB#0:
787; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
788; CHECK-NEXT:    vcompresspd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x07]
789; CHECK-NEXT:    retq ## encoding: [0xc3]
790  call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
791  ret void
792}
793
794declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
795
796define void @compr2(i8* %addr, <4 x double> %data, i8 %mask) {
797; CHECK-LABEL: compr2:
798; CHECK:       ## BB#0:
799; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
800; CHECK-NEXT:    vcompresspd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07]
801; CHECK-NEXT:    retq ## encoding: [0xc3]
802  call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
803  ret void
804}
805
806declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
807
808define void @compr3(i8* %addr, <4 x float> %data, i8 %mask) {
809; CHECK-LABEL: compr3:
810; CHECK:       ## BB#0:
811; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
812; CHECK-NEXT:    vcompressps %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07]
813; CHECK-NEXT:    retq ## encoding: [0xc3]
814  call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
815  ret void
816}
817
818declare void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
819
820define <8 x double> @compr4(i8* %addr, <8 x double> %data, i8 %mask) {
821; CHECK-LABEL: compr4:
822; CHECK:       ## BB#0:
823; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
824; CHECK-NEXT:    vcompresspd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0xc0]
825; CHECK-NEXT:    retq ## encoding: [0xc3]
826  %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
827  ret <8 x double> %res
828}
829
830declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
831
832define <4 x double> @compr5(<4 x double> %data, <4 x double> %src0, i8 %mask) {
833; CHECK-LABEL: compr5:
834; CHECK:       ## BB#0:
835; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
836; CHECK-NEXT:    vcompresspd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
837; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
838; CHECK-NEXT:    retq ## encoding: [0xc3]
839  %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask)
840  ret <4 x double> %res
841}
842
843declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
844
845define <4 x float> @compr6(<4 x float> %data, i8 %mask) {
846; CHECK-LABEL: compr6:
847; CHECK:       ## BB#0:
848; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
849; CHECK-NEXT:    vcompressps %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0]
850; CHECK-NEXT:    retq ## encoding: [0xc3]
851  %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 %mask)
852  ret <4 x float> %res
853}
854
855declare <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
856
857define void @compr7(i8* %addr, <8 x double> %data) {
858; CHECK-LABEL: compr7:
859; CHECK:       ## BB#0:
860; CHECK-NEXT:    vmovupd %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x07]
861; CHECK-NEXT:    retq ## encoding: [0xc3]
862  call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1)
863  ret void
864}
865
866define <4 x float> @compr8(<4 x float> %data) {
867; CHECK-LABEL: compr8:
868; CHECK:       ## BB#0:
869; CHECK-NEXT:    retq ## encoding: [0xc3]
870  %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 -1)
871  ret <4 x float> %res
872}
873
874define void @compr9(i8* %addr, <8 x i64> %data, i8 %mask) {
875; CHECK-LABEL: compr9:
876; CHECK:       ## BB#0:
877; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
878; CHECK-NEXT:    vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07]
879; CHECK-NEXT:    retq ## encoding: [0xc3]
880  call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
881  ret void
882}
883
884declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
885
886define <4 x i32> @compr10(<4 x i32> %data, i8 %mask) {
887; CHECK-LABEL: compr10:
888; CHECK:       ## BB#0:
889; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
890; CHECK-NEXT:    vpcompressd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
891; CHECK-NEXT:    retq ## encoding: [0xc3]
892  %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32>zeroinitializer, i8 %mask)
893  ret <4 x i32> %res
894}
895
896declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
897
898; Expand
899
900define <8 x double> @expand1(i8* %addr, <8 x double> %data, i8 %mask) {
901; CHECK-LABEL: expand1:
902; CHECK:       ## BB#0:
903; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
904; CHECK-NEXT:    vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
905; CHECK-NEXT:    retq ## encoding: [0xc3]
906  %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
907  ret <8 x double> %res
908}
909
910declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
911
912define <4 x double> @expand2(i8* %addr, <4 x double> %data, i8 %mask) {
913; CHECK-LABEL: expand2:
914; CHECK:       ## BB#0:
915; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
916; CHECK-NEXT:    vexpandpd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
917; CHECK-NEXT:    retq ## encoding: [0xc3]
918  %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
919  ret <4 x double> %res
920}
921
922declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
923
924define <4 x float> @expand3(i8* %addr, <4 x float> %data, i8 %mask) {
925; CHECK-LABEL: expand3:
926; CHECK:       ## BB#0:
927; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
928; CHECK-NEXT:    vexpandps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
929; CHECK-NEXT:    retq ## encoding: [0xc3]
930  %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
931  ret <4 x float> %res
932}
933
934declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
935
936define <8 x double> @expand4(i8* %addr, <8 x double> %data, i8 %mask) {
937; CHECK-LABEL: expand4:
938; CHECK:       ## BB#0:
939; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
940; CHECK-NEXT:    vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0]
941; CHECK-NEXT:    retq ## encoding: [0xc3]
942  %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
943  ret <8 x double> %res
944}
945
946declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
947
948define <4 x double> @expand5(<4 x double> %data, <4 x double> %src0, i8 %mask) {
949; CHECK-LABEL: expand5:
950; CHECK:       ## BB#0:
951; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
952; CHECK-NEXT:    vexpandpd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
953; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
954; CHECK-NEXT:    retq ## encoding: [0xc3]
955  %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask)
956  ret <4 x double> %res
957}
958
959declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
960
961define <4 x float> @expand6(<4 x float> %data, i8 %mask) {
962; CHECK-LABEL: expand6:
963; CHECK:       ## BB#0:
964; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
965; CHECK-NEXT:    vexpandps %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0]
966; CHECK-NEXT:    retq ## encoding: [0xc3]
967  %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 %mask)
968  ret <4 x float> %res
969}
970
971declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
972
973define <8 x double> @expand7(i8* %addr, <8 x double> %data) {
974; CHECK-LABEL: expand7:
975; CHECK:       ## BB#0:
976; CHECK-NEXT:    vmovupd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x07]
977; CHECK-NEXT:    retq ## encoding: [0xc3]
978  %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1)
979  ret <8 x double> %res
980}
981
982define <4 x float> @expand8(<4 x float> %data) {
983; CHECK-LABEL: expand8:
984; CHECK:       ## BB#0:
985; CHECK-NEXT:    retq ## encoding: [0xc3]
986  %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 -1)
987  ret <4 x float> %res
988}
989
990define <8 x i64> @expand9(i8* %addr, <8 x i64> %data, i8 %mask) {
991; CHECK-LABEL: expand9:
992; CHECK:       ## BB#0:
993; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
994; CHECK-NEXT:    vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
995; CHECK-NEXT:    retq ## encoding: [0xc3]
996  %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
997  ret <8 x i64> %res
998}
999
1000declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
1001
1002define <4 x i32> @expand10(<4 x i32> %data, i8 %mask) {
1003; CHECK-LABEL: expand10:
1004; CHECK:       ## BB#0:
1005; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1006; CHECK-NEXT:    vpexpandd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
1007; CHECK-NEXT:    retq ## encoding: [0xc3]
1008  %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32>zeroinitializer, i8 %mask)
1009  ret <4 x i32> %res
1010}
1011
1012declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
1013
1014define < 2 x i64> @test_mask_mul_epi32_rr_128(< 4 x i32> %a, < 4 x i32> %b) {
1015; CHECK-LABEL: test_mask_mul_epi32_rr_128:
1016; CHECK:       ## BB#0:
1017; CHECK-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0xc1]
1018; CHECK-NEXT:    retq ## encoding: [0xc3]
1019  %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
1020  ret < 2 x i64> %res
1021}
1022
1023define < 2 x i64> @test_mask_mul_epi32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) {
1024; CHECK-LABEL: test_mask_mul_epi32_rrk_128:
1025; CHECK:       ## BB#0:
1026; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1027; CHECK-NEXT:    vpmuldq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1]
1028; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1029; CHECK-NEXT:    retq ## encoding: [0xc3]
1030  %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
1031  ret < 2 x i64> %res
1032}
1033
1034define < 2 x i64> @test_mask_mul_epi32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) {
1035; CHECK-LABEL: test_mask_mul_epi32_rrkz_128:
1036; CHECK:       ## BB#0:
1037; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1038; CHECK-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1]
1039; CHECK-NEXT:    retq ## encoding: [0xc3]
1040  %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
1041  ret < 2 x i64> %res
1042}
1043
1044define < 2 x i64> @test_mask_mul_epi32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) {
1045; CHECK-LABEL: test_mask_mul_epi32_rm_128:
1046; CHECK:       ## BB#0:
1047; CHECK-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0x07]
1048; CHECK-NEXT:    retq ## encoding: [0xc3]
1049  %b = load < 4 x i32>, < 4 x i32>* %ptr_b
1050  %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
1051  ret < 2 x i64> %res
1052}
1053
1054define < 2 x i64> @test_mask_mul_epi32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
1055; CHECK-LABEL: test_mask_mul_epi32_rmk_128:
1056; CHECK:       ## BB#0:
1057; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1058; CHECK-NEXT:    vpmuldq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0x0f]
1059; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1060; CHECK-NEXT:    retq ## encoding: [0xc3]
1061  %b = load < 4 x i32>, < 4 x i32>* %ptr_b
1062  %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
1063  ret < 2 x i64> %res
1064}
1065
1066define < 2 x i64> @test_mask_mul_epi32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) {
1067; CHECK-LABEL: test_mask_mul_epi32_rmkz_128:
1068; CHECK:       ## BB#0:
1069; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1070; CHECK-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x28,0x07]
1071; CHECK-NEXT:    retq ## encoding: [0xc3]
1072  %b = load < 4 x i32>, < 4 x i32>* %ptr_b
1073  %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
1074  ret < 2 x i64> %res
1075}
1076
1077define < 2 x i64> @test_mask_mul_epi32_rmb_128(< 4 x i32> %a, i64* %ptr_b) {
1078; CHECK-LABEL: test_mask_mul_epi32_rmb_128:
1079; CHECK:       ## BB#0:
1080; CHECK-NEXT:    vpmuldq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x18,0x28,0x07]
1081; CHECK-NEXT:    retq ## encoding: [0xc3]
1082  %q = load i64, i64* %ptr_b
1083  %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
1084  %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
1085  %b = bitcast < 2 x i64> %b64 to < 4 x i32>
1086  %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
1087  ret < 2 x i64> %res
1088}
1089
1090define < 2 x i64> @test_mask_mul_epi32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
1091; CHECK-LABEL: test_mask_mul_epi32_rmbk_128:
1092; CHECK:       ## BB#0:
1093; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1094; CHECK-NEXT:    vpmuldq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x28,0x0f]
1095; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1096; CHECK-NEXT:    retq ## encoding: [0xc3]
1097  %q = load i64, i64* %ptr_b
1098  %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
1099  %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
1100  %b = bitcast < 2 x i64> %b64 to < 4 x i32>
1101  %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
1102  ret < 2 x i64> %res
1103}
1104
1105define < 2 x i64> @test_mask_mul_epi32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) {
1106; CHECK-LABEL: test_mask_mul_epi32_rmbkz_128:
1107; CHECK:       ## BB#0:
1108; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1109; CHECK-NEXT:    vpmuldq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x99,0x28,0x07]
1110; CHECK-NEXT:    retq ## encoding: [0xc3]
1111  %q = load i64, i64* %ptr_b
1112  %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
1113  %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer
1114  %b = bitcast < 2 x i64> %b64 to < 4 x i32>
1115  %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
1116  ret < 2 x i64> %res
1117}
1118
1119declare < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8)
1120
1121define < 4 x i64> @test_mask_mul_epi32_rr_256(< 8 x i32> %a, < 8 x i32> %b) {
1122; CHECK-LABEL: test_mask_mul_epi32_rr_256:
1123; CHECK:       ## BB#0:
1124; CHECK-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x28,0xc1]
1125; CHECK-NEXT:    retq ## encoding: [0xc3]
1126  %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
1127  ret < 4 x i64> %res
1128}
1129
1130define < 4 x i64> @test_mask_mul_epi32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) {
1131; CHECK-LABEL: test_mask_mul_epi32_rrk_256:
1132; CHECK:       ## BB#0:
1133; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1134; CHECK-NEXT:    vpmuldq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1]
1135; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1136; CHECK-NEXT:    retq ## encoding: [0xc3]
1137  %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
1138  ret < 4 x i64> %res
1139}
1140
1141define < 4 x i64> @test_mask_mul_epi32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) {
1142; CHECK-LABEL: test_mask_mul_epi32_rrkz_256:
1143; CHECK:       ## BB#0:
1144; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1145; CHECK-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1]
1146; CHECK-NEXT:    retq ## encoding: [0xc3]
1147  %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
1148  ret < 4 x i64> %res
1149}
1150
1151define < 4 x i64> @test_mask_mul_epi32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) {
1152; CHECK-LABEL: test_mask_mul_epi32_rm_256:
1153; CHECK:       ## BB#0:
1154; CHECK-NEXT:    vpmuldq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x28,0x07]
1155; CHECK-NEXT:    retq ## encoding: [0xc3]
1156  %b = load < 8 x i32>, < 8 x i32>* %ptr_b
1157  %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
1158  ret < 4 x i64> %res
1159}
1160
1161define < 4 x i64> @test_mask_mul_epi32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
1162; CHECK-LABEL: test_mask_mul_epi32_rmk_256:
1163; CHECK:       ## BB#0:
1164; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1165; CHECK-NEXT:    vpmuldq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0x0f]
1166; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1167; CHECK-NEXT:    retq ## encoding: [0xc3]
1168  %b = load < 8 x i32>, < 8 x i32>* %ptr_b
1169  %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
1170  ret < 4 x i64> %res
1171}
1172
1173define < 4 x i64> @test_mask_mul_epi32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) {
1174; CHECK-LABEL: test_mask_mul_epi32_rmkz_256:
1175; CHECK:       ## BB#0:
1176; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1177; CHECK-NEXT:    vpmuldq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x07]
1178; CHECK-NEXT:    retq ## encoding: [0xc3]
1179  %b = load < 8 x i32>, < 8 x i32>* %ptr_b
1180  %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
1181  ret < 4 x i64> %res
1182}
1183
1184define < 4 x i64> @test_mask_mul_epi32_rmb_256(< 8 x i32> %a, i64* %ptr_b) {
1185; CHECK-LABEL: test_mask_mul_epi32_rmb_256:
1186; CHECK:       ## BB#0:
1187; CHECK-NEXT:    vpmuldq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x38,0x28,0x07]
1188; CHECK-NEXT:    retq ## encoding: [0xc3]
1189  %q = load i64, i64* %ptr_b
1190  %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
1191  %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
1192  %b = bitcast < 4 x i64> %b64 to < 8 x i32>
1193  %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
1194  ret < 4 x i64> %res
1195}
1196
1197define < 4 x i64> @test_mask_mul_epi32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
1198; CHECK-LABEL: test_mask_mul_epi32_rmbk_256:
1199; CHECK:       ## BB#0:
1200; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1201; CHECK-NEXT:    vpmuldq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x28,0x0f]
1202; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1203; CHECK-NEXT:    retq ## encoding: [0xc3]
1204  %q = load i64, i64* %ptr_b
1205  %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
1206  %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
1207  %b = bitcast < 4 x i64> %b64 to < 8 x i32>
1208  %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
1209  ret < 4 x i64> %res
1210}
1211
1212define < 4 x i64> @test_mask_mul_epi32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) {
1213; CHECK-LABEL: test_mask_mul_epi32_rmbkz_256:
1214; CHECK:       ## BB#0:
1215; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1216; CHECK-NEXT:    vpmuldq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xb9,0x28,0x07]
1217; CHECK-NEXT:    retq ## encoding: [0xc3]
1218  %q = load i64, i64* %ptr_b
1219  %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
1220  %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
1221  %b = bitcast < 4 x i64> %b64 to < 8 x i32>
1222  %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
1223  ret < 4 x i64> %res
1224}
1225
1226declare < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8)
1227
1228define < 2 x i64> @test_mask_mul_epu32_rr_128(< 4 x i32> %a, < 4 x i32> %b) {
1229; CHECK-LABEL: test_mask_mul_epu32_rr_128:
1230; CHECK:       ## BB#0:
1231; CHECK-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xf4,0xc1]
1232; CHECK-NEXT:    retq ## encoding: [0xc3]
1233  %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
1234  ret < 2 x i64> %res
1235}
1236
1237define < 2 x i64> @test_mask_mul_epu32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) {
1238; CHECK-LABEL: test_mask_mul_epu32_rrk_128:
1239; CHECK:       ## BB#0:
1240; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1241; CHECK-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1]
1242; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1243; CHECK-NEXT:    retq ## encoding: [0xc3]
1244  %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
1245  ret < 2 x i64> %res
1246}
1247
1248define < 2 x i64> @test_mask_mul_epu32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) {
1249; CHECK-LABEL: test_mask_mul_epu32_rrkz_128:
1250; CHECK:       ## BB#0:
1251; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1252; CHECK-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1]
1253; CHECK-NEXT:    retq ## encoding: [0xc3]
1254  %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
1255  ret < 2 x i64> %res
1256}
1257
1258define < 2 x i64> @test_mask_mul_epu32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) {
1259; CHECK-LABEL: test_mask_mul_epu32_rm_128:
1260; CHECK:       ## BB#0:
1261; CHECK-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xf4,0x07]
1262; CHECK-NEXT:    retq ## encoding: [0xc3]
1263  %b = load < 4 x i32>, < 4 x i32>* %ptr_b
1264  %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
1265  ret < 2 x i64> %res
1266}
1267
1268define < 2 x i64> @test_mask_mul_epu32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
1269; CHECK-LABEL: test_mask_mul_epu32_rmk_128:
1270; CHECK:       ## BB#0:
1271; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1272; CHECK-NEXT:    vpmuludq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x0f]
1273; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1274; CHECK-NEXT:    retq ## encoding: [0xc3]
1275  %b = load < 4 x i32>, < 4 x i32>* %ptr_b
1276  %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
1277  ret < 2 x i64> %res
1278}
1279
1280define < 2 x i64> @test_mask_mul_epu32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) {
1281; CHECK-LABEL: test_mask_mul_epu32_rmkz_128:
1282; CHECK:       ## BB#0:
1283; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1284; CHECK-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x07]
1285; CHECK-NEXT:    retq ## encoding: [0xc3]
1286  %b = load < 4 x i32>, < 4 x i32>* %ptr_b
1287  %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
1288  ret < 2 x i64> %res
1289}
1290
1291define < 2 x i64> @test_mask_mul_epu32_rmb_128(< 4 x i32> %a, i64* %ptr_b) {
1292; CHECK-LABEL: test_mask_mul_epu32_rmb_128:
1293; CHECK:       ## BB#0:
1294; CHECK-NEXT:    vpmuludq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xf4,0x07]
1295; CHECK-NEXT:    retq ## encoding: [0xc3]
1296  %q = load i64, i64* %ptr_b
1297  %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
1298  %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
1299  %b = bitcast < 2 x i64> %b64 to < 4 x i32>
1300  %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
1301  ret < 2 x i64> %res
1302}
1303
1304define < 2 x i64> @test_mask_mul_epu32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
1305; CHECK-LABEL: test_mask_mul_epu32_rmbk_128:
1306; CHECK:       ## BB#0:
1307; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1308; CHECK-NEXT:    vpmuludq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xf4,0x0f]
1309; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1310; CHECK-NEXT:    retq ## encoding: [0xc3]
1311  %q = load i64, i64* %ptr_b
1312  %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
1313  %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
1314  %b = bitcast < 2 x i64> %b64 to < 4 x i32>
1315  %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
1316  ret < 2 x i64> %res
1317}
1318
1319define < 2 x i64> @test_mask_mul_epu32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) {
1320; CHECK-LABEL: test_mask_mul_epu32_rmbkz_128:
1321; CHECK:       ## BB#0:
1322; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1323; CHECK-NEXT:    vpmuludq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0xf4,0x07]
1324; CHECK-NEXT:    retq ## encoding: [0xc3]
1325  %q = load i64, i64* %ptr_b
1326  %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
1327  %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer
1328  %b = bitcast < 2 x i64> %b64 to < 4 x i32>
1329  %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
1330  ret < 2 x i64> %res
1331}
1332
1333declare < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8)
1334
1335define < 4 x i64> @test_mask_mul_epu32_rr_256(< 8 x i32> %a, < 8 x i32> %b) {
1336; CHECK-LABEL: test_mask_mul_epu32_rr_256:
1337; CHECK:       ## BB#0:
1338; CHECK-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xf4,0xc1]
1339; CHECK-NEXT:    retq ## encoding: [0xc3]
1340  %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
1341  ret < 4 x i64> %res
1342}
1343
1344define < 4 x i64> @test_mask_mul_epu32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) {
1345; CHECK-LABEL: test_mask_mul_epu32_rrk_256:
1346; CHECK:       ## BB#0:
1347; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1348; CHECK-NEXT:    vpmuludq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1]
1349; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1350; CHECK-NEXT:    retq ## encoding: [0xc3]
1351  %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
1352  ret < 4 x i64> %res
1353}
1354
1355define < 4 x i64> @test_mask_mul_epu32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) {
1356; CHECK-LABEL: test_mask_mul_epu32_rrkz_256:
1357; CHECK:       ## BB#0:
1358; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1359; CHECK-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1]
1360; CHECK-NEXT:    retq ## encoding: [0xc3]
1361  %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
1362  ret < 4 x i64> %res
1363}
1364
1365define < 4 x i64> @test_mask_mul_epu32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) {
1366; CHECK-LABEL: test_mask_mul_epu32_rm_256:
1367; CHECK:       ## BB#0:
1368; CHECK-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xf4,0x07]
1369; CHECK-NEXT:    retq ## encoding: [0xc3]
1370  %b = load < 8 x i32>, < 8 x i32>* %ptr_b
1371  %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
1372  ret < 4 x i64> %res
1373}
1374
1375define < 4 x i64> @test_mask_mul_epu32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
1376; CHECK-LABEL: test_mask_mul_epu32_rmk_256:
1377; CHECK:       ## BB#0:
1378; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1379; CHECK-NEXT:    vpmuludq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x0f]
1380; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1381; CHECK-NEXT:    retq ## encoding: [0xc3]
1382  %b = load < 8 x i32>, < 8 x i32>* %ptr_b
1383  %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
1384  ret < 4 x i64> %res
1385}
1386
1387define < 4 x i64> @test_mask_mul_epu32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) {
1388; CHECK-LABEL: test_mask_mul_epu32_rmkz_256:
1389; CHECK:       ## BB#0:
1390; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1391; CHECK-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x07]
1392; CHECK-NEXT:    retq ## encoding: [0xc3]
1393  %b = load < 8 x i32>, < 8 x i32>* %ptr_b
1394  %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
1395  ret < 4 x i64> %res
1396}
1397
1398define < 4 x i64> @test_mask_mul_epu32_rmb_256(< 8 x i32> %a, i64* %ptr_b) {
1399; CHECK-LABEL: test_mask_mul_epu32_rmb_256:
1400; CHECK:       ## BB#0:
1401; CHECK-NEXT:    vpmuludq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xf4,0x07]
1402; CHECK-NEXT:    retq ## encoding: [0xc3]
1403  %q = load i64, i64* %ptr_b
1404  %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
1405  %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
1406  %b = bitcast < 4 x i64> %b64 to < 8 x i32>
1407  %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
1408  ret < 4 x i64> %res
1409}
1410
1411define < 4 x i64> @test_mask_mul_epu32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
1412; CHECK-LABEL: test_mask_mul_epu32_rmbk_256:
1413; CHECK:       ## BB#0:
1414; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1415; CHECK-NEXT:    vpmuludq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xf4,0x0f]
1416; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1417; CHECK-NEXT:    retq ## encoding: [0xc3]
1418  %q = load i64, i64* %ptr_b
1419  %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
1420  %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
1421  %b = bitcast < 4 x i64> %b64 to < 8 x i32>
1422  %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
1423  ret < 4 x i64> %res
1424}
1425
1426define < 4 x i64> @test_mask_mul_epu32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) {
1427; CHECK-LABEL: test_mask_mul_epu32_rmbkz_256:
1428; CHECK:       ## BB#0:
1429; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1430; CHECK-NEXT:    vpmuludq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0xf4,0x07]
1431; CHECK-NEXT:    retq ## encoding: [0xc3]
1432  %q = load i64, i64* %ptr_b
1433  %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
1434  %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
1435  %b = bitcast < 4 x i64> %b64 to < 8 x i32>
1436  %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
1437  ret < 4 x i64> %res
1438}
1439
1440declare < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8)
1441
1442define <4 x i32> @test_mask_add_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
1443; CHECK-LABEL: test_mask_add_epi32_rr_128:
1444; CHECK:       ## BB#0:
1445; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1]
1446; CHECK-NEXT:    retq ## encoding: [0xc3]
1447  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1448  ret <4 x i32> %res
1449}
1450
1451define <4 x i32> @test_mask_add_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
1452; CHECK-LABEL: test_mask_add_epi32_rrk_128:
1453; CHECK:       ## BB#0:
1454; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1455; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1]
1456; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1457; CHECK-NEXT:    retq ## encoding: [0xc3]
1458  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1459  ret <4 x i32> %res
1460}
1461
1462define <4 x i32> @test_mask_add_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
1463; CHECK-LABEL: test_mask_add_epi32_rrkz_128:
1464; CHECK:       ## BB#0:
1465; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1466; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
1467; CHECK-NEXT:    retq ## encoding: [0xc3]
1468  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1469  ret <4 x i32> %res
1470}
1471
1472define <4 x i32> @test_mask_add_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
1473; CHECK-LABEL: test_mask_add_epi32_rm_128:
1474; CHECK:       ## BB#0:
1475; CHECK-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0x07]
1476; CHECK-NEXT:    retq ## encoding: [0xc3]
1477  %b = load <4 x i32>, <4 x i32>* %ptr_b
1478  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1479  ret <4 x i32> %res
1480}
1481
1482define <4 x i32> @test_mask_add_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
1483; CHECK-LABEL: test_mask_add_epi32_rmk_128:
1484; CHECK:       ## BB#0:
1485; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1486; CHECK-NEXT:    vpaddd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x0f]
1487; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1488; CHECK-NEXT:    retq ## encoding: [0xc3]
1489  %b = load <4 x i32>, <4 x i32>* %ptr_b
1490  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1491  ret <4 x i32> %res
1492}
1493
1494define <4 x i32> @test_mask_add_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
1495; CHECK-LABEL: test_mask_add_epi32_rmkz_128:
1496; CHECK:       ## BB#0:
1497; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1498; CHECK-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07]
1499; CHECK-NEXT:    retq ## encoding: [0xc3]
1500  %b = load <4 x i32>, <4 x i32>* %ptr_b
1501  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1502  ret <4 x i32> %res
1503}
1504
1505define <4 x i32> @test_mask_add_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
1506; CHECK-LABEL: test_mask_add_epi32_rmb_128:
1507; CHECK:       ## BB#0:
1508; CHECK-NEXT:    vpaddd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x07]
1509; CHECK-NEXT:    retq ## encoding: [0xc3]
1510  %q = load i32, i32* %ptr_b
1511  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1512  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1513  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1514  ret <4 x i32> %res
1515}
1516
1517define <4 x i32> @test_mask_add_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
1518; CHECK-LABEL: test_mask_add_epi32_rmbk_128:
1519; CHECK:       ## BB#0:
1520; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1521; CHECK-NEXT:    vpaddd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x0f]
1522; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1523; CHECK-NEXT:    retq ## encoding: [0xc3]
1524  %q = load i32, i32* %ptr_b
1525  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1526  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1527  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1528  ret <4 x i32> %res
1529}
1530
1531define <4 x i32> @test_mask_add_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
1532; CHECK-LABEL: test_mask_add_epi32_rmbkz_128:
1533; CHECK:       ## BB#0:
1534; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1535; CHECK-NEXT:    vpaddd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x07]
1536; CHECK-NEXT:    retq ## encoding: [0xc3]
1537  %q = load i32, i32* %ptr_b
1538  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1539  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1540  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1541  ret <4 x i32> %res
1542}
1543
1544declare <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1545
1546define <4 x i32> @test_mask_sub_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
1547; CHECK-LABEL: test_mask_sub_epi32_rr_128:
1548; CHECK:       ## BB#0:
1549; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0xc1]
1550; CHECK-NEXT:    retq ## encoding: [0xc3]
1551  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1552  ret <4 x i32> %res
1553}
1554
1555define <4 x i32> @test_mask_sub_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
1556; CHECK-LABEL: test_mask_sub_epi32_rrk_128:
1557; CHECK:       ## BB#0:
1558; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1559; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1]
1560; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1561; CHECK-NEXT:    retq ## encoding: [0xc3]
1562  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1563  ret <4 x i32> %res
1564}
1565
1566define <4 x i32> @test_mask_sub_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
1567; CHECK-LABEL: test_mask_sub_epi32_rrkz_128:
1568; CHECK:       ## BB#0:
1569; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1570; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1]
1571; CHECK-NEXT:    retq ## encoding: [0xc3]
1572  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1573  ret <4 x i32> %res
1574}
1575
1576define <4 x i32> @test_mask_sub_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
1577; CHECK-LABEL: test_mask_sub_epi32_rm_128:
1578; CHECK:       ## BB#0:
1579; CHECK-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0x07]
1580; CHECK-NEXT:    retq ## encoding: [0xc3]
1581  %b = load <4 x i32>, <4 x i32>* %ptr_b
1582  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1583  ret <4 x i32> %res
1584}
1585
1586define <4 x i32> @test_mask_sub_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
1587; CHECK-LABEL: test_mask_sub_epi32_rmk_128:
1588; CHECK:       ## BB#0:
1589; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1590; CHECK-NEXT:    vpsubd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x0f]
1591; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1592; CHECK-NEXT:    retq ## encoding: [0xc3]
1593  %b = load <4 x i32>, <4 x i32>* %ptr_b
1594  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1595  ret <4 x i32> %res
1596}
1597
1598define <4 x i32> @test_mask_sub_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
1599; CHECK-LABEL: test_mask_sub_epi32_rmkz_128:
1600; CHECK:       ## BB#0:
1601; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1602; CHECK-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x07]
1603; CHECK-NEXT:    retq ## encoding: [0xc3]
1604  %b = load <4 x i32>, <4 x i32>* %ptr_b
1605  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1606  ret <4 x i32> %res
1607}
1608
1609define <4 x i32> @test_mask_sub_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
1610; CHECK-LABEL: test_mask_sub_epi32_rmb_128:
1611; CHECK:       ## BB#0:
1612; CHECK-NEXT:    vpsubd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x07]
1613; CHECK-NEXT:    retq ## encoding: [0xc3]
1614  %q = load i32, i32* %ptr_b
1615  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1616  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1617  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1618  ret <4 x i32> %res
1619}
1620
1621define <4 x i32> @test_mask_sub_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
1622; CHECK-LABEL: test_mask_sub_epi32_rmbk_128:
1623; CHECK:       ## BB#0:
1624; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1625; CHECK-NEXT:    vpsubd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x0f]
1626; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1627; CHECK-NEXT:    retq ## encoding: [0xc3]
1628  %q = load i32, i32* %ptr_b
1629  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1630  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1631  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1632  ret <4 x i32> %res
1633}
1634
1635define <4 x i32> @test_mask_sub_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
1636; CHECK-LABEL: test_mask_sub_epi32_rmbkz_128:
1637; CHECK:       ## BB#0:
1638; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1639; CHECK-NEXT:    vpsubd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x07]
1640; CHECK-NEXT:    retq ## encoding: [0xc3]
1641  %q = load i32, i32* %ptr_b
1642  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1643  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1644  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1645  ret <4 x i32> %res
1646}
1647
1648declare <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1649
1650define <8 x i32> @test_mask_sub_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
1651; CHECK-LABEL: test_mask_sub_epi32_rr_256:
1652; CHECK:       ## BB#0:
1653; CHECK-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0xc1]
1654; CHECK-NEXT:    retq ## encoding: [0xc3]
1655  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1656  ret <8 x i32> %res
1657}
1658
1659define <8 x i32> @test_mask_sub_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
1660; CHECK-LABEL: test_mask_sub_epi32_rrk_256:
1661; CHECK:       ## BB#0:
1662; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1663; CHECK-NEXT:    vpsubd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1]
1664; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1665; CHECK-NEXT:    retq ## encoding: [0xc3]
1666  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1667  ret <8 x i32> %res
1668}
1669
1670define <8 x i32> @test_mask_sub_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
1671; CHECK-LABEL: test_mask_sub_epi32_rrkz_256:
1672; CHECK:       ## BB#0:
1673; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1674; CHECK-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1]
1675; CHECK-NEXT:    retq ## encoding: [0xc3]
1676  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1677  ret <8 x i32> %res
1678}
1679
1680define <8 x i32> @test_mask_sub_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
1681; CHECK-LABEL: test_mask_sub_epi32_rm_256:
1682; CHECK:       ## BB#0:
1683; CHECK-NEXT:    vpsubd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0x07]
1684; CHECK-NEXT:    retq ## encoding: [0xc3]
1685  %b = load <8 x i32>, <8 x i32>* %ptr_b
1686  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1687  ret <8 x i32> %res
1688}
1689
1690define <8 x i32> @test_mask_sub_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
1691; CHECK-LABEL: test_mask_sub_epi32_rmk_256:
1692; CHECK:       ## BB#0:
1693; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1694; CHECK-NEXT:    vpsubd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x0f]
1695; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1696; CHECK-NEXT:    retq ## encoding: [0xc3]
1697  %b = load <8 x i32>, <8 x i32>* %ptr_b
1698  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1699  ret <8 x i32> %res
1700}
1701
1702define <8 x i32> @test_mask_sub_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
1703; CHECK-LABEL: test_mask_sub_epi32_rmkz_256:
1704; CHECK:       ## BB#0:
1705; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1706; CHECK-NEXT:    vpsubd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x07]
1707; CHECK-NEXT:    retq ## encoding: [0xc3]
1708  %b = load <8 x i32>, <8 x i32>* %ptr_b
1709  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1710  ret <8 x i32> %res
1711}
1712
1713define <8 x i32> @test_mask_sub_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
1714; CHECK-LABEL: test_mask_sub_epi32_rmb_256:
1715; CHECK:       ## BB#0:
1716; CHECK-NEXT:    vpsubd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x07]
1717; CHECK-NEXT:    retq ## encoding: [0xc3]
1718  %q = load i32, i32* %ptr_b
1719  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1720  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1721  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1722  ret <8 x i32> %res
1723}
1724
1725define <8 x i32> @test_mask_sub_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
1726; CHECK-LABEL: test_mask_sub_epi32_rmbk_256:
1727; CHECK:       ## BB#0:
1728; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1729; CHECK-NEXT:    vpsubd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x0f]
1730; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1731; CHECK-NEXT:    retq ## encoding: [0xc3]
1732  %q = load i32, i32* %ptr_b
1733  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1734  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1735  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1736  ret <8 x i32> %res
1737}
1738
1739define <8 x i32> @test_mask_sub_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
1740; CHECK-LABEL: test_mask_sub_epi32_rmbkz_256:
1741; CHECK:       ## BB#0:
1742; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1743; CHECK-NEXT:    vpsubd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x07]
1744; CHECK-NEXT:    retq ## encoding: [0xc3]
1745  %q = load i32, i32* %ptr_b
1746  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1747  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1748  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1749  ret <8 x i32> %res
1750}
1751
1752declare <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1753
1754define <8 x i32> @test_mask_add_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
1755; CHECK-LABEL: test_mask_add_epi32_rr_256:
1756; CHECK:       ## BB#0:
1757; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1]
1758; CHECK-NEXT:    retq ## encoding: [0xc3]
1759  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1760  ret <8 x i32> %res
1761}
1762
1763define <8 x i32> @test_mask_add_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
1764; CHECK-LABEL: test_mask_add_epi32_rrk_256:
1765; CHECK:       ## BB#0:
1766; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1767; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1]
1768; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1769; CHECK-NEXT:    retq ## encoding: [0xc3]
1770  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1771  ret <8 x i32> %res
1772}
1773
1774define <8 x i32> @test_mask_add_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
1775; CHECK-LABEL: test_mask_add_epi32_rrkz_256:
1776; CHECK:       ## BB#0:
1777; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1778; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
1779; CHECK-NEXT:    retq ## encoding: [0xc3]
1780  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1781  ret <8 x i32> %res
1782}
1783
1784define <8 x i32> @test_mask_add_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
1785; CHECK-LABEL: test_mask_add_epi32_rm_256:
1786; CHECK:       ## BB#0:
1787; CHECK-NEXT:    vpaddd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0x07]
1788; CHECK-NEXT:    retq ## encoding: [0xc3]
1789  %b = load <8 x i32>, <8 x i32>* %ptr_b
1790  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1791  ret <8 x i32> %res
1792}
1793
1794define <8 x i32> @test_mask_add_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
1795; CHECK-LABEL: test_mask_add_epi32_rmk_256:
1796; CHECK:       ## BB#0:
1797; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1798; CHECK-NEXT:    vpaddd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x0f]
1799; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1800; CHECK-NEXT:    retq ## encoding: [0xc3]
1801  %b = load <8 x i32>, <8 x i32>* %ptr_b
1802  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1803  ret <8 x i32> %res
1804}
1805
1806define <8 x i32> @test_mask_add_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
1807; CHECK-LABEL: test_mask_add_epi32_rmkz_256:
1808; CHECK:       ## BB#0:
1809; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1810; CHECK-NEXT:    vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07]
1811; CHECK-NEXT:    retq ## encoding: [0xc3]
1812  %b = load <8 x i32>, <8 x i32>* %ptr_b
1813  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1814  ret <8 x i32> %res
1815}
1816
1817define <8 x i32> @test_mask_add_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
1818; CHECK-LABEL: test_mask_add_epi32_rmb_256:
1819; CHECK:       ## BB#0:
1820; CHECK-NEXT:    vpaddd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x07]
1821; CHECK-NEXT:    retq ## encoding: [0xc3]
1822  %q = load i32, i32* %ptr_b
1823  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1824  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1825  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1826  ret <8 x i32> %res
1827}
1828
1829define <8 x i32> @test_mask_add_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
1830; CHECK-LABEL: test_mask_add_epi32_rmbk_256:
1831; CHECK:       ## BB#0:
1832; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1833; CHECK-NEXT:    vpaddd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x0f]
1834; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1835; CHECK-NEXT:    retq ## encoding: [0xc3]
1836  %q = load i32, i32* %ptr_b
1837  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1838  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1839  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1840  ret <8 x i32> %res
1841}
1842
1843define <8 x i32> @test_mask_add_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
1844; CHECK-LABEL: test_mask_add_epi32_rmbkz_256:
1845; CHECK:       ## BB#0:
1846; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1847; CHECK-NEXT:    vpaddd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x07]
1848; CHECK-NEXT:    retq ## encoding: [0xc3]
1849  %q = load i32, i32* %ptr_b
1850  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
1851  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
1852  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
1853  ret <8 x i32> %res
1854}
1855
1856declare <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1857
1858define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
1859; CHECK-LABEL: test_cmpps_256:
1860; CHECK:       ## BB#0:
1861; CHECK-NEXT:    vcmpleps %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02]
1862; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
1863; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
1864; CHECK-NEXT:    retq ## encoding: [0xc3]
1865   %res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, i8 -1)
1866   ret i8 %res
1867 }
1868 declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> , <8 x float> , i32, i8)
1869
1870define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
1871; CHECK-LABEL: test_cmpps_128:
1872; CHECK:       ## BB#0:
1873; CHECK-NEXT:    vcmpleps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
1874; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
1875; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
1876; CHECK-NEXT:    retq ## encoding: [0xc3]
1877   %res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, i8 -1)
1878   ret i8 %res
1879 }
1880 declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> , <4 x float> , i32, i8)
1881
1882define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
1883; CHECK-LABEL: test_cmppd_256:
1884; CHECK:       ## BB#0:
1885; CHECK-NEXT:    vcmplepd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02]
1886; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
1887; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
1888; CHECK-NEXT:    retq ## encoding: [0xc3]
1889   %res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, i8 -1)
1890   ret i8 %res
1891 }
1892 declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> , <4 x double> , i32, i8)
1893
1894define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
1895; CHECK-LABEL: test_cmppd_128:
1896; CHECK:       ## BB#0:
1897; CHECK-NEXT:    vcmplepd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
1898; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
1899; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
1900; CHECK-NEXT:    retq ## encoding: [0xc3]
1901   %res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, i8 -1)
1902   ret i8 %res
1903 }
1904 declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> , <2 x double> , i32, i8)
1905
1906define <8 x float> @test_mm512_maskz_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
1907; CHECK-LABEL: test_mm512_maskz_add_ps_256:
1908; CHECK:       ## BB#0:
1909; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1910; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x58,0xc1]
1911; CHECK-NEXT:    retq ## encoding: [0xc3]
1912  %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
1913  ret <8 x float> %res
1914}
1915
1916define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
1917; CHECK-LABEL: test_mm512_mask_add_ps_256:
1918; CHECK:       ## BB#0:
1919; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1920; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x58,0xd1]
1921; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1922; CHECK-NEXT:    retq ## encoding: [0xc3]
1923  %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
1924  ret <8 x float> %res
1925}
1926
1927define <8 x float> @test_mm512_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
1928; CHECK-LABEL: test_mm512_add_ps_256:
1929; CHECK:       ## BB#0:
1930; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1]
1931; CHECK-NEXT:    retq ## encoding: [0xc3]
1932  %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
1933  ret <8 x float> %res
1934}
1935declare <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
1936
1937define <4 x float> @test_mm512_maskz_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
1938; CHECK-LABEL: test_mm512_maskz_add_ps_128:
1939; CHECK:       ## BB#0:
1940; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1941; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x58,0xc1]
1942; CHECK-NEXT:    retq ## encoding: [0xc3]
1943  %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
1944  ret <4 x float> %res
1945}
1946
1947define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
1948; CHECK-LABEL: test_mm512_mask_add_ps_128:
1949; CHECK:       ## BB#0:
1950; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1951; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x58,0xd1]
1952; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1953; CHECK-NEXT:    retq ## encoding: [0xc3]
1954  %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
1955  ret <4 x float> %res
1956}
1957
1958define <4 x float> @test_mm512_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
1959; CHECK-LABEL: test_mm512_add_ps_128:
1960; CHECK:       ## BB#0:
1961; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1]
1962; CHECK-NEXT:    retq ## encoding: [0xc3]
1963  %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
1964  ret <4 x float> %res
1965}
1966declare <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
1967
1968define <8 x float> @test_mm512_maskz_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
1969; CHECK-LABEL: test_mm512_maskz_sub_ps_256:
1970; CHECK:       ## BB#0:
1971; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1972; CHECK-NEXT:    vsubps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x5c,0xc1]
1973; CHECK-NEXT:    retq ## encoding: [0xc3]
1974  %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
1975  ret <8 x float> %res
1976}
1977
1978define <8 x float> @test_mm512_mask_sub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
1979; CHECK-LABEL: test_mm512_mask_sub_ps_256:
1980; CHECK:       ## BB#0:
1981; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1982; CHECK-NEXT:    vsubps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5c,0xd1]
1983; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1984; CHECK-NEXT:    retq ## encoding: [0xc3]
1985  %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
1986  ret <8 x float> %res
1987}
1988
1989define <8 x float> @test_mm512_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
1990; CHECK-LABEL: test_mm512_sub_ps_256:
1991; CHECK:       ## BB#0:
1992; CHECK-NEXT:    vsubps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5c,0xc1]
1993; CHECK-NEXT:    retq ## encoding: [0xc3]
1994  %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
1995  ret <8 x float> %res
1996}
1997declare <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
1998
1999define <4 x float> @test_mm512_maskz_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2000; CHECK-LABEL: test_mm512_maskz_sub_ps_128:
2001; CHECK:       ## BB#0:
2002; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2003; CHECK-NEXT:    vsubps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x5c,0xc1]
2004; CHECK-NEXT:    retq ## encoding: [0xc3]
2005  %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
2006  ret <4 x float> %res
2007}
2008
2009define <4 x float> @test_mm512_mask_sub_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
2010; CHECK-LABEL: test_mm512_mask_sub_ps_128:
2011; CHECK:       ## BB#0:
2012; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2013; CHECK-NEXT:    vsubps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5c,0xd1]
2014; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
2015; CHECK-NEXT:    retq ## encoding: [0xc3]
2016  %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
2017  ret <4 x float> %res
2018}
2019
2020define <4 x float> @test_mm512_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2021; CHECK-LABEL: test_mm512_sub_ps_128:
2022; CHECK:       ## BB#0:
2023; CHECK-NEXT:    vsubps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5c,0xc1]
2024; CHECK-NEXT:    retq ## encoding: [0xc3]
2025  %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
2026  ret <4 x float> %res
2027}
2028declare <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
2029
2030define <8 x float> @test_mm512_maskz_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2031; CHECK-LABEL: test_mm512_maskz_mul_ps_256:
2032; CHECK:       ## BB#0:
2033; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2034; CHECK-NEXT:    vmulps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x59,0xc1]
2035; CHECK-NEXT:    retq ## encoding: [0xc3]
2036  %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
2037  ret <8 x float> %res
2038}
2039
2040define <8 x float> @test_mm512_mask_mul_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
2041; CHECK-LABEL: test_mm512_mask_mul_ps_256:
2042; CHECK:       ## BB#0:
2043; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2044; CHECK-NEXT:    vmulps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x59,0xd1]
2045; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
2046; CHECK-NEXT:    retq ## encoding: [0xc3]
2047  %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
2048  ret <8 x float> %res
2049}
2050
2051define <8 x float> @test_mm512_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2052; CHECK-LABEL: test_mm512_mul_ps_256:
2053; CHECK:       ## BB#0:
2054; CHECK-NEXT:    vmulps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x59,0xc1]
2055; CHECK-NEXT:    retq ## encoding: [0xc3]
2056  %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
2057  ret <8 x float> %res
2058}
2059declare <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
2060
2061define <4 x float> @test_mm512_maskz_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2062; CHECK-LABEL: test_mm512_maskz_mul_ps_128:
2063; CHECK:       ## BB#0:
2064; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2065; CHECK-NEXT:    vmulps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x59,0xc1]
2066; CHECK-NEXT:    retq ## encoding: [0xc3]
2067  %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
2068  ret <4 x float> %res
2069}
2070
2071define <4 x float> @test_mm512_mask_mul_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
2072; CHECK-LABEL: test_mm512_mask_mul_ps_128:
2073; CHECK:       ## BB#0:
2074; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2075; CHECK-NEXT:    vmulps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x59,0xd1]
2076; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
2077; CHECK-NEXT:    retq ## encoding: [0xc3]
2078  %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
2079  ret <4 x float> %res
2080}
2081
2082define <4 x float> @test_mm512_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2083; CHECK-LABEL: test_mm512_mul_ps_128:
2084; CHECK:       ## BB#0:
2085; CHECK-NEXT:    vmulps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x59,0xc1]
2086; CHECK-NEXT:    retq ## encoding: [0xc3]
2087  %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
2088  ret <4 x float> %res
2089}
2090declare <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
2091
2092define <8 x float> @test_mm512_maskz_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2093; CHECK-LABEL: test_mm512_maskz_div_ps_256:
2094; CHECK:       ## BB#0:
2095; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2096; CHECK-NEXT:    vdivps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x5e,0xc1]
2097; CHECK-NEXT:    retq ## encoding: [0xc3]
2098  %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
2099  ret <8 x float> %res
2100}
2101
2102define <8 x float> @test_mm512_mask_div_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
2103; CHECK-LABEL: test_mm512_mask_div_ps_256:
2104; CHECK:       ## BB#0:
2105; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2106; CHECK-NEXT:    vdivps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5e,0xd1]
2107; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
2108; CHECK-NEXT:    retq ## encoding: [0xc3]
2109  %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
2110  ret <8 x float> %res
2111}
2112
2113define <8 x float> @test_mm512_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2114; CHECK-LABEL: test_mm512_div_ps_256:
2115; CHECK:       ## BB#0:
2116; CHECK-NEXT:    vdivps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5e,0xc1]
2117; CHECK-NEXT:    retq ## encoding: [0xc3]
2118  %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
2119  ret <8 x float> %res
2120}
2121declare <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
2122
2123define <4 x float> @test_mm512_maskz_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2124; CHECK-LABEL: test_mm512_maskz_div_ps_128:
2125; CHECK:       ## BB#0:
2126; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2127; CHECK-NEXT:    vdivps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x5e,0xc1]
2128; CHECK-NEXT:    retq ## encoding: [0xc3]
2129  %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
2130  ret <4 x float> %res
2131}
2132
2133define <4 x float> @test_mm512_mask_div_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
2134; CHECK-LABEL: test_mm512_mask_div_ps_128:
2135; CHECK:       ## BB#0:
2136; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2137; CHECK-NEXT:    vdivps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5e,0xd1]
2138; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
2139; CHECK-NEXT:    retq ## encoding: [0xc3]
2140  %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
2141  ret <4 x float> %res
2142}
2143
2144define <4 x float> @test_mm512_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2145; CHECK-LABEL: test_mm512_div_ps_128:
2146; CHECK:       ## BB#0:
2147; CHECK-NEXT:    vdivps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5e,0xc1]
2148; CHECK-NEXT:    retq ## encoding: [0xc3]
2149  %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
2150  ret <4 x float> %res
2151}
2152declare <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
2153
2154define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2155; CHECK-LABEL: test_mm512_maskz_max_ps_256:
2156; CHECK:       ## BB#0:
2157; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2158; CHECK-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
2159; CHECK-NEXT:    retq ## encoding: [0xc3]
2160  %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
2161  ret <8 x float> %res
2162}
2163
2164define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
2165; CHECK-LABEL: test_mm512_mask_max_ps_256:
2166; CHECK:       ## BB#0:
2167; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2168; CHECK-NEXT:    vmaxps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
2169; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
2170; CHECK-NEXT:    retq ## encoding: [0xc3]
2171  %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
2172  ret <8 x float> %res
2173}
2174
2175define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2176; CHECK-LABEL: test_mm512_max_ps_256:
2177; CHECK:       ## BB#0:
2178; CHECK-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5f,0xc1]
2179; CHECK-NEXT:    retq ## encoding: [0xc3]
2180  %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
2181  ret <8 x float> %res
2182}
2183declare <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
2184
2185define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2186; CHECK-LABEL: test_mm512_maskz_max_ps_128:
2187; CHECK:       ## BB#0:
2188; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2189; CHECK-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
2190; CHECK-NEXT:    retq ## encoding: [0xc3]
2191  %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
2192  ret <4 x float> %res
2193}
2194
2195define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
2196; CHECK-LABEL: test_mm512_mask_max_ps_128:
2197; CHECK:       ## BB#0:
2198; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2199; CHECK-NEXT:    vmaxps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
2200; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
2201; CHECK-NEXT:    retq ## encoding: [0xc3]
2202  %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
2203  ret <4 x float> %res
2204}
2205
2206define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2207; CHECK-LABEL: test_mm512_max_ps_128:
2208; CHECK:       ## BB#0:
2209; CHECK-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5f,0xc1]
2210; CHECK-NEXT:    retq ## encoding: [0xc3]
2211  %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
2212  ret <4 x float> %res
2213}
2214declare <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
2215
2216define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2217; CHECK-LABEL: test_mm512_maskz_min_ps_256:
2218; CHECK:       ## BB#0:
2219; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2220; CHECK-NEXT:    vminps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
2221; CHECK-NEXT:    retq ## encoding: [0xc3]
2222  %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
2223  ret <8 x float> %res
2224}
2225
2226define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
2227; CHECK-LABEL: test_mm512_mask_min_ps_256:
2228; CHECK:       ## BB#0:
2229; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2230; CHECK-NEXT:    vminps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
2231; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
2232; CHECK-NEXT:    retq ## encoding: [0xc3]
2233  %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
2234  ret <8 x float> %res
2235}
2236
2237define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
2238; CHECK-LABEL: test_mm512_min_ps_256:
2239; CHECK:       ## BB#0:
2240; CHECK-NEXT:    vminps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5d,0xc1]
2241; CHECK-NEXT:    retq ## encoding: [0xc3]
2242  %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
2243  ret <8 x float> %res
2244}
2245declare <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
2246
2247define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2248; CHECK-LABEL: test_mm512_maskz_min_ps_128:
2249; CHECK:       ## BB#0:
2250; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2251; CHECK-NEXT:    vminps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
2252; CHECK-NEXT:    retq ## encoding: [0xc3]
2253  %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
2254  ret <4 x float> %res
2255}
2256
2257define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
2258; CHECK-LABEL: test_mm512_mask_min_ps_128:
2259; CHECK:       ## BB#0:
2260; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2261; CHECK-NEXT:    vminps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
2262; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
2263; CHECK-NEXT:    retq ## encoding: [0xc3]
2264  %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
2265  ret <4 x float> %res
2266}
2267
2268define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2269; CHECK-LABEL: test_mm512_min_ps_128:
2270; CHECK:       ## BB#0:
2271; CHECK-NEXT:    vminps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5d,0xc1]
2272; CHECK-NEXT:    retq ## encoding: [0xc3]
2273  %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
2274  ret <4 x float> %res
2275}
2276declare <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
2277
2278define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) {
2279; CHECK-LABEL: test_sqrt_pd_256:
2280; CHECK:       ## BB#0:
2281; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2282; CHECK-NEXT:    vsqrtpd %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0]
2283; CHECK-NEXT:    retq ## encoding: [0xc3]
2284  %res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0,  <4 x double> zeroinitializer, i8 %mask)
2285  ret <4 x double> %res
2286}
2287declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
2288
2289define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) {
2290; CHECK-LABEL: test_sqrt_ps_256:
2291; CHECK:       ## BB#0:
2292; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2293; CHECK-NEXT:    vsqrtps %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0]
2294; CHECK-NEXT:    retq ## encoding: [0xc3]
2295  %res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
2296  ret <8 x float> %res
2297}
2298
2299declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
2300
2301define <4 x double> @test_getexp_pd_256(<4 x double> %a0) {
2302; CHECK-LABEL: test_getexp_pd_256:
2303; CHECK:       ## BB#0:
2304; CHECK-NEXT:    vgetexppd %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x42,0xc0]
2305; CHECK-NEXT:    retq ## encoding: [0xc3]
2306  %res = call <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double> %a0,  <4 x double> zeroinitializer, i8 -1)
2307  ret <4 x double> %res
2308}
2309
2310declare <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
2311
2312define <8 x float> @test_getexp_ps_256(<8 x float> %a0) {
2313; CHECK-LABEL: test_getexp_ps_256:
2314; CHECK:       ## BB#0:
2315; CHECK-NEXT:    vgetexpps %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x42,0xc0]
2316; CHECK-NEXT:    retq ## encoding: [0xc3]
2317  %res = call <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
2318  ret <8 x float> %res
2319}
2320declare <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
2321
2322declare <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2323
2324define <4 x i32>@test_int_x86_avx512_mask_pmaxs_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
2325; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_d_128:
2326; CHECK:       ## BB#0:
2327; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2328; CHECK-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3d,0xd1]
2329; CHECK-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x3d,0xc1]
2330; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0]
2331; CHECK-NEXT:    retq ## encoding: [0xc3]
2332  %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2 ,i8 %mask)
2333  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
2334  %res2 = add <4 x i32> %res, %res1
2335  ret <4 x i32> %res2
2336}
2337
2338declare <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
2339
2340define <8 x i32>@test_int_x86_avx512_mask_pmaxs_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
2341; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_d_256:
2342; CHECK:       ## BB#0:
2343; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2344; CHECK-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3d,0xd1]
2345; CHECK-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x3d,0xc1]
2346; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0]
2347; CHECK-NEXT:    retq ## encoding: [0xc3]
2348  %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
2349  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
2350  %res2 = add <8 x i32> %res, %res1
2351  ret <8 x i32> %res2
2352}
2353
2354declare <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
2355
2356define <2 x i64>@test_int_x86_avx512_mask_pmaxs_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
2357; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_q_128:
2358; CHECK:       ## BB#0:
2359; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2360; CHECK-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x3d,0xd1]
2361; CHECK-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x3d,0xc1]
2362; CHECK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0]
2363; CHECK-NEXT:    retq ## encoding: [0xc3]
2364  %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
2365  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
2366  %res2 = add <2 x i64> %res, %res1
2367  ret <2 x i64> %res2
2368}
2369
2370declare <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
2371
2372define <4 x i64>@test_int_x86_avx512_mask_pmaxs_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
2373; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_q_256:
2374; CHECK:       ## BB#0:
2375; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2376; CHECK-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x3d,0xd1]
2377; CHECK-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x3d,0xc1]
2378; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0]
2379; CHECK-NEXT:    retq ## encoding: [0xc3]
2380  %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
2381  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
2382  %res2 = add <4 x i64> %res, %res1
2383  ret <4 x i64> %res2
2384}
2385
2386declare <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2387
2388define <4 x i32>@test_int_x86_avx512_mask_pmaxu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2,i8 %mask) {
2389; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_d_128:
2390; CHECK:       ## BB#0:
2391; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2392; CHECK-NEXT:    vpmaxud %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3f,0xd1]
2393; CHECK-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x3f,0xc1]
2394; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0]
2395; CHECK-NEXT:    retq ## encoding: [0xc3]
2396  %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
2397  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
2398  %res2 = add <4 x i32> %res, %res1
2399  ret <4 x i32> %res2
2400}
2401
2402declare <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
2403
2404define <8 x i32>@test_int_x86_avx512_mask_pmaxu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
2405; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_d_256:
2406; CHECK:       ## BB#0:
2407; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2408; CHECK-NEXT:    vpmaxud %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3f,0xd1]
2409; CHECK-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x3f,0xc1]
2410; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0]
2411; CHECK-NEXT:    retq ## encoding: [0xc3]
2412  %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
2413  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
2414  %res2 = add <8 x i32> %res, %res1
2415  ret <8 x i32> %res2
2416}
2417
2418declare <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
2419
2420define <2 x i64>@test_int_x86_avx512_mask_pmaxu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
2421; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_q_128:
2422; CHECK:       ## BB#0:
2423; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2424; CHECK-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x3f,0xd1]
2425; CHECK-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x3f,0xc1]
2426; CHECK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0]
2427; CHECK-NEXT:    retq ## encoding: [0xc3]
2428  %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
2429  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
2430  %res2 = add <2 x i64> %res, %res1
2431  ret <2 x i64> %res2
2432}
2433
2434declare <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
2435
2436define <4 x i64>@test_int_x86_avx512_mask_pmaxu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
2437; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_q_256:
2438; CHECK:       ## BB#0:
2439; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2440; CHECK-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x3f,0xd1]
2441; CHECK-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x3f,0xc1]
2442; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0]
2443; CHECK-NEXT:    retq ## encoding: [0xc3]
2444  %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
2445  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
2446  %res2 = add <4 x i64> %res, %res1
2447  ret <4 x i64> %res2
2448}
2449
2450declare <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2451
2452define <4 x i32>@test_int_x86_avx512_mask_pmins_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
2453; CHECK-LABEL: test_int_x86_avx512_mask_pmins_d_128:
2454; CHECK:       ## BB#0:
2455; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2456; CHECK-NEXT:    vpminsd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x39,0xd1]
2457; CHECK-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x39,0xc1]
2458; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0]
2459; CHECK-NEXT:    retq ## encoding: [0xc3]
2460  %res = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
2461  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
2462  %res2 = add <4 x i32> %res, %res1
2463  ret <4 x i32> %res2
2464}
2465
2466declare <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
2467
2468define <8 x i32>@test_int_x86_avx512_mask_pmins_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
2469; CHECK-LABEL: test_int_x86_avx512_mask_pmins_d_256:
2470; CHECK:       ## BB#0:
2471; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2472; CHECK-NEXT:    vpminsd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x39,0xd1]
2473; CHECK-NEXT:    vpminsd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x39,0xc1]
2474; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0]
2475; CHECK-NEXT:    retq ## encoding: [0xc3]
2476  %res = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
2477  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
2478  %res2 = add <8 x i32> %res, %res1
2479  ret <8 x i32> %res2
2480}
2481
2482declare <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
2483
2484define <2 x i64>@test_int_x86_avx512_mask_pmins_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
2485; CHECK-LABEL: test_int_x86_avx512_mask_pmins_q_128:
2486; CHECK:       ## BB#0:
2487; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2488; CHECK-NEXT:    vpminsq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x39,0xd1]
2489; CHECK-NEXT:    vpminsq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x39,0xc1]
2490; CHECK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0]
2491; CHECK-NEXT:    retq ## encoding: [0xc3]
2492  %res = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
2493  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
2494  %res2 = add <2 x i64> %res, %res1
2495  ret <2 x i64> %res2
2496}
2497
2498declare <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
2499
2500define <4 x i64>@test_int_x86_avx512_mask_pmins_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
2501; CHECK-LABEL: test_int_x86_avx512_mask_pmins_q_256:
2502; CHECK:       ## BB#0:
2503; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2504; CHECK-NEXT:    vpminsq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x39,0xd1]
2505; CHECK-NEXT:    vpminsq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x39,0xc1]
2506; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0]
2507; CHECK-NEXT:    retq ## encoding: [0xc3]
2508  %res = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
2509  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
2510  %res2 = add <4 x i64> %res, %res1
2511  ret <4 x i64> %res2
2512}
2513
2514declare <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2515
2516define <4 x i32>@test_int_x86_avx512_mask_pminu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
2517; CHECK-LABEL: test_int_x86_avx512_mask_pminu_d_128:
2518; CHECK:       ## BB#0:
2519; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2520; CHECK-NEXT:    vpminud %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3b,0xd1]
2521; CHECK-NEXT:    vpminud %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x3b,0xc1]
2522; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0]
2523; CHECK-NEXT:    retq ## encoding: [0xc3]
2524  %res = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
2525  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
2526  %res2 = add <4 x i32> %res, %res1
2527  ret <4 x i32> %res2
2528}
2529
2530declare <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
2531
2532define <8 x i32>@test_int_x86_avx512_mask_pminu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
2533; CHECK-LABEL: test_int_x86_avx512_mask_pminu_d_256:
2534; CHECK:       ## BB#0:
2535; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2536; CHECK-NEXT:    vpminud %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3b,0xd1]
2537; CHECK-NEXT:    vpminud %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x3b,0xc1]
2538; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0]
2539; CHECK-NEXT:    retq ## encoding: [0xc3]
2540  %res = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
2541  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
2542  %res2 = add <8 x i32> %res, %res1
2543  ret <8 x i32> %res2
2544}
2545
2546declare <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
2547
2548define <2 x i64>@test_int_x86_avx512_mask_pminu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
2549; CHECK-LABEL: test_int_x86_avx512_mask_pminu_q_128:
2550; CHECK:       ## BB#0:
2551; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2552; CHECK-NEXT:    vpminuq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x3b,0xd1]
2553; CHECK-NEXT:    vpminuq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x3b,0xc1]
2554; CHECK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0]
2555; CHECK-NEXT:    retq ## encoding: [0xc3]
2556  %res = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
2557  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
2558  %res2 = add <2 x i64> %res, %res1
2559  ret <2 x i64> %res2
2560}
2561
2562declare <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
2563
2564define <4 x i64>@test_int_x86_avx512_mask_pminu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
2565; CHECK-LABEL: test_int_x86_avx512_mask_pminu_q_256:
2566; CHECK:       ## BB#0:
2567; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2568; CHECK-NEXT:    vpminuq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x3b,0xd1]
2569; CHECK-NEXT:    vpminuq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x3b,0xc1]
2570; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0]
2571; CHECK-NEXT:    retq ## encoding: [0xc3]
2572  %res = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
2573  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
2574  %res2 = add <4 x i64> %res, %res1
2575  ret <4 x i64> %res2
2576}
2577
2578declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2579
2580define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
2581; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
2582; CHECK:       ## BB#0:
2583; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2584; CHECK-NEXT:    vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
2585; CHECK-NEXT:    vpermt2d %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xda]
2586; CHECK-NEXT:    vpermt2d %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xca]
2587; CHECK-NEXT:    vpaddd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfe,0xc1]
2588; CHECK-NEXT:    retq ## encoding: [0xc3]
2589  %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
2590  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
2591  %res2 = add <4 x i32> %res, %res1
2592  ret <4 x i32> %res2
2593}
2594
2595declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2596
2597define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
2598; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
2599; CHECK:       ## BB#0:
2600; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2601; CHECK-NEXT:    vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
2602; CHECK-NEXT:    vpermt2d %xmm2, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7e,0xda]
2603; CHECK-NEXT:    vpermt2d %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xca]
2604; CHECK-NEXT:    vpaddd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfe,0xc1]
2605; CHECK-NEXT:    retq ## encoding: [0xc3]
2606  %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
2607  %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
2608  %res2 = add <4 x i32> %res, %res1
2609  ret <4 x i32> %res2
2610}
2611
2612declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
2613
2614define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
2615; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
2616; CHECK:       ## BB#0:
2617; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2618; CHECK-NEXT:    vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
2619; CHECK-NEXT:    vpermt2d %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xda]
2620; CHECK-NEXT:    vpermt2d %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xca]
2621; CHECK-NEXT:    vpaddd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc1]
2622; CHECK-NEXT:    retq ## encoding: [0xc3]
2623  %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
2624  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
2625  %res2 = add <8 x i32> %res, %res1
2626  ret <8 x i32> %res2
2627}
2628
2629declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
2630
2631define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
2632; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
2633; CHECK:       ## BB#0:
2634; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2635; CHECK-NEXT:    vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
2636; CHECK-NEXT:    vpermt2d %ymm2, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7e,0xda]
2637; CHECK-NEXT:    vpermt2d %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xca]
2638; CHECK-NEXT:    vpaddd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc1]
2639; CHECK-NEXT:    retq ## encoding: [0xc3]
2640  %res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
2641  %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
2642  %res2 = add <8 x i32> %res, %res1
2643  ret <8 x i32> %res2
2644}
2645
2646declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)
2647
2648define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
2649; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
2650; CHECK:       ## BB#0:
2651; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2652; CHECK-NEXT:    vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
2653; CHECK-NEXT:    vpermi2pd %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x77,0xda]
2654; CHECK-NEXT:    vpermi2pd %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x77,0xca]
2655; CHECK-NEXT:    vaddpd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc1]
2656; CHECK-NEXT:    retq ## encoding: [0xc3]
2657  %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
2658  %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
2659  %res2 = fadd <2 x double> %res, %res1
2660  ret <2 x double> %res2
2661}
2662
2663declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
2664
2665define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
2666; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
2667; CHECK:       ## BB#0:
2668; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2669; CHECK-NEXT:    vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
2670; CHECK-NEXT:    vpermi2pd %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x77,0xda]
2671; CHECK-NEXT:    vpermi2pd %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x77,0xca]
2672; CHECK-NEXT:    vaddpd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc1]
2673; CHECK-NEXT:    retq ## encoding: [0xc3]
2674  %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
2675  %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
2676  %res2 = fadd <4 x double> %res, %res1
2677  ret <4 x double> %res2
2678}
2679
2680declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)
2681
2682define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
2683; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
2684; CHECK:       ## BB#0:
2685; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2686; CHECK-NEXT:    vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
2687; CHECK-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x77,0xda]
2688; CHECK-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x77,0xca]
2689; CHECK-NEXT:    vaddps %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc1]
2690; CHECK-NEXT:    retq ## encoding: [0xc3]
2691  %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
2692  %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
2693  %res2 = fadd <4 x float> %res, %res1
2694  ret <4 x float> %res2
2695}
2696
2697declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)
2698
2699define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
2700; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
2701; CHECK:       ## BB#0:
2702; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2703; CHECK-NEXT:    vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
2704; CHECK-NEXT:    vpermi2ps %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x77,0xda]
2705; CHECK-NEXT:    vpermi2ps %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x77,0xca]
2706; CHECK-NEXT:    vaddps %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc1]
2707; CHECK-NEXT:    retq ## encoding: [0xc3]
2708  %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
2709  %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
2710  %res2 = fadd <8 x float> %res, %res1
2711  ret <8 x float> %res2
2712}
2713
2714declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8)
2715
2716define <2 x i64>@test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
2717; CHECK-LABEL: test_int_x86_avx512_mask_pabs_q_128:
2718; CHECK:       ## BB#0:
2719; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2720; CHECK-NEXT:    vpabsq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x1f,0xc8]
2721; CHECK-NEXT:    vpabsq %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x1f,0xc0]
2722; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
2723; CHECK-NEXT:    retq ## encoding: [0xc3]
2724  %res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
2725  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
2726  %res2 = add <2 x i64> %res, %res1
2727  ret <2 x i64> %res2
2728}
2729
2730declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8)
2731
2732define <4 x i64>@test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
2733; CHECK-LABEL: test_int_x86_avx512_mask_pabs_q_256:
2734; CHECK:       ## BB#0:
2735; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2736; CHECK-NEXT:    vpabsq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x1f,0xc8]
2737; CHECK-NEXT:    vpabsq %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x1f,0xc0]
2738; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
2739; CHECK-NEXT:    retq ## encoding: [0xc3]
2740  %res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
2741  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
2742  %res2 = add <4 x i64> %res, %res1
2743  ret <4 x i64> %res2
2744}
2745
2746declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8)
2747
2748define <4 x i32>@test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
2749; CHECK-LABEL: test_int_x86_avx512_mask_pabs_d_128:
2750; CHECK:       ## BB#0:
2751; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2752; CHECK-NEXT:    vpabsd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x1e,0xc8]
2753; CHECK-NEXT:    vpabsd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x1e,0xc0]
2754; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
2755; CHECK-NEXT:    retq ## encoding: [0xc3]
2756  %res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
2757  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
2758  %res2 = add <4 x i32> %res, %res1
2759  ret <4 x i32> %res2
2760}
2761
2762declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8)
2763
2764define <8 x i32>@test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
2765; CHECK-LABEL: test_int_x86_avx512_mask_pabs_d_256:
2766; CHECK:       ## BB#0:
2767; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2768; CHECK-NEXT:    vpabsd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x1e,0xc8]
2769; CHECK-NEXT:    vpabsd %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x1e,0xc0]
2770; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
2771; CHECK-NEXT:    retq ## encoding: [0xc3]
2772  %res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
2773  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
2774  %res2 = add <8 x i32> %res, %res1
2775  ret <8 x i32> %res2
2776}
2777
2778declare <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
2779
2780define <2 x double>@test_int_x86_avx512_mask_scalef_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
2781; CHECK-LABEL: test_int_x86_avx512_mask_scalef_pd_128:
2782; CHECK:       ## BB#0:
2783; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2784; CHECK-NEXT:    vscalefpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x2c,0xd1]
2785; CHECK-NEXT:    vscalefpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x2c,0xc1]
2786; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0]
2787; CHECK-NEXT:    retq ## encoding: [0xc3]
2788  %res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
2789  %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
2790  %res2 = fadd <2 x double> %res, %res1
2791  ret <2 x double> %res2
2792}
2793
2794declare <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
2795
2796define <4 x double>@test_int_x86_avx512_mask_scalef_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
2797; CHECK-LABEL: test_int_x86_avx512_mask_scalef_pd_256:
2798; CHECK:       ## BB#0:
2799; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2800; CHECK-NEXT:    vscalefpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x2c,0xd1]
2801; CHECK-NEXT:    vscalefpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x2c,0xc1]
2802; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
2803; CHECK-NEXT:    retq ## encoding: [0xc3]
2804  %res = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
2805  %res1 = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
2806  %res2 = fadd <4 x double> %res, %res1
2807  ret <4 x double> %res2
2808}
2809
2810declare <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
2811
2812define <4 x float>@test_int_x86_avx512_mask_scalef_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
2813; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ps_128:
2814; CHECK:       ## BB#0:
2815; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2816; CHECK-NEXT:    vscalefps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x2c,0xd1]
2817; CHECK-NEXT:    vscalefps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x2c,0xc1]
2818; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0]
2819; CHECK-NEXT:    retq ## encoding: [0xc3]
2820  %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
2821  %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
2822  %res2 = fadd <4 x float> %res, %res1
2823  ret <4 x float> %res2
2824}
2825
2826declare <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
2827
2828define <8 x float>@test_int_x86_avx512_mask_scalef_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
2829; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ps_256:
2830; CHECK:       ## BB#0:
2831; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2832; CHECK-NEXT:    vscalefps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x2c,0xd1]
2833; CHECK-NEXT:    vscalefps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x2c,0xc1]
2834; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0]
2835; CHECK-NEXT:    retq ## encoding: [0xc3]
2836  %res = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
2837  %res1 = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
2838  %res2 = fadd <8 x float> %res, %res1
2839  ret <8 x float> %res2
2840}
2841
2842declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64>, <16 x i8>, i8)
2843
2844define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
2845; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_128:
2846; CHECK:       ## BB#0:
2847; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2848; CHECK-NEXT:    vpmovqb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x32,0xc1]
2849; CHECK-NEXT:    vpmovqb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc2]
2850; CHECK-NEXT:    vpmovqb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x32,0xc0]
2851; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
2852; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
2853; CHECK-NEXT:    retq ## encoding: [0xc3]
2854    %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
2855    %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
2856    %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
2857    %res3 = add <16 x i8> %res0, %res1
2858    %res4 = add <16 x i8> %res3, %res2
2859    ret <16 x i8> %res4
2860}
2861
2862declare void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64>, i8)
2863
2864define void @test_int_x86_avx512_mask_pmov_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
2865; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_128:
2866; CHECK:       ## BB#0:
2867; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2868; CHECK-NEXT:    vpmovqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x32,0x07]
2869; CHECK-NEXT:    vpmovqb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x32,0x07]
2870; CHECK-NEXT:    retq ## encoding: [0xc3]
2871    call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
2872    call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
2873    ret void
2874}
2875
2876declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64>, <16 x i8>, i8)
2877
2878define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
2879; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_128:
2880; CHECK:       ## BB#0:
2881; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2882; CHECK-NEXT:    vpmovsqb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x22,0xc1]
2883; CHECK-NEXT:    vpmovsqb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc2]
2884; CHECK-NEXT:    vpmovsqb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x22,0xc0]
2885; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
2886; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
2887; CHECK-NEXT:    retq ## encoding: [0xc3]
2888    %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
2889    %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
2890    %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
2891    %res3 = add <16 x i8> %res0, %res1
2892    %res4 = add <16 x i8> %res3, %res2
2893    ret <16 x i8> %res4
2894}
2895
2896declare void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64>, i8)
2897
2898define void @test_int_x86_avx512_mask_pmovs_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
2899; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128:
2900; CHECK:       ## BB#0:
2901; CHECK-NEXT:    vpmovsqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x22,0x07]
2902; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2903; CHECK-NEXT:    vpmovsqb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x22,0x07]
2904; CHECK-NEXT:    retq ## encoding: [0xc3]
2905    call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
2906    call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
2907    ret void
2908}
2909
2910declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64>, <16 x i8>, i8)
2911
2912define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
2913; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_128:
2914; CHECK:       ## BB#0:
2915; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2916; CHECK-NEXT:    vpmovusqb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x12,0xc1]
2917; CHECK-NEXT:    vpmovusqb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc2]
2918; CHECK-NEXT:    vpmovusqb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x12,0xc0]
2919; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
2920; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
2921; CHECK-NEXT:    retq ## encoding: [0xc3]
2922    %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
2923    %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
2924    %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
2925    %res3 = add <16 x i8> %res0, %res1
2926    %res4 = add <16 x i8> %res3, %res2
2927    ret <16 x i8> %res4
2928}
2929
2930declare void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64>, i8)
2931
2932define void @test_int_x86_avx512_mask_pmovus_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
2933; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128:
2934; CHECK:       ## BB#0:
2935; CHECK-NEXT:    vpmovusqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x12,0x07]
2936; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2937; CHECK-NEXT:    vpmovusqb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x12,0x07]
2938; CHECK-NEXT:    retq ## encoding: [0xc3]
2939    call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
2940    call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
2941    ret void
2942}
2943
2944declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64>, <16 x i8>, i8)
2945
2946define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
2947; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_256:
2948; CHECK:       ## BB#0:
2949; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2950; CHECK-NEXT:    vpmovqb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x32,0xc1]
2951; CHECK-NEXT:    vpmovqb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc2]
2952; CHECK-NEXT:    vpmovqb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x32,0xc0]
2953; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
2954; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
2955; CHECK-NEXT:    retq ## encoding: [0xc3]
2956    %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
2957    %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
2958    %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
2959    %res3 = add <16 x i8> %res0, %res1
2960    %res4 = add <16 x i8> %res3, %res2
2961    ret <16 x i8> %res4
2962}
2963
2964declare void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64>, i8)
2965
2966define void @test_int_x86_avx512_mask_pmov_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
2967; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_256:
2968; CHECK:       ## BB#0:
2969; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2970; CHECK-NEXT:    vpmovqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x32,0x07]
2971; CHECK-NEXT:    vpmovqb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x32,0x07]
2972; CHECK-NEXT:    retq ## encoding: [0xc3]
2973    call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
2974    call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
2975    ret void
2976}
2977
2978declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64>, <16 x i8>, i8)
2979
2980define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
2981; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_256:
2982; CHECK:       ## BB#0:
2983; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2984; CHECK-NEXT:    vpmovsqb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x22,0xc1]
2985; CHECK-NEXT:    vpmovsqb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc2]
2986; CHECK-NEXT:    vpmovsqb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x22,0xc0]
2987; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
2988; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
2989; CHECK-NEXT:    retq ## encoding: [0xc3]
2990    %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
2991    %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
2992    %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
2993    %res3 = add <16 x i8> %res0, %res1
2994    %res4 = add <16 x i8> %res3, %res2
2995    ret <16 x i8> %res4
2996}
2997
2998declare void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64>, i8)
2999
3000define void @test_int_x86_avx512_mask_pmovs_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
3001; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256:
3002; CHECK:       ## BB#0:
3003; CHECK-NEXT:    vpmovsqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x22,0x07]
3004; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3005; CHECK-NEXT:    vpmovsqb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x22,0x07]
3006; CHECK-NEXT:    retq ## encoding: [0xc3]
3007    call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
3008    call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
3009    ret void
3010}
3011
3012declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64>, <16 x i8>, i8)
3013
3014define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3015; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_256:
3016; CHECK:       ## BB#0:
3017; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3018; CHECK-NEXT:    vpmovusqb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x12,0xc1]
3019; CHECK-NEXT:    vpmovusqb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc2]
3020; CHECK-NEXT:    vpmovusqb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x12,0xc0]
3021; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
3022; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
3023; CHECK-NEXT:    retq ## encoding: [0xc3]
3024    %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
3025    %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
3026    %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3027    %res3 = add <16 x i8> %res0, %res1
3028    %res4 = add <16 x i8> %res3, %res2
3029    ret <16 x i8> %res4
3030}
3031
3032declare void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64>, i8)
3033
3034define void @test_int_x86_avx512_mask_pmovus_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
3035; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256:
3036; CHECK:       ## BB#0:
3037; CHECK-NEXT:    vpmovusqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x12,0x07]
3038; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3039; CHECK-NEXT:    vpmovusqb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x12,0x07]
3040; CHECK-NEXT:    retq ## encoding: [0xc3]
3041    call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
3042    call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
3043    ret void
3044}
3045
3046declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64>, <8 x i16>, i8)
3047
3048define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3049; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_128:
3050; CHECK:       ## BB#0:
3051; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3052; CHECK-NEXT:    vpmovqw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x34,0xc1]
3053; CHECK-NEXT:    vpmovqw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x34,0xc2]
3054; CHECK-NEXT:    vpmovqw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x34,0xc0]
3055; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
3056; CHECK-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
3057; CHECK-NEXT:    retq ## encoding: [0xc3]
3058    %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
3059    %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
3060    %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3061    %res3 = add <8 x i16> %res0, %res1
3062    %res4 = add <8 x i16> %res3, %res2
3063    ret <8 x i16> %res4
3064}
3065
3066declare void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64>, i8)
3067
3068define void @test_int_x86_avx512_mask_pmov_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
3069; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_128:
3070; CHECK:       ## BB#0:
3071; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3072; CHECK-NEXT:    vpmovqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x34,0x07]
3073; CHECK-NEXT:    vpmovqw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x34,0x07]
3074; CHECK-NEXT:    retq ## encoding: [0xc3]
3075    call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
3076    call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
3077    ret void
3078}
3079
3080declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64>, <8 x i16>, i8)
3081
3082define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3083; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_128:
3084; CHECK:       ## BB#0:
3085; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3086; CHECK-NEXT:    vpmovsqw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x24,0xc1]
3087; CHECK-NEXT:    vpmovsqw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x24,0xc2]
3088; CHECK-NEXT:    vpmovsqw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x24,0xc0]
3089; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
3090; CHECK-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
3091; CHECK-NEXT:    retq ## encoding: [0xc3]
3092    %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
3093    %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
3094    %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3095    %res3 = add <8 x i16> %res0, %res1
3096    %res4 = add <8 x i16> %res3, %res2
3097    ret <8 x i16> %res4
3098}
3099
3100declare void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64>, i8)
3101
3102define void @test_int_x86_avx512_mask_pmovs_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
3103; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128:
3104; CHECK:       ## BB#0:
3105; CHECK-NEXT:    vpmovsqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x24,0x07]
3106; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3107; CHECK-NEXT:    vpmovsqw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x24,0x07]
3108; CHECK-NEXT:    retq ## encoding: [0xc3]
3109    call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
3110    call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
3111    ret void
3112}
3113
3114declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64>, <8 x i16>, i8)
3115
3116define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3117; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_128:
3118; CHECK:       ## BB#0:
3119; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3120; CHECK-NEXT:    vpmovusqw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x14,0xc1]
3121; CHECK-NEXT:    vpmovusqw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x14,0xc2]
3122; CHECK-NEXT:    vpmovusqw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x14,0xc0]
3123; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
3124; CHECK-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
3125; CHECK-NEXT:    retq ## encoding: [0xc3]
3126    %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
3127    %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
3128    %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3129    %res3 = add <8 x i16> %res0, %res1
3130    %res4 = add <8 x i16> %res3, %res2
3131    ret <8 x i16> %res4
3132}
3133
3134declare void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64>, i8)
3135
3136define void @test_int_x86_avx512_mask_pmovus_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
3137; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128:
3138; CHECK:       ## BB#0:
3139; CHECK-NEXT:    vpmovusqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x14,0x07]
3140; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3141; CHECK-NEXT:    vpmovusqw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x14,0x07]
3142; CHECK-NEXT:    retq ## encoding: [0xc3]
3143    call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
3144    call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
3145    ret void
3146}
3147
3148declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64>, <8 x i16>, i8)
3149
3150define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3151; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_256:
3152; CHECK:       ## BB#0:
3153; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3154; CHECK-NEXT:    vpmovqw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x34,0xc1]
3155; CHECK-NEXT:    vpmovqw %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x34,0xc2]
3156; CHECK-NEXT:    vpmovqw %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x34,0xc0]
3157; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
3158; CHECK-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
3159; CHECK-NEXT:    retq ## encoding: [0xc3]
3160    %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
3161    %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
3162    %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3163    %res3 = add <8 x i16> %res0, %res1
3164    %res4 = add <8 x i16> %res3, %res2
3165    ret <8 x i16> %res4
3166}
3167
3168declare void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64>, i8)
3169
3170define void @test_int_x86_avx512_mask_pmov_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
3171; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_256:
3172; CHECK:       ## BB#0:
3173; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3174; CHECK-NEXT:    vpmovqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x34,0x07]
3175; CHECK-NEXT:    vpmovqw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x34,0x07]
3176; CHECK-NEXT:    retq ## encoding: [0xc3]
3177    call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
3178    call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
3179    ret void
3180}
3181
3182declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64>, <8 x i16>, i8)
3183
3184define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3185; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_256:
3186; CHECK:       ## BB#0:
3187; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3188; CHECK-NEXT:    vpmovsqw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x24,0xc1]
3189; CHECK-NEXT:    vpmovsqw %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x24,0xc2]
3190; CHECK-NEXT:    vpmovsqw %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x24,0xc0]
3191; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
3192; CHECK-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
3193; CHECK-NEXT:    retq ## encoding: [0xc3]
3194    %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
3195    %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
3196    %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3197    %res3 = add <8 x i16> %res0, %res1
3198    %res4 = add <8 x i16> %res3, %res2
3199    ret <8 x i16> %res4
3200}
3201
3202declare void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64>, i8)
3203
3204define void @test_int_x86_avx512_mask_pmovs_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
3205; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256:
3206; CHECK:       ## BB#0:
3207; CHECK-NEXT:    vpmovsqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x24,0x07]
3208; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3209; CHECK-NEXT:    vpmovsqw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x24,0x07]
3210; CHECK-NEXT:    retq ## encoding: [0xc3]
3211    call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
3212    call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
3213    ret void
3214}
3215
3216declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64>, <8 x i16>, i8)
3217
3218define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3219; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_256:
3220; CHECK:       ## BB#0:
3221; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3222; CHECK-NEXT:    vpmovusqw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x14,0xc1]
3223; CHECK-NEXT:    vpmovusqw %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x14,0xc2]
3224; CHECK-NEXT:    vpmovusqw %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x14,0xc0]
3225; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
3226; CHECK-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
3227; CHECK-NEXT:    retq ## encoding: [0xc3]
3228    %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
3229    %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
3230    %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3231    %res3 = add <8 x i16> %res0, %res1
3232    %res4 = add <8 x i16> %res3, %res2
3233    ret <8 x i16> %res4
3234}
3235
3236declare void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64>, i8)
3237
3238define void @test_int_x86_avx512_mask_pmovus_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
3239; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256:
3240; CHECK:       ## BB#0:
3241; CHECK-NEXT:    vpmovusqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x14,0x07]
3242; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3243; CHECK-NEXT:    vpmovusqw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x14,0x07]
3244; CHECK-NEXT:    retq ## encoding: [0xc3]
3245    call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
3246    call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
3247    ret void
3248}
3249
3250declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64>, <4 x i32>, i8)
3251
3252define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
3253; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_128:
3254; CHECK:       ## BB#0:
3255; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3256; CHECK-NEXT:    vpmovqd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x35,0xc1]
3257; CHECK-NEXT:    vpmovqd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x35,0xc2]
3258; CHECK-NEXT:    vpmovqd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x35,0xc0]
3259; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1]
3260; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2]
3261; CHECK-NEXT:    retq ## encoding: [0xc3]
3262    %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
3263    %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
3264    %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
3265    %res3 = add <4 x i32> %res0, %res1
3266    %res4 = add <4 x i32> %res3, %res2
3267    ret <4 x i32> %res4
3268}
3269
3270declare void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64>, i8)
3271
3272define void @test_int_x86_avx512_mask_pmov_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
3273; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_128:
3274; CHECK:       ## BB#0:
3275; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3276; CHECK-NEXT:    vpmovqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x35,0x07]
3277; CHECK-NEXT:    vpmovqd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x35,0x07]
3278; CHECK-NEXT:    retq ## encoding: [0xc3]
3279    call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
3280    call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
3281    ret void
3282}
3283
3284declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64>, <4 x i32>, i8)
3285
3286define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
3287; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_128:
3288; CHECK:       ## BB#0:
3289; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3290; CHECK-NEXT:    vpmovsqd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x25,0xc1]
3291; CHECK-NEXT:    vpmovsqd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x25,0xc2]
3292; CHECK-NEXT:    vpmovsqd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x25,0xc0]
3293; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1]
3294; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2]
3295; CHECK-NEXT:    retq ## encoding: [0xc3]
3296    %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
3297    %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
3298    %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
3299    %res3 = add <4 x i32> %res0, %res1
3300    %res4 = add <4 x i32> %res3, %res2
3301    ret <4 x i32> %res4
3302}
3303
3304declare void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64>, i8)
3305
3306define void @test_int_x86_avx512_mask_pmovs_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
3307; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128:
3308; CHECK:       ## BB#0:
3309; CHECK-NEXT:    vpmovsqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x25,0x07]
3310; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3311; CHECK-NEXT:    vpmovsqd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x25,0x07]
3312; CHECK-NEXT:    retq ## encoding: [0xc3]
3313    call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
3314    call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
3315    ret void
3316}
3317
3318declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64>, <4 x i32>, i8)
3319
3320define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
3321; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_128:
3322; CHECK:       ## BB#0:
3323; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3324; CHECK-NEXT:    vpmovusqd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x15,0xc1]
3325; CHECK-NEXT:    vpmovusqd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x15,0xc2]
3326; CHECK-NEXT:    vpmovusqd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x15,0xc0]
3327; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1]
3328; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2]
3329; CHECK-NEXT:    retq ## encoding: [0xc3]
3330    %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
3331    %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
3332    %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
3333    %res3 = add <4 x i32> %res0, %res1
3334    %res4 = add <4 x i32> %res3, %res2
3335    ret <4 x i32> %res4
3336}
3337
3338declare void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64>, i8)
3339
3340define void @test_int_x86_avx512_mask_pmovus_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
3341; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128:
3342; CHECK:       ## BB#0:
3343; CHECK-NEXT:    vpmovusqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x15,0x07]
3344; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3345; CHECK-NEXT:    vpmovusqd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x15,0x07]
3346; CHECK-NEXT:    retq ## encoding: [0xc3]
3347    call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
3348    call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
3349    ret void
3350}
3351
3352declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64>, <4 x i32>, i8)
3353
3354define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
3355; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
3356; CHECK:       ## BB#0:
3357; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3358; CHECK-NEXT:    vpmovqd %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1]
3359; CHECK-NEXT:    vpmovqd %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x35,0xc2]
3360; CHECK-NEXT:    vpmovqd %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x35,0xc0]
3361; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1]
3362; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2]
3363; CHECK-NEXT:    retq ## encoding: [0xc3]
3364    %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
3365    %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
3366    %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
3367    %res3 = add <4 x i32> %res0, %res1
3368    %res4 = add <4 x i32> %res3, %res2
3369    ret <4 x i32> %res4
3370}
3371
3372declare void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64>, i8)
3373
3374define void @test_int_x86_avx512_mask_pmov_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
3375; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_256:
3376; CHECK:       ## BB#0:
3377; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3378; CHECK-NEXT:    vpmovqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x35,0x07]
3379; CHECK-NEXT:    vpmovqd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x35,0x07]
3380; CHECK-NEXT:    retq ## encoding: [0xc3]
3381    call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
3382    call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
3383    ret void
3384}
3385
3386declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64>, <4 x i32>, i8)
3387
3388define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
3389; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_256:
3390; CHECK:       ## BB#0:
3391; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3392; CHECK-NEXT:    vpmovsqd %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x25,0xc1]
3393; CHECK-NEXT:    vpmovsqd %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x25,0xc2]
3394; CHECK-NEXT:    vpmovsqd %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x25,0xc0]
3395; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1]
3396; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2]
3397; CHECK-NEXT:    retq ## encoding: [0xc3]
3398    %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
3399    %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
3400    %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
3401    %res3 = add <4 x i32> %res0, %res1
3402    %res4 = add <4 x i32> %res3, %res2
3403    ret <4 x i32> %res4
3404}
3405
3406declare void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64>, i8)
3407
3408define void @test_int_x86_avx512_mask_pmovs_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
3409; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256:
3410; CHECK:       ## BB#0:
3411; CHECK-NEXT:    vpmovsqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x25,0x07]
3412; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3413; CHECK-NEXT:    vpmovsqd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x25,0x07]
3414; CHECK-NEXT:    retq ## encoding: [0xc3]
3415    call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
3416    call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
3417    ret void
3418}
3419
3420declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64>, <4 x i32>, i8)
3421
3422define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
3423; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_256:
3424; CHECK:       ## BB#0:
3425; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3426; CHECK-NEXT:    vpmovusqd %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x15,0xc1]
3427; CHECK-NEXT:    vpmovusqd %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x15,0xc2]
3428; CHECK-NEXT:    vpmovusqd %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x15,0xc0]
3429; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1]
3430; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2]
3431; CHECK-NEXT:    retq ## encoding: [0xc3]
3432    %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
3433    %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
3434    %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
3435    %res3 = add <4 x i32> %res0, %res1
3436    %res4 = add <4 x i32> %res3, %res2
3437    ret <4 x i32> %res4
3438}
3439
3440declare void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64>, i8)
3441
3442define void @test_int_x86_avx512_mask_pmovus_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
3443; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256:
3444; CHECK:       ## BB#0:
3445; CHECK-NEXT:    vpmovusqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x15,0x07]
3446; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3447; CHECK-NEXT:    vpmovusqd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x15,0x07]
3448; CHECK-NEXT:    retq ## encoding: [0xc3]
3449    call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
3450    call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
3451    ret void
3452}
3453
3454declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32>, <16 x i8>, i8)
3455
3456define <16 x i8>@test_int_x86_avx512_mask_pmov_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
3457; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_128:
3458; CHECK:       ## BB#0:
3459; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3460; CHECK-NEXT:    vpmovdb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x31,0xc1]
3461; CHECK-NEXT:    vpmovdb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc2]
3462; CHECK-NEXT:    vpmovdb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x31,0xc0]
3463; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
3464; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
3465; CHECK-NEXT:    retq ## encoding: [0xc3]
3466    %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
3467    %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
3468    %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
3469    %res3 = add <16 x i8> %res0, %res1
3470    %res4 = add <16 x i8> %res3, %res2
3471    ret <16 x i8> %res4
3472}
3473
3474declare void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32>, i8)
3475
3476define void @test_int_x86_avx512_mask_pmov_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
3477; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_128:
3478; CHECK:       ## BB#0:
3479; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3480; CHECK-NEXT:    vpmovdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x31,0x07]
3481; CHECK-NEXT:    vpmovdb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x31,0x07]
3482; CHECK-NEXT:    retq ## encoding: [0xc3]
3483    call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
3484    call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
3485    ret void
3486}
3487
3488declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32>, <16 x i8>, i8)
3489
3490define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
3491; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_128:
3492; CHECK:       ## BB#0:
3493; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3494; CHECK-NEXT:    vpmovsdb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x21,0xc1]
3495; CHECK-NEXT:    vpmovsdb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc2]
3496; CHECK-NEXT:    vpmovsdb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x21,0xc0]
3497; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
3498; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
3499; CHECK-NEXT:    retq ## encoding: [0xc3]
3500    %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
3501    %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
3502    %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
3503    %res3 = add <16 x i8> %res0, %res1
3504    %res4 = add <16 x i8> %res3, %res2
3505    ret <16 x i8> %res4
3506}
3507
3508declare void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32>, i8)
3509
3510define void @test_int_x86_avx512_mask_pmovs_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
3511; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128:
3512; CHECK:       ## BB#0:
3513; CHECK-NEXT:    vpmovsdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x21,0x07]
3514; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3515; CHECK-NEXT:    vpmovsdb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x21,0x07]
3516; CHECK-NEXT:    retq ## encoding: [0xc3]
3517    call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
3518    call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
3519    ret void
3520}
3521
3522declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32>, <16 x i8>, i8)
3523
3524define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
3525; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_128:
3526; CHECK:       ## BB#0:
3527; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3528; CHECK-NEXT:    vpmovusdb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x11,0xc1]
3529; CHECK-NEXT:    vpmovusdb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc2]
3530; CHECK-NEXT:    vpmovusdb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x11,0xc0]
3531; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
3532; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
3533; CHECK-NEXT:    retq ## encoding: [0xc3]
3534    %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
3535    %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
3536    %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
3537    %res3 = add <16 x i8> %res0, %res1
3538    %res4 = add <16 x i8> %res3, %res2
3539    ret <16 x i8> %res4
3540}
3541
3542declare void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32>, i8)
3543
3544define void @test_int_x86_avx512_mask_pmovus_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
3545; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128:
3546; CHECK:       ## BB#0:
3547; CHECK-NEXT:    vpmovusdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x11,0x07]
3548; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3549; CHECK-NEXT:    vpmovusdb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x11,0x07]
3550; CHECK-NEXT:    retq ## encoding: [0xc3]
3551    call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
3552    call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
3553    ret void
3554}
3555
3556declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32>, <16 x i8>, i8)
3557
3558define <16 x i8>@test_int_x86_avx512_mask_pmov_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
3559; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_256:
3560; CHECK:       ## BB#0:
3561; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3562; CHECK-NEXT:    vpmovdb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x31,0xc1]
3563; CHECK-NEXT:    vpmovdb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc2]
3564; CHECK-NEXT:    vpmovdb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x31,0xc0]
3565; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
3566; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
3567; CHECK-NEXT:    retq ## encoding: [0xc3]
3568    %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
3569    %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
3570    %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
3571    %res3 = add <16 x i8> %res0, %res1
3572    %res4 = add <16 x i8> %res3, %res2
3573    ret <16 x i8> %res4
3574}
3575
3576declare void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32>, i8)
3577
3578define void @test_int_x86_avx512_mask_pmov_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
3579; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_256:
3580; CHECK:       ## BB#0:
3581; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3582; CHECK-NEXT:    vpmovdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x31,0x07]
3583; CHECK-NEXT:    vpmovdb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x31,0x07]
3584; CHECK-NEXT:    retq ## encoding: [0xc3]
3585    call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
3586    call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
3587    ret void
3588}
3589
3590declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32>, <16 x i8>, i8)
3591
3592define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
3593; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_256:
3594; CHECK:       ## BB#0:
3595; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3596; CHECK-NEXT:    vpmovsdb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x21,0xc1]
3597; CHECK-NEXT:    vpmovsdb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc2]
3598; CHECK-NEXT:    vpmovsdb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x21,0xc0]
3599; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
3600; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
3601; CHECK-NEXT:    retq ## encoding: [0xc3]
3602    %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
3603    %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
3604    %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
3605    %res3 = add <16 x i8> %res0, %res1
3606    %res4 = add <16 x i8> %res3, %res2
3607    ret <16 x i8> %res4
3608}
3609
3610declare void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32>, i8)
3611
3612define void @test_int_x86_avx512_mask_pmovs_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
3613; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256:
3614; CHECK:       ## BB#0:
3615; CHECK-NEXT:    vpmovsdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x21,0x07]
3616; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3617; CHECK-NEXT:    vpmovsdb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x21,0x07]
3618; CHECK-NEXT:    retq ## encoding: [0xc3]
3619    call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
3620    call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
3621    ret void
3622}
3623
3624declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32>, <16 x i8>, i8)
3625
3626define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
3627; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_256:
3628; CHECK:       ## BB#0:
3629; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3630; CHECK-NEXT:    vpmovusdb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x11,0xc1]
3631; CHECK-NEXT:    vpmovusdb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc2]
3632; CHECK-NEXT:    vpmovusdb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x11,0xc0]
3633; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
3634; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
3635; CHECK-NEXT:    retq ## encoding: [0xc3]
3636    %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
3637    %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
3638    %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
3639    %res3 = add <16 x i8> %res0, %res1
3640    %res4 = add <16 x i8> %res3, %res2
3641    ret <16 x i8> %res4
3642}
3643
3644declare void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32>, i8)
3645
3646define void @test_int_x86_avx512_mask_pmovus_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
3647; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256:
3648; CHECK:       ## BB#0:
3649; CHECK-NEXT:    vpmovusdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x11,0x07]
3650; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3651; CHECK-NEXT:    vpmovusdb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x11,0x07]
3652; CHECK-NEXT:    retq ## encoding: [0xc3]
3653    call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
3654    call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
3655    ret void
3656}
3657
3658declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32>, <8 x i16>, i8)
3659
3660define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3661; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_128:
3662; CHECK:       ## BB#0:
3663; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3664; CHECK-NEXT:    vpmovdw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x33,0xc1]
3665; CHECK-NEXT:    vpmovdw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x33,0xc2]
3666; CHECK-NEXT:    vpmovdw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x33,0xc0]
3667; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
3668; CHECK-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
3669; CHECK-NEXT:    retq ## encoding: [0xc3]
3670    %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
3671    %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
3672    %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3673    %res3 = add <8 x i16> %res0, %res1
3674    %res4 = add <8 x i16> %res3, %res2
3675    ret <8 x i16> %res4
3676}
3677
3678declare void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32>, i8)
3679
3680define void @test_int_x86_avx512_mask_pmov_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
3681; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_128:
3682; CHECK:       ## BB#0:
3683; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3684; CHECK-NEXT:    vpmovdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x33,0x07]
3685; CHECK-NEXT:    vpmovdw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x33,0x07]
3686; CHECK-NEXT:    retq ## encoding: [0xc3]
3687    call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
3688    call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
3689    ret void
3690}
3691
3692declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32>, <8 x i16>, i8)
3693
3694define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3695; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_128:
3696; CHECK:       ## BB#0:
3697; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3698; CHECK-NEXT:    vpmovsdw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x23,0xc1]
3699; CHECK-NEXT:    vpmovsdw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x23,0xc2]
3700; CHECK-NEXT:    vpmovsdw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x23,0xc0]
3701; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
3702; CHECK-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
3703; CHECK-NEXT:    retq ## encoding: [0xc3]
3704    %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
3705    %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
3706    %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3707    %res3 = add <8 x i16> %res0, %res1
3708    %res4 = add <8 x i16> %res3, %res2
3709    ret <8 x i16> %res4
3710}
3711
3712declare void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32>, i8)
3713
3714define void @test_int_x86_avx512_mask_pmovs_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
3715; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128:
3716; CHECK:       ## BB#0:
3717; CHECK-NEXT:    vpmovsdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x23,0x07]
3718; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3719; CHECK-NEXT:    vpmovsdw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x23,0x07]
3720; CHECK-NEXT:    retq ## encoding: [0xc3]
3721    call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
3722    call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
3723    ret void
3724}
3725
3726declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32>, <8 x i16>, i8)
3727
3728define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3729; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_128:
3730; CHECK:       ## BB#0:
3731; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3732; CHECK-NEXT:    vpmovusdw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x13,0xc1]
3733; CHECK-NEXT:    vpmovusdw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x13,0xc2]
3734; CHECK-NEXT:    vpmovusdw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x13,0xc0]
3735; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
3736; CHECK-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
3737; CHECK-NEXT:    retq ## encoding: [0xc3]
3738    %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
3739    %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
3740    %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3741    %res3 = add <8 x i16> %res0, %res1
3742    %res4 = add <8 x i16> %res3, %res2
3743    ret <8 x i16> %res4
3744}
3745
3746declare void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32>, i8)
3747
3748define void @test_int_x86_avx512_mask_pmovus_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
3749; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128:
3750; CHECK:       ## BB#0:
3751; CHECK-NEXT:    vpmovusdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x13,0x07]
3752; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3753; CHECK-NEXT:    vpmovusdw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x13,0x07]
3754; CHECK-NEXT:    retq ## encoding: [0xc3]
3755    call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
3756    call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
3757    ret void
3758}
3759
3760declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32>, <8 x i16>, i8)
3761
3762define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3763; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_256:
3764; CHECK:       ## BB#0:
3765; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3766; CHECK-NEXT:    vpmovdw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x33,0xc1]
3767; CHECK-NEXT:    vpmovdw %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x33,0xc2]
3768; CHECK-NEXT:    vpmovdw %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc0]
3769; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
3770; CHECK-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
3771; CHECK-NEXT:    retq ## encoding: [0xc3]
3772    %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
3773    %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
3774    %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3775    %res3 = add <8 x i16> %res0, %res1
3776    %res4 = add <8 x i16> %res3, %res2
3777    ret <8 x i16> %res4
3778}
3779
3780declare void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32>, i8)
3781
3782define void @test_int_x86_avx512_mask_pmov_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
3783; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_256:
3784; CHECK:       ## BB#0:
3785; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3786; CHECK-NEXT:    vpmovdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x33,0x07]
3787; CHECK-NEXT:    vpmovdw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x33,0x07]
3788; CHECK-NEXT:    retq ## encoding: [0xc3]
3789    call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
3790    call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
3791    ret void
3792}
3793
3794declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32>, <8 x i16>, i8)
3795
3796define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3797; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_256:
3798; CHECK:       ## BB#0:
3799; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3800; CHECK-NEXT:    vpmovsdw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x23,0xc1]
3801; CHECK-NEXT:    vpmovsdw %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x23,0xc2]
3802; CHECK-NEXT:    vpmovsdw %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x23,0xc0]
3803; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
3804; CHECK-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
3805; CHECK-NEXT:    retq ## encoding: [0xc3]
3806    %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
3807    %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
3808    %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3809    %res3 = add <8 x i16> %res0, %res1
3810    %res4 = add <8 x i16> %res3, %res2
3811    ret <8 x i16> %res4
3812}
3813
3814declare void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32>, i8)
3815
3816define void @test_int_x86_avx512_mask_pmovs_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
3817; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256:
3818; CHECK:       ## BB#0:
3819; CHECK-NEXT:    vpmovsdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x23,0x07]
3820; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3821; CHECK-NEXT:    vpmovsdw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x23,0x07]
3822; CHECK-NEXT:    retq ## encoding: [0xc3]
3823    call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
3824    call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
3825    ret void
3826}
3827
3828declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32>, <8 x i16>, i8)
3829
3830define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3831; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_256:
3832; CHECK:       ## BB#0:
3833; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3834; CHECK-NEXT:    vpmovusdw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x13,0xc1]
3835; CHECK-NEXT:    vpmovusdw %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x13,0xc2]
3836; CHECK-NEXT:    vpmovusdw %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x13,0xc0]
3837; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
3838; CHECK-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
3839; CHECK-NEXT:    retq ## encoding: [0xc3]
3840    %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
3841    %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
3842    %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3843    %res3 = add <8 x i16> %res0, %res1
3844    %res4 = add <8 x i16> %res3, %res2
3845    ret <8 x i16> %res4
3846}
3847
3848declare void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32>, i8)
3849
3850define void @test_int_x86_avx512_mask_pmovus_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
3851; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256:
3852; CHECK:       ## BB#0:
3853; CHECK-NEXT:    vpmovusdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x13,0x07]
3854; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
3855; CHECK-NEXT:    vpmovusdw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x13,0x07]
3856; CHECK-NEXT:    retq ## encoding: [0xc3]
3857    call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
3858    call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
3859    ret void
3860}
3861
3862declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8)
3863
3864define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
3865; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
3866; CHECK:       ## BB#0:
3867; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3868; CHECK-NEXT:    vcvtdq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8]
3869; CHECK-NEXT:    vcvtdq2pd %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0xe6,0xc0]
3870; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
3871; CHECK-NEXT:    retq ## encoding: [0xc3]
3872  %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
3873  %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
3874  %res2 = fadd <2 x double> %res, %res1
3875  ret <2 x double> %res2
3876}
3877
3878declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, i8)
3879
3880define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
3881; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
3882; CHECK:       ## BB#0:
3883; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3884; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8]
3885; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0 ## encoding: [0xc5,0xfe,0xe6,0xc0]
3886; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
3887; CHECK-NEXT:    retq ## encoding: [0xc3]
3888  %res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
3889  %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
3890  %res2 = fadd <4 x double> %res, %res1
3891  ret <4 x double> %res2
3892}
3893
3894declare <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32>, <4 x float>, i8)
3895
3896define <4 x float>@test_int_x86_avx512_mask_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
3897; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128:
3898; CHECK:       ## BB#0:
3899; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3900; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5b,0xc8]
3901; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5b,0xc0]
3902; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
3903; CHECK-NEXT:    retq ## encoding: [0xc3]
3904  %res = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2)
3905  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1)
3906  %res2 = fadd <4 x float> %res, %res1
3907  ret <4 x float> %res2
3908}
3909
3910declare <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32>, <8 x float>, i8)
3911
3912define <8 x float>@test_int_x86_avx512_mask_cvt_dq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
3913; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256:
3914; CHECK:       ## BB#0:
3915; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3916; CHECK-NEXT:    vcvtdq2ps %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5b,0xc8]
3917; CHECK-NEXT:    vcvtdq2ps %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5b,0xc0]
3918; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
3919; CHECK-NEXT:    retq ## encoding: [0xc3]
3920  %res = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)
3921  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)
3922  %res2 = fadd <8 x float> %res, %res1
3923  ret <8 x float> %res2
3924}
3925
3926declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8)
3927
3928define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3929; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128:
3930; CHECK:       ## BB#0:
3931; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3932; CHECK-NEXT:    vcvtpd2dq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
3933; CHECK-NEXT:    vcvtpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0xe6,0xc0]
3934; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
3935; CHECK-NEXT:    retq ## encoding: [0xc3]
3936  %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3937  %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3938  %res2 = add <4 x i32> %res, %res1
3939  ret <4 x i32> %res2
3940}
3941
3942declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double>, <4 x i32>, i8)
3943
3944define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
3945; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256:
3946; CHECK:       ## BB#0:
3947; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3948; CHECK-NEXT:    vcvtpd2dq %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0xe6,0xc8]
3949; CHECK-NEXT:    vcvtpd2dq %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x28,0xe6,0xc0]
3950; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
3951; CHECK-NEXT:    retq ## encoding: [0xc3]
3952  %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
3953  %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
3954  %res2 = add <4 x i32> %res, %res1
3955  ret <4 x i32> %res2
3956}
3957
3958declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double>, <4 x float>, i8)
3959
3960define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_256(<4 x double> %x0, <4 x float> %x1, i8 %x2) {
3961; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256:
3962; CHECK:       ## BB#0:
3963; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3964; CHECK-NEXT:    vcvtpd2ps %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x5a,0xc8]
3965; CHECK-NEXT:    vcvtpd2ps %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x5a,0xc0]
3966; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
3967; CHECK-NEXT:    retq ## encoding: [0xc3]
3968  %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 %x2)
3969  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 -1)
3970  %res2 = fadd <4 x float> %res, %res1
3971  ret <4 x float> %res2
3972}
3973
3974declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double>, <4 x float>, i8)
3975
3976define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2) {
3977; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps:
3978; CHECK:       ## BB#0:
3979; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3980; CHECK-NEXT:    vcvtpd2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
3981; CHECK-NEXT:    vcvtpd2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x5a,0xc0]
3982; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
3983; CHECK-NEXT:    retq ## encoding: [0xc3]
3984  %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2)
3985  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 -1)
3986  %res2 = fadd <4 x float> %res, %res1
3987  ret <4 x float> %res2
3988}
3989
3990declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double>, <4 x i32>, i8)
3991
3992define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3993; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128:
3994; CHECK:       ## BB#0:
3995; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3996; CHECK-NEXT:    vcvtpd2udq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
3997; CHECK-NEXT:    vcvtpd2udq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0]
3998; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
3999; CHECK-NEXT:    retq ## encoding: [0xc3]
4000  %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
4001  %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
4002  %res2 = add <4 x i32> %res, %res1
4003  ret <4 x i32> %res2
4004}
4005
4006declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double>, <4 x i32>, i8)
4007
4008define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
4009; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_256:
4010; CHECK:       ## BB#0:
4011; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4012; CHECK-NEXT:    vcvtpd2udq %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x29,0x79,0xc8]
4013; CHECK-NEXT:    vcvtpd2udq %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x28,0x79,0xc0]
4014; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
4015; CHECK-NEXT:    retq ## encoding: [0xc3]
4016  %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
4017  %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
4018  %res2 = add <4 x i32> %res, %res1
4019  ret <4 x i32> %res2
4020}
4021
4022declare <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float>, <4 x i32>, i8)
4023
4024define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
4025; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_128:
4026; CHECK:       ## BB#0:
4027; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4028; CHECK-NEXT:    vcvtps2dq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x5b,0xc8]
4029; CHECK-NEXT:    vcvtps2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x5b,0xc0]
4030; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
4031; CHECK-NEXT:    retq ## encoding: [0xc3]
4032  %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
4033  %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
4034  %res2 = add <4 x i32> %res, %res1
4035  ret <4 x i32> %res2
4036}
4037
4038declare <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float>, <8 x i32>, i8)
4039
4040define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
4041; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_256:
4042; CHECK:       ## BB#0:
4043; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4044; CHECK-NEXT:    vcvtps2dq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x5b,0xc8]
4045; CHECK-NEXT:    vcvtps2dq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x5b,0xc0]
4046; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
4047; CHECK-NEXT:    retq ## encoding: [0xc3]
4048  %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
4049  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
4050  %res2 = add <8 x i32> %res, %res1
4051  ret <8 x i32> %res2
4052}
4053
4054declare <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float>, <2 x double>, i8)
4055
4056define <2 x double>@test_int_x86_avx512_mask_cvt_ps2pd_128(<4 x float> %x0, <2 x double> %x1, i8 %x2) {
4057; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128:
4058; CHECK:       ## BB#0:
4059; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4060; CHECK-NEXT:    vcvtps2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5a,0xc8]
4061; CHECK-NEXT:    vcvtps2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5a,0xc0]
4062; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
4063; CHECK-NEXT:    retq ## encoding: [0xc3]
4064  %res = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 %x2)
4065  %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 -1)
4066  %res2 = fadd <2 x double> %res, %res1
4067  ret <2 x double> %res2
4068}
4069
4070declare <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float>, <4 x double>, i8)
4071
4072define <4 x double>@test_int_x86_avx512_mask_cvt_ps2pd_256(<4 x float> %x0, <4 x double> %x1, i8 %x2) {
4073; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256:
4074; CHECK:       ## BB#0:
4075; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4076; CHECK-NEXT:    vcvtps2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5a,0xc8]
4077; CHECK-NEXT:    vcvtps2pd %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5a,0xc0]
4078; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
4079; CHECK-NEXT:    retq ## encoding: [0xc3]
4080  %res = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 %x2)
4081  %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 -1)
4082  %res2 = fadd <4 x double> %res, %res1
4083  ret <4 x double> %res2
4084}
4085
4086declare <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float>, <4 x i32>, i8)
4087
4088define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
4089; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_128:
4090; CHECK:       ## BB#0:
4091; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4092; CHECK-NEXT:    vcvtps2udq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x79,0xc8]
4093; CHECK-NEXT:    vcvtps2udq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x79,0xc0]
4094; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
4095; CHECK-NEXT:    retq ## encoding: [0xc3]
4096  %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
4097  %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
4098  %res2 = add <4 x i32> %res, %res1
4099  ret <4 x i32> %res2
4100}
4101
4102declare <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float>, <8 x i32>, i8)
4103
4104define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
4105; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_256:
4106; CHECK:       ## BB#0:
4107; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4108; CHECK-NEXT:    vcvtps2udq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x79,0xc8]
4109; CHECK-NEXT:    vcvtps2udq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x79,0xc0]
4110; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
4111; CHECK-NEXT:    retq ## encoding: [0xc3]
4112  %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
4113  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
4114  %res2 = add <8 x i32> %res, %res1
4115  ret <8 x i32> %res2
4116}
4117
4118declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double>, <4 x i32>, i8)
4119
4120define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
4121; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128:
4122; CHECK:       ## BB#0:
4123; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4124; CHECK-NEXT:    vcvttpd2dq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
4125; CHECK-NEXT:    vcvttpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe6,0xc0]
4126; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
4127; CHECK-NEXT:    retq ## encoding: [0xc3]
4128  %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
4129  %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
4130  %res2 = add <4 x i32> %res, %res1
4131  ret <4 x i32> %res2
4132}
4133
4134declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double>, <4 x i32>, i8)
4135
4136define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
4137; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
4138; CHECK:       ## BB#0:
4139; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4140; CHECK-NEXT:    vcvttpd2dq %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8]
4141; CHECK-NEXT:    vcvttpd2dq %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xe6,0xc0]
4142; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
4143; CHECK-NEXT:    retq ## encoding: [0xc3]
4144  %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
4145  %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
4146  %res2 = add <4 x i32> %res, %res1
4147  ret <4 x i32> %res2
4148}
4149
4150declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double>, <4 x i32>, i8)
4151
4152define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
4153; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128:
4154; CHECK:       ## BB#0:
4155; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4156; CHECK-NEXT:    vcvttpd2udq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
4157; CHECK-NEXT:    vcvttpd2udq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x08,0x78,0xc0]
4158; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
4159; CHECK-NEXT:    retq ## encoding: [0xc3]
4160  %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
4161  %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
4162  %res2 = add <4 x i32> %res, %res1
4163  ret <4 x i32> %res2
4164}
4165
4166declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double>, <4 x i32>, i8)
4167
4168define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
4169; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_256:
4170; CHECK:       ## BB#0:
4171; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4172; CHECK-NEXT:    vcvttpd2udq %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x29,0x78,0xc8]
4173; CHECK-NEXT:    vcvttpd2udq %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x28,0x78,0xc0]
4174; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
4175; CHECK-NEXT:    retq ## encoding: [0xc3]
4176  %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
4177  %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
4178  %res2 = add <4 x i32> %res, %res1
4179  ret <4 x i32> %res2
4180}
4181
4182declare <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float>, <4 x i32>, i8)
4183
4184define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
4185; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
4186; CHECK:       ## BB#0:
4187; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4188; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8]
4189; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x5b,0xc0]
4190; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
4191; CHECK-NEXT:    retq ## encoding: [0xc3]
4192  %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
4193  %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
4194  %res2 = add <4 x i32> %res, %res1
4195  ret <4 x i32> %res2
4196}
4197
4198declare <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float>, <8 x i32>, i8)
4199
4200define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
4201; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
4202; CHECK:       ## BB#0:
4203; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4204; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8]
4205; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x5b,0xc0]
4206; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
4207; CHECK-NEXT:    retq ## encoding: [0xc3]
4208  %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
4209  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
4210  %res2 = add <8 x i32> %res, %res1
4211  ret <8 x i32> %res2
4212}
4213
4214declare <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float>, <4 x i32>, i8)
4215
4216define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
4217; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_128:
4218; CHECK:       ## BB#0:
4219; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4220; CHECK-NEXT:    vcvttps2udq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x78,0xc8]
4221; CHECK-NEXT:    vcvttps2udq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x78,0xc0]
4222; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
4223; CHECK-NEXT:    retq ## encoding: [0xc3]
4224  %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
4225  %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
4226  %res2 = add <4 x i32> %res, %res1
4227  ret <4 x i32> %res2
4228}
4229
4230declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i8)
4231
4232define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
4233; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_256:
4234; CHECK:       ## BB#0:
4235; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4236; CHECK-NEXT:    vcvttps2udq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x78,0xc8]
4237; CHECK-NEXT:    vcvttps2udq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x78,0xc0]
4238; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
4239; CHECK-NEXT:    retq ## encoding: [0xc3]
4240  %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
4241  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
4242  %res2 = add <8 x i32> %res, %res1
4243  ret <8 x i32> %res2
4244}
4245
4246declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>, i8)
4247
4248define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
4249; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
4250; CHECK:       ## BB#0:
4251; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4252; CHECK-NEXT:    vcvtudq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8]
4253; CHECK-NEXT:    vcvtudq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xc0]
4254; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
4255; CHECK-NEXT:    retq ## encoding: [0xc3]
4256  %res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
4257  %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
4258  %res2 = fadd <2 x double> %res, %res1
4259  ret <2 x double> %res2
4260}
4261
4262declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>, i8)
4263
4264define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
4265; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
4266; CHECK:       ## BB#0:
4267; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4268; CHECK-NEXT:    vcvtudq2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8]
4269; CHECK-NEXT:    vcvtudq2pd %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xc0]
4270; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
4271; CHECK-NEXT:    retq ## encoding: [0xc3]
4272  %res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
4273  %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
4274  %res2 = fadd <4 x double> %res, %res1
4275  ret <4 x double> %res2
4276}
4277
4278declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8)
4279
4280define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
4281; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128:
4282; CHECK:       ## BB#0:
4283; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4284; CHECK-NEXT:    vcvtudq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x7a,0xc8]
4285; CHECK-NEXT:    vcvtudq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x7a,0xc0]
4286; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
4287; CHECK-NEXT:    retq ## encoding: [0xc3]
4288  %res = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2)
4289  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1)
4290  %res2 = fadd <4 x float> %res, %res1
4291  ret <4 x float> %res2
4292}
4293
4294declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, i8)
4295
4296define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
4297; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256:
4298; CHECK:       ## BB#0:
4299; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4300; CHECK-NEXT:    vcvtudq2ps %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x7a,0xc8]
4301; CHECK-NEXT:    vcvtudq2ps %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7f,0x28,0x7a,0xc0]
4302; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
4303; CHECK-NEXT:    retq ## encoding: [0xc3]
4304  %res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)
4305  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)
4306  %res2 = fadd <8 x float> %res, %res1
4307  ret <8 x float> %res2
4308}
4309
4310declare <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8)
4311
4312define <2 x double>@test_int_x86_avx512_mask_rndscale_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
4313; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_pd_128:
4314; CHECK:       ## BB#0:
4315; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4316; CHECK-NEXT:    vrndscalepd $4, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x09,0xc8,0x04]
4317; CHECK-NEXT:    vrndscalepd $88, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x09,0xc0,0x58]
4318; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
4319; CHECK-NEXT:    retq ## encoding: [0xc3]
4320  %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
4321  %res1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 88, <2 x double> %x2, i8 -1)
4322  %res2 = fadd <2 x double> %res, %res1
4323  ret <2 x double> %res2
4324}
4325
4326declare <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double>, i32, <4 x double>, i8)
4327
4328define <4 x double>@test_int_x86_avx512_mask_rndscale_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
4329; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_pd_256:
4330; CHECK:       ## BB#0:
4331; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4332; CHECK-NEXT:    vrndscalepd $4, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x09,0xc8,0x04]
4333; CHECK-NEXT:    vrndscalepd $88, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x09,0xc0,0x58]
4334; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
4335; CHECK-NEXT:    retq ## encoding: [0xc3]
4336  %res = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
4337  %res1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 88, <4 x double> %x2, i8 -1)
4338  %res2 = fadd <4 x double> %res, %res1
4339  ret <4 x double> %res2
4340}
4341
4342declare <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float>, i32, <4 x float>, i8)
4343
4344define <4 x float>@test_int_x86_avx512_mask_rndscale_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
4345; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ps_128:
4346; CHECK:       ## BB#0:
4347; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4348; CHECK-NEXT:    vrndscaleps $88, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x08,0xc8,0x58]
4349; CHECK-NEXT:    vrndscaleps $4, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x08,0xc0,0x04]
4350; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
4351; CHECK-NEXT:    retq ## encoding: [0xc3]
4352  %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 %x3)
4353  %res1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 -1)
4354  %res2 = fadd <4 x float> %res, %res1
4355  ret <4 x float> %res2
4356}
4357
4358declare <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float>, i32, <8 x float>, i8)
4359
4360define <8 x float>@test_int_x86_avx512_mask_rndscale_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
4361; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ps_256:
4362; CHECK:       ## BB#0:
4363; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4364; CHECK-NEXT:    vrndscaleps $5, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x08,0xc8,0x05]
4365; CHECK-NEXT:    vrndscaleps $66, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x08,0xc0,0x42]
4366; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
4367; CHECK-NEXT:    retq ## encoding: [0xc3]
4368  %res = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 5, <8 x float> %x2, i8 %x3)
4369  %res1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 66, <8 x float> %x2, i8 -1)
4370  %res2 = fadd <8 x float> %res, %res1
4371  ret <8 x float> %res2
4372}
4373
4374declare <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
4375
4376define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
4377; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:
4378; CHECK:       ## BB#0:
4379; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4380; CHECK-NEXT:    vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x23,0xd1,0x16]
4381; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7]
4382; CHECK-NEXT:    vshuff32x4 $22, %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x23,0xd9,0x16]
4383; CHECK-NEXT:    ## ymm3 {%k1} {z} = ymm0[0,1,2,3],ymm1[4,5,6,7]
4384; CHECK-NEXT:    vshuff32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x23,0xc1,0x16]
4385; CHECK-NEXT:    ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
4386; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0]
4387; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0]
4388; CHECK-NEXT:    retq ## encoding: [0xc3]
4389  %res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
4390  %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
4391  %res2 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> zeroinitializer, i8 %x4)
4392  %res3 = fadd <8 x float> %res, %res1
4393  %res4 = fadd <8 x float> %res2, %res3
4394  ret <8 x float> %res4
4395}
4396
4397declare <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)
4398
4399define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
4400; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256:
4401; CHECK:       ## BB#0:
4402; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4403; CHECK-NEXT:    vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xd1,0x16]
4404; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3]
4405; CHECK-NEXT:    vshuff64x2 $22, %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x23,0xd9,0x16]
4406; CHECK-NEXT:    ## ymm3 {%k1} {z} = ymm0[0,1],ymm1[2,3]
4407; CHECK-NEXT:    vshuff64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc1,0x16]
4408; CHECK-NEXT:    ## ymm0 = ymm0[0,1],ymm1[2,3]
4409; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
4410; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0]
4411; CHECK-NEXT:    retq ## encoding: [0xc3]
4412  %res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4)
4413  %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1)
4414  %res2 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> zeroinitializer, i8 %x4)
4415  %res3 = fadd <4 x double> %res, %res1
4416  %res4 = fadd <4 x double> %res2, %res3
4417  ret <4 x double> %res4
4418}
4419
4420declare <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
4421
4422define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
4423; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:
4424; CHECK:       ## BB#0:
4425; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4426; CHECK-NEXT:    vshufi32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x43,0xd1,0x16]
4427; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7]
4428; CHECK-NEXT:    vshufi32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x43,0xc1,0x16]
4429; CHECK-NEXT:    ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
4430; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0]
4431; CHECK-NEXT:    retq ## encoding: [0xc3]
4432  %res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
4433  %res1 = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
4434  %res2 = add <8 x i32> %res, %res1
4435  ret <8 x i32> %res2
4436}
4437
4438declare <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
4439
4440define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
4441; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256:
4442; CHECK:       ## BB#0:
4443; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4444; CHECK-NEXT:    vshufi64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x43,0xd1,0x16]
4445; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3]
4446; CHECK-NEXT:    vshufi64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x43,0xc1,0x16]
4447; CHECK-NEXT:    ## ymm0 = ymm0[0,1],ymm1[2,3]
4448; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0]
4449; CHECK-NEXT:    retq ## encoding: [0xc3]
4450  %res = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
4451  %res1 = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
4452  %res2 = add <4 x i64> %res, %res1
4453  ret <4 x i64> %res2
4454}
4455
4456declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float>, i32, <4 x float>, i8)
4457
4458define <4 x float>@test_int_x86_avx512_mask_vextractf32x4_256(<8 x float> %x0, <4 x float> %x2, i8 %x3) {
4459; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x4_256:
4460; CHECK:       ## BB#0:
4461; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4462; CHECK-NEXT:    vextractf32x4 $1, %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x19,0xc1,0x01]
4463; CHECK-NEXT:    vextractf32x4 $1, %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x19,0xc2,0x01]
4464; CHECK-NEXT:    vextractf32x4 $1, %ymm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x19,0xc0,0x01]
4465; CHECK-NEXT:    vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca]
4466; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1]
4467; CHECK-NEXT:    retq ## encoding: [0xc3]
4468  %res  = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> %x2, i8 %x3)
4469  %res1 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 %x3)
4470  %res2 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 -1)
4471  %res3 = fadd <4 x float> %res, %res1
4472  %res4 = fadd <4 x float> %res2, %res3
4473  ret <4 x float> %res4
4474}
4475
4476declare <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double>, i32, <2 x double>, i8)
4477
4478define <2 x double>@test_int_x86_avx512_mask_getmant_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
4479; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_128:
4480; CHECK:       ## BB#0:
4481; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4482; CHECK-NEXT:    vgetmantpd $11, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x26,0xc8,0x0b]
4483; CHECK-NEXT:    vgetmantpd $11, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0x89,0x26,0xd0,0x0b]
4484; CHECK-NEXT:    vgetmantpd $11, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x26,0xc0,0x0b]
4485; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
4486; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0]
4487; CHECK-NEXT:    retq ## encoding: [0xc3]
4488  %res = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 %x3)
4489  %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> zeroinitializer, i8 %x3)
4490  %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 -1)
4491  %res3 = fadd <2 x double> %res, %res1
4492  %res4 = fadd <2 x double> %res2, %res3
4493  ret <2 x double> %res4
4494}
4495
4496declare <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double>, i32, <4 x double>, i8)
4497
4498define <4 x double>@test_int_x86_avx512_mask_getmant_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
4499; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_256:
4500; CHECK:       ## BB#0:
4501; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4502; CHECK-NEXT:    vgetmantpd $11, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x26,0xc8,0x0b]
4503; CHECK-NEXT:    vgetmantpd $11, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x26,0xc0,0x0b]
4504; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
4505; CHECK-NEXT:    retq ## encoding: [0xc3]
4506  %res = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 %x3)
4507  %res1 = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 -1)
4508  %res2 = fadd <4 x double> %res, %res1
4509  ret <4 x double> %res2
4510}
4511
4512declare <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float>, i32, <4 x float>, i8)
4513
4514define <4 x float>@test_int_x86_avx512_mask_getmant_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
4515; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_128:
4516; CHECK:       ## BB#0:
4517; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4518; CHECK-NEXT:    vgetmantps $11, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x26,0xc8,0x0b]
4519; CHECK-NEXT:    vgetmantps $11, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x26,0xc0,0x0b]
4520; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
4521; CHECK-NEXT:    retq ## encoding: [0xc3]
4522  %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 11, <4 x float> %x2, i8 %x3)
4523  %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 11, <4 x float> %x2, i8 -1)
4524  %res2 = fadd <4 x float> %res, %res1
4525  ret <4 x float> %res2
4526}
4527
4528declare <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float>, i32, <8 x float>, i8)
4529
4530define <8 x float>@test_int_x86_avx512_mask_getmant_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
4531; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_256:
4532; CHECK:       ## BB#0:
4533; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4534; CHECK-NEXT:    vgetmantps $11, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x26,0xc8,0x0b]
4535; CHECK-NEXT:    vgetmantps $11, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x26,0xc0,0x0b]
4536; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
4537; CHECK-NEXT:    retq ## encoding: [0xc3]
4538  %res = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3)
4539  %res1 = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 -1)
4540  %res2 = fadd <8 x float> %res, %res1
4541  ret <8 x float> %res2
4542}
4543
4544declare <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)
4545
4546define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
4547; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_128:
4548; CHECK:       ## BB#0:
4549; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4550; CHECK-NEXT:    vshufpd $22, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xc6,0xd1,0x16]
4551; CHECK-NEXT:    ## xmm2 {%k1} = xmm0[0],xmm1[1]
4552; CHECK-NEXT:    vshufpd $22, %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xc6,0xd9,0x16]
4553; CHECK-NEXT:    ## xmm3 {%k1} {z} = xmm0[0],xmm1[1]
4554; CHECK-NEXT:    vshufpd $22, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xc6,0xc1,0x16]
4555; CHECK-NEXT:    ## xmm0 = xmm0[0],xmm1[1]
4556; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0]
4557; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0]
4558; CHECK-NEXT:    retq ## encoding: [0xc3]
4559  %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> %x3, i8 %x4)
4560  %res1 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> %x3, i8 -1)
4561  %res2 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> zeroinitializer, i8 %x4)
4562  %res3 = fadd <2 x double> %res, %res1
4563  %res4 = fadd <2 x double> %res2, %res3
4564  ret <2 x double> %res4
4565}
4566
4567declare <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)
4568
4569define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
4570; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_256:
4571; CHECK:       ## BB#0:
4572; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4573; CHECK-NEXT:    vshufpd $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xc6,0xd1,0x16]
4574; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
4575; CHECK-NEXT:    vshufpd $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xc6,0xc1,0x16]
4576; CHECK-NEXT:    ## ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
4577; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
4578; CHECK-NEXT:    retq ## encoding: [0xc3]
4579  %res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4)
4580  %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1)
4581  %res2 = fadd <4 x double> %res, %res1
4582  ret <4 x double> %res2
4583}
4584
4585declare <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)
4586
4587define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
4588; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_128:
4589; CHECK:       ## BB#0:
4590; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4591; CHECK-NEXT:    vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0xc6,0xd1,0x16]
4592; CHECK-NEXT:    ## xmm2 {%k1} = xmm0[2,1],xmm1[1,0]
4593; CHECK-NEXT:    vshufps $22, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc6,0xc1,0x16]
4594; CHECK-NEXT:    ## xmm0 = xmm0[2,1],xmm1[1,0]
4595; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0]
4596; CHECK-NEXT:    retq ## encoding: [0xc3]
4597  %res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 %x4)
4598  %res1 = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 -1)
4599  %res2 = fadd <4 x float> %res, %res1
4600  ret <4 x float> %res2
4601}
4602
4603declare <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
4604
4605define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
4606; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_256:
4607; CHECK:       ## BB#0:
4608; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4609; CHECK-NEXT:    vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc6,0xd1,0x16]
4610; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
4611; CHECK-NEXT:    vshufps $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0xc6,0xc1,0x16]
4612; CHECK-NEXT:    ## ymm0 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
4613; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0]
4614; CHECK-NEXT:    retq ## encoding: [0xc3]
4615  %res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
4616  %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
4617  %res2 = fadd <8 x float> %res, %res1
4618  ret <8 x float> %res2
4619}
4620
4621declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)
4622
4623define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
4624; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_128:
4625; CHECK:       ## BB#0:
4626; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4627; CHECK-NEXT:    valignd $22, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x03,0xd1,0x16]
4628; CHECK-NEXT:    valignd $22, %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x03,0xd9,0x16]
4629; CHECK-NEXT:    valignd $22, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x03,0xc1,0x16]
4630; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0]
4631; CHECK-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc3]
4632; CHECK-NEXT:    retq ## encoding: [0xc3]
4633  %res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4)
4634  %res1 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 -1)
4635    %res2 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> zeroinitializer,i8 %x4)
4636  %res3 = add <4 x i32> %res, %res1
4637    %res4 = add <4 x i32> %res3, %res2
4638  ret <4 x i32> %res4
4639}
4640
4641declare <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
4642
4643define <8 x i32>@test_int_x86_avx512_mask_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
4644; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_256:
4645; CHECK:       ## BB#0:
4646; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4647; CHECK-NEXT:    valignd $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x03,0xd1,0x16]
4648; CHECK-NEXT:    valignd $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x03,0xc1,0x16]
4649; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0]
4650; CHECK-NEXT:    retq ## encoding: [0xc3]
4651  %res = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
4652  %res1 = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
4653  %res2 = add <8 x i32> %res, %res1
4654  ret <8 x i32> %res2
4655}
4656
4657declare <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)
4658
4659define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
4660; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_128:
4661; CHECK:       ## BB#0:
4662; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4663; CHECK-NEXT:    valignq $22, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x03,0xd1,0x16]
4664; CHECK-NEXT:    valignq $22, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x03,0xc1,0x16]
4665; CHECK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0]
4666; CHECK-NEXT:    retq ## encoding: [0xc3]
4667  %res = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
4668  %res1 = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 -1)
4669  %res2 = add <2 x i64> %res, %res1
4670  ret <2 x i64> %res2
4671}
4672
4673declare <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
4674
4675define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
4676; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_256:
4677; CHECK:       ## BB#0:
4678; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4679; CHECK-NEXT:    valignq $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x03,0xd1,0x16]
4680; CHECK-NEXT:    valignq $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x03,0xc1,0x16]
4681; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0]
4682; CHECK-NEXT:    retq ## encoding: [0xc3]
4683  %res = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
4684  %res1 = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
4685  %res2 = add <4 x i64> %res, %res1
4686  ret <4 x i64> %res2
4687}
4688
4689declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
4690
4691define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
4692; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256:
4693; CHECK:       ## BB#0:
4694; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4695; CHECK-NEXT:    vpermilpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x0d,0xd1]
4696; CHECK-NEXT:    vpermilpd %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x0d,0xd9]
4697; CHECK-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x0d,0xc1]
4698; CHECK-NEXT:    vaddpd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xcb]
4699; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1]
4700; CHECK-NEXT:    retq ## encoding: [0xc3]
4701  %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
4702  %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
4703  %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
4704  %res3 = fadd <4 x double> %res, %res1
4705  %res4 = fadd <4 x double> %res2, %res3
4706  ret <4 x double> %res4
4707}
4708
4709declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)
4710
4711define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
4712; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128:
4713; CHECK:       ## BB#0:
4714; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4715; CHECK-NEXT:    vpermilpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x0d,0xd1]
4716; CHECK-NEXT:    vpermilpd %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x0d,0xd9]
4717; CHECK-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x0d,0xc1]
4718; CHECK-NEXT:    vaddpd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xcb]
4719; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
4720; CHECK-NEXT:    retq ## encoding: [0xc3]
4721  %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
4722  %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> zeroinitializer, i8 %x3)
4723  %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
4724  %res3 = fadd <2 x double> %res, %res1
4725  %res4 = fadd <2 x double> %res3, %res2
4726  ret <2 x double> %res4
4727}
4728
4729declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)
4730
4731define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
4732; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256:
4733; CHECK:       ## BB#0:
4734; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4735; CHECK-NEXT:    vpermilps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x0c,0xd1]
4736; CHECK-NEXT:    vpermilps %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x0c,0xd9]
4737; CHECK-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x0c,0xc1]
4738; CHECK-NEXT:    vaddps %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xcb]
4739; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
4740; CHECK-NEXT:    retq ## encoding: [0xc3]
4741  %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
4742  %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
4743  %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
4744  %res3 = fadd <8 x float> %res, %res1
4745  %res4 = fadd <8 x float> %res3, %res2
4746  ret <8 x float> %res4
4747}
4748
4749declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)
4750
4751define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
4752; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128:
4753; CHECK:       ## BB#0:
4754; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4755; CHECK-NEXT:    vpermilps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x0c,0xd1]
4756; CHECK-NEXT:    vpermilps %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x0c,0xd9]
4757; CHECK-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x0c,0xc1]
4758; CHECK-NEXT:    vaddps %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xcb]
4759; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1]
4760; CHECK-NEXT:    retq ## encoding: [0xc3]
4761  %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
4762  %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> zeroinitializer, i8 %x3)
4763  %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
4764  %res3 = fadd <4 x float> %res, %res1
4765  %res4 = fadd <4 x float> %res2, %res3
4766  ret <4 x float> %res4
4767}
4768
4769declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, <8 x float>, i8)
4770
4771define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) {
4772; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
4773; CHECK:       ## BB#0:
4774; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4775; CHECK-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x18,0xd1,0x01]
4776; CHECK-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xd9,0x01]
4777; CHECK-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x18,0xc1,0x01]
4778; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0]
4779; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0]
4780; CHECK-NEXT:    retq ## encoding: [0xc3]
4781  %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 %x4)
4782  %res1 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 -1)
4783  %res2 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> zeroinitializer, i8 %x4)
4784  %res3 = fadd <8 x float> %res, %res1
4785  %res4 = fadd <8 x float> %res2, %res3
4786  ret <8 x float> %res4
4787}
4788
4789declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i32, <8 x i32>, i8)
4790
4791define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) {
4792; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
4793; CHECK:       ## BB#0:
4794; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4795; CHECK-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x38,0xd1,0x01]
4796; CHECK-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xd9,0x01]
4797; CHECK-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x38,0xc1,0x01]
4798; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0]
4799; CHECK-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc0]
4800; CHECK-NEXT:    retq ## encoding: [0xc3]
4801
4802  %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 %x4)
4803  %res1 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 -1)
4804  %res2 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> zeroinitializer, i8 %x4)
4805  %res3 = add <8 x i32> %res, %res1
4806  %res4 = add <8 x i32> %res2, %res3
4807  ret <8 x i32> %res4
4808}
4809
4810declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
4811
4812define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
4813; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
4814; CHECK:       ## BB#0:
4815; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4816; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
4817; CHECK-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x25,0xda,0x21]
4818; CHECK-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xc2,0x21]
4819; CHECK-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfe,0xc0]
4820; CHECK-NEXT:    retq ## encoding: [0xc3]
4821  %res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
4822  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
4823  %res2 = add <4 x i32> %res, %res1
4824  ret <4 x i32> %res2
4825}
4826
4827declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
4828
4829define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
4830; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
4831; CHECK:       ## BB#0:
4832; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4833; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
4834; CHECK-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x25,0xda,0x21]
4835; CHECK-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xc2,0x21]
4836; CHECK-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfe,0xc0]
4837; CHECK-NEXT:    retq ## encoding: [0xc3]
4838  %res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
4839  %res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
4840  %res2 = add <4 x i32> %res, %res1
4841  ret <4 x i32> %res2
4842}
4843
4844declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
4845
4846define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
4847; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
4848; CHECK:       ## BB#0:
4849; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4850; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
4851; CHECK-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x25,0xda,0x21]
4852; CHECK-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xc2,0x21]
4853; CHECK-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc0]
4854; CHECK-NEXT:    retq ## encoding: [0xc3]
4855  %res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
4856  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
4857  %res2 = add <8 x i32> %res, %res1
4858  ret <8 x i32> %res2
4859}
4860
4861declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
4862
4863define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
4864; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
4865; CHECK:       ## BB#0:
4866; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4867; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
4868; CHECK-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x25,0xda,0x21]
4869; CHECK-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xc2,0x21]
4870; CHECK-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc0]
4871; CHECK-NEXT:    retq ## encoding: [0xc3]
4872  %res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
4873  %res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
4874  %res2 = add <8 x i32> %res, %res1
4875  ret <8 x i32> %res2
4876}
4877
4878declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
4879
4880define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
4881; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
4882; CHECK:       ## BB#0:
4883; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4884; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
4885; CHECK-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x25,0xda,0x21]
4886; CHECK-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0x21]
4887; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0xd4,0xc0]
4888; CHECK-NEXT:    retq ## encoding: [0xc3]
4889  %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
4890  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
4891  %res2 = add <2 x i64> %res, %res1
4892  ret <2 x i64> %res2
4893}
4894
4895declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
4896
4897define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
4898; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
4899; CHECK:       ## BB#0:
4900; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4901; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
4902; CHECK-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x25,0xda,0x21]
4903; CHECK-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0x21]
4904; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0xd4,0xc0]
4905; CHECK-NEXT:    retq ## encoding: [0xc3]
4906  %res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
4907  %res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
4908  %res2 = add <2 x i64> %res, %res1
4909  ret <2 x i64> %res2
4910}
4911
4912declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
4913
4914define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
4915; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
4916; CHECK:       ## BB#0:
4917; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4918; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
4919; CHECK-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x25,0xda,0x21]
4920; CHECK-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0x21]
4921; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0xd4,0xc0]
4922; CHECK-NEXT:    retq ## encoding: [0xc3]
4923  %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
4924  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
4925  %res2 = add <4 x i64> %res, %res1
4926  ret <4 x i64> %res2
4927}
4928
4929declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
4930
4931define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
4932; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
4933; CHECK:       ## BB#0:
4934; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4935; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
4936; CHECK-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xda,0x21]
4937; CHECK-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0x21]
4938; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0xd4,0xc0]
4939; CHECK-NEXT:    retq ## encoding: [0xc3]
4940  %res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
4941  %res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
4942  %res2 = add <4 x i64> %res, %res1
4943  ret <4 x i64> %res2
4944}
4945
4946define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) {
4947; CHECK-LABEL: test_x86_vcvtph2ps_128:
4948; CHECK:       ## BB#0:
4949; CHECK-NEXT:    vcvtph2ps %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x13,0xc0]
4950; CHECK-NEXT:    retq ## encoding: [0xc3]
4951  %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 -1)
4952  ret <4 x float> %res
4953}
4954
4955define <4 x float> @test_x86_vcvtph2ps_128_rrk(<8 x i16> %a0,<4 x float> %a1, i8 %mask) {
4956; CHECK-LABEL: test_x86_vcvtph2ps_128_rrk:
4957; CHECK:       ## BB#0:
4958; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4959; CHECK-NEXT:    vcvtph2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x13,0xc8]
4960; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
4961; CHECK-NEXT:    retq ## encoding: [0xc3]
4962  %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> %a1, i8 %mask)
4963  ret <4 x float> %res
4964}
4965
4966
4967define <4 x float> @test_x86_vcvtph2ps_128_rrkz(<8 x i16> %a0, i8 %mask) {
4968; CHECK-LABEL: test_x86_vcvtph2ps_128_rrkz:
4969; CHECK:       ## BB#0:
4970; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4971; CHECK-NEXT:    vcvtph2ps %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x13,0xc0]
4972; CHECK-NEXT:    retq ## encoding: [0xc3]
4973  %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 %mask)
4974  ret <4 x float> %res
4975}
4976
4977declare <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16>, <4 x float>, i8) nounwind readonly
4978
4979define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) {
4980; CHECK-LABEL: test_x86_vcvtph2ps_256:
4981; CHECK:       ## BB#0:
4982; CHECK-NEXT:    vcvtph2ps %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x13,0xc0]
4983; CHECK-NEXT:    retq ## encoding: [0xc3]
4984  %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 -1)
4985  ret <8 x float> %res
4986}
4987
4988define <8 x float> @test_x86_vcvtph2ps_256_rrk(<8 x i16> %a0,<8 x float> %a1, i8 %mask) {
4989; CHECK-LABEL: test_x86_vcvtph2ps_256_rrk:
4990; CHECK:       ## BB#0:
4991; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4992; CHECK-NEXT:    vcvtph2ps %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x13,0xc8]
4993; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
4994; CHECK-NEXT:    retq ## encoding: [0xc3]
4995  %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> %a1, i8 %mask)
4996  ret <8 x float> %res
4997}
4998
4999define <8 x float> @test_x86_vcvtph2ps_256_rrkz(<8 x i16> %a0, i8 %mask) {
5000; CHECK-LABEL: test_x86_vcvtph2ps_256_rrkz:
5001; CHECK:       ## BB#0:
5002; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5003; CHECK-NEXT:    vcvtph2ps %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x13,0xc0]
5004; CHECK-NEXT:    retq ## encoding: [0xc3]
5005  %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 %mask)
5006  ret <8 x float> %res
5007}
5008
5009declare <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16>, <8 x float>, i8) nounwind readonly
5010
5011define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0, i8 %mask, <8 x i16> %src) {
5012; CHECK-LABEL: test_x86_vcvtps2ph_128:
5013; CHECK:       ## BB#0:
5014; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5015; CHECK-NEXT:    vcvtps2ph $2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1d,0xc1,0x02]
5016; CHECK-NEXT:    vcvtps2ph $2, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc2,0x02]
5017; CHECK-NEXT:    vcvtps2ph $2, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1d,0xc0,0x02]
5018; CHECK-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
5019; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0xc5,0xf1,0xfd,0xc0]
5020; CHECK-NEXT:    retq ## encoding: [0xc3]
5021  %res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
5022  %res2 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 %mask)
5023  %res3 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> %src, i8 %mask)
5024  %res0 = add <8 x i16> %res1, %res2
5025  %res = add <8 x i16> %res3, %res0
5026  ret <8 x i16> %res
5027}
5028
5029declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float>, i32, <8 x i16>, i8) nounwind readonly
5030
5031define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0, i8 %mask, <8 x i16> %src) {
5032; CHECK-LABEL: test_x86_vcvtps2ph_256:
5033; CHECK:       ## BB#0:
5034; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5035; CHECK-NEXT:    vcvtps2ph $2, %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1d,0xc1,0x02]
5036; CHECK-NEXT:    vcvtps2ph $2, %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc2,0x02]
5037; CHECK-NEXT:    vcvtps2ph $2, %ymm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1d,0xc0,0x02]
5038; CHECK-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
5039; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0xc5,0xf1,0xfd,0xc0]
5040; CHECK-NEXT:    retq ## encoding: [0xc3]
5041  %res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
5042  %res2 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 %mask)
5043  %res3 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> %src, i8 %mask)
5044  %res0 = add <8 x i16> %res1, %res2
5045  %res = add <8 x i16> %res3, %res0
5046  ret <8 x i16> %res
5047}
5048
5049declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float>, i32, <8 x i16>, i8) nounwind readonly
5050
5051define <8 x float> @test_rsqrt_ps_256_rr(<8 x float> %a0) {
5052; CHECK-LABEL: test_rsqrt_ps_256_rr:
5053; CHECK:       ## BB#0:
5054; CHECK-NEXT:    vrsqrt14ps %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x4e,0xc0]
5055; CHECK-NEXT:    retq ## encoding: [0xc3]
5056  %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
5057  ret <8 x float> %res
5058}
5059
5060define <8 x float> @test_rsqrt_ps_256_rrkz(<8 x float> %a0, i8 %mask) {
5061; CHECK-LABEL: test_rsqrt_ps_256_rrkz:
5062; CHECK:       ## BB#0:
5063; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5064; CHECK-NEXT:    vrsqrt14ps %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x4e,0xc0]
5065; CHECK-NEXT:    retq ## encoding: [0xc3]
5066  %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
5067  ret <8 x float> %res
5068}
5069
5070define <8 x float> @test_rsqrt_ps_256_rrk(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
5071; CHECK-LABEL: test_rsqrt_ps_256_rrk:
5072; CHECK:       ## BB#0:
5073; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5074; CHECK-NEXT:    vrsqrt14ps %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x4e,0xc8]
5075; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
5076; CHECK-NEXT:    retq ## encoding: [0xc3]
5077  %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> %a1, i8 %mask)
5078  ret <8 x float> %res
5079}
5080
5081define <4 x float> @test_rsqrt_ps_128_rr(<4 x float> %a0) {
5082; CHECK-LABEL: test_rsqrt_ps_128_rr:
5083; CHECK:       ## BB#0:
5084; CHECK-NEXT:    vrsqrt14ps %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x4e,0xc0]
5085; CHECK-NEXT:    retq ## encoding: [0xc3]
5086  %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
5087  ret <4 x float> %res
5088}
5089
5090define <4 x float> @test_rsqrt_ps_128_rrkz(<4 x float> %a0, i8 %mask) {
5091; CHECK-LABEL: test_rsqrt_ps_128_rrkz:
5092; CHECK:       ## BB#0:
5093; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5094; CHECK-NEXT:    vrsqrt14ps %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x4e,0xc0]
5095; CHECK-NEXT:    retq ## encoding: [0xc3]
5096  %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
5097  ret <4 x float> %res
5098}
5099
5100define <4 x float> @test_rsqrt_ps_128_rrk(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
5101; CHECK-LABEL: test_rsqrt_ps_128_rrk:
5102; CHECK:       ## BB#0:
5103; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5104; CHECK-NEXT:    vrsqrt14ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x4e,0xc8]
5105; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
5106; CHECK-NEXT:    retq ## encoding: [0xc3]
5107  %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
5108  ret <4 x float> %res
5109}
5110
5111declare <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
5112declare <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float>, <4 x float>, i8) nounwind readnone
5113
5114define <8 x float> @test_rcp_ps_256_rr(<8 x float> %a0) {
5115; CHECK-LABEL: test_rcp_ps_256_rr:
5116; CHECK:       ## BB#0:
5117; CHECK-NEXT:    vrcp14ps %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x4c,0xc0]
5118; CHECK-NEXT:    retq ## encoding: [0xc3]
5119  %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
5120  ret <8 x float> %res
5121}
5122
5123define <8 x float> @test_rcp_ps_256_rrkz(<8 x float> %a0, i8 %mask) {
5124; CHECK-LABEL: test_rcp_ps_256_rrkz:
5125; CHECK:       ## BB#0:
5126; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5127; CHECK-NEXT:    vrcp14ps %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x4c,0xc0]
5128; CHECK-NEXT:    retq ## encoding: [0xc3]
5129  %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
5130  ret <8 x float> %res
5131}
5132
5133define <8 x float> @test_rcp_ps_256_rrk(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
5134; CHECK-LABEL: test_rcp_ps_256_rrk:
5135; CHECK:       ## BB#0:
5136; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5137; CHECK-NEXT:    vrcp14ps %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x4c,0xc8]
5138; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
5139; CHECK-NEXT:    retq ## encoding: [0xc3]
5140  %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> %a1, i8 %mask)
5141  ret <8 x float> %res
5142}
5143
5144define <4 x float> @test_rcp_ps_128_rr(<4 x float> %a0) {
5145; CHECK-LABEL: test_rcp_ps_128_rr:
5146; CHECK:       ## BB#0:
5147; CHECK-NEXT:    vrcp14ps %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x4c,0xc0]
5148; CHECK-NEXT:    retq ## encoding: [0xc3]
5149  %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
5150  ret <4 x float> %res
5151}
5152
5153define <4 x float> @test_rcp_ps_128_rrkz(<4 x float> %a0, i8 %mask) {
5154; CHECK-LABEL: test_rcp_ps_128_rrkz:
5155; CHECK:       ## BB#0:
5156; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5157; CHECK-NEXT:    vrcp14ps %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x4c,0xc0]
5158; CHECK-NEXT:    retq ## encoding: [0xc3]
5159  %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
5160  ret <4 x float> %res
5161}
5162
5163define <4 x float> @test_rcp_ps_128_rrk(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
5164; CHECK-LABEL: test_rcp_ps_128_rrk:
5165; CHECK:       ## BB#0:
5166; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5167; CHECK-NEXT:    vrcp14ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x4c,0xc8]
5168; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
5169; CHECK-NEXT:    retq ## encoding: [0xc3]
5170  %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
5171  ret <4 x float> %res
5172}
5173
5174declare <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
5175declare <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float>, <4 x float>, i8) nounwind readnone
5176
5177define <4 x double> @test_rsqrt_pd_256_rr(<4 x double> %a0) {
5178; CHECK-LABEL: test_rsqrt_pd_256_rr:
5179; CHECK:       ## BB#0:
5180; CHECK-NEXT:    vrsqrt14pd %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x4e,0xc0]
5181; CHECK-NEXT:    retq ## encoding: [0xc3]
5182  %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)
5183  ret <4 x double> %res
5184}
5185
5186define <4 x double> @test_rsqrt_pd_256_rrkz(<4 x double> %a0, i8 %mask) {
5187; CHECK-LABEL: test_rsqrt_pd_256_rrkz:
5188; CHECK:       ## BB#0:
5189; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5190; CHECK-NEXT:    vrsqrt14pd %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x4e,0xc0]
5191; CHECK-NEXT:    retq ## encoding: [0xc3]
5192  %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
5193  ret <4 x double> %res
5194}
5195
5196define <4 x double> @test_rsqrt_pd_256_rrk(<4 x double> %a0, <4 x double> %a1, i8 %mask) {
5197; CHECK-LABEL: test_rsqrt_pd_256_rrk:
5198; CHECK:       ## BB#0:
5199; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5200; CHECK-NEXT:    vrsqrt14pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x4e,0xc8]
5201; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
5202; CHECK-NEXT:    retq ## encoding: [0xc3]
5203  %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask)
5204  ret <4 x double> %res
5205}
5206
5207define <2 x double> @test_rsqrt_pd_128_rr(<2 x double> %a0) {
5208; CHECK-LABEL: test_rsqrt_pd_128_rr:
5209; CHECK:       ## BB#0:
5210; CHECK-NEXT:    vrsqrt14pd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x4e,0xc0]
5211; CHECK-NEXT:    retq ## encoding: [0xc3]
5212  %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 -1)
5213  ret <2 x double> %res
5214}
5215
5216define <2 x double> @test_rsqrt_pd_128_rrkz(<2 x double> %a0, i8 %mask) {
5217; CHECK-LABEL: test_rsqrt_pd_128_rrkz:
5218; CHECK:       ## BB#0:
5219; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5220; CHECK-NEXT:    vrsqrt14pd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x4e,0xc0]
5221; CHECK-NEXT:    retq ## encoding: [0xc3]
5222  %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 %mask)
5223  ret <2 x double> %res
5224}
5225
5226define <2 x double> @test_rsqrt_pd_128_rrk(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
5227; CHECK-LABEL: test_rsqrt_pd_128_rrk:
5228; CHECK:       ## BB#0:
5229; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5230; CHECK-NEXT:    vrsqrt14pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x4e,0xc8]
5231; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
5232; CHECK-NEXT:    retq ## encoding: [0xc3]
5233  %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask)
5234  ret <2 x double> %res
5235}
5236
5237declare <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
5238declare <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double>, <2 x double>, i8) nounwind readnone
5239
5240define <4 x double> @test_rcp_pd_256_rr(<4 x double> %a0) {
5241; CHECK-LABEL: test_rcp_pd_256_rr:
5242; CHECK:       ## BB#0:
5243; CHECK-NEXT:    vrcp14pd %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x4c,0xc0]
5244; CHECK-NEXT:    retq ## encoding: [0xc3]
5245  %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)
5246  ret <4 x double> %res
5247}
5248
5249define <4 x double> @test_rcp_pd_256_rrkz(<4 x double> %a0, i8 %mask) {
5250; CHECK-LABEL: test_rcp_pd_256_rrkz:
5251; CHECK:       ## BB#0:
5252; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5253; CHECK-NEXT:    vrcp14pd %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x4c,0xc0]
5254; CHECK-NEXT:    retq ## encoding: [0xc3]
5255  %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
5256  ret <4 x double> %res
5257}
5258
5259define <4 x double> @test_rcp_pd_256_rrk(<4 x double> %a0, <4 x double> %a1, i8 %mask) {
5260; CHECK-LABEL: test_rcp_pd_256_rrk:
5261; CHECK:       ## BB#0:
5262; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5263; CHECK-NEXT:    vrcp14pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x4c,0xc8]
5264; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
5265; CHECK-NEXT:    retq ## encoding: [0xc3]
5266  %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask)
5267  ret <4 x double> %res
5268}
5269
5270define <2 x double> @test_rcp_pd_128_rr(<2 x double> %a0) {
5271; CHECK-LABEL: test_rcp_pd_128_rr:
5272; CHECK:       ## BB#0:
5273; CHECK-NEXT:    vrcp14pd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x4c,0xc0]
5274; CHECK-NEXT:    retq ## encoding: [0xc3]
5275  %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 -1)
5276  ret <2 x double> %res
5277}
5278
5279define <2 x double> @test_rcp_pd_128_rrkz(<2 x double> %a0, i8 %mask) {
5280; CHECK-LABEL: test_rcp_pd_128_rrkz:
5281; CHECK:       ## BB#0:
5282; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5283; CHECK-NEXT:    vrcp14pd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x4c,0xc0]
5284; CHECK-NEXT:    retq ## encoding: [0xc3]
5285  %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 %mask)
5286  ret <2 x double> %res
5287}
5288
5289define <2 x double> @test_rcp_pd_128_rrk(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
5290; CHECK-LABEL: test_rcp_pd_128_rrk:
5291; CHECK:       ## BB#0:
5292; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5293; CHECK-NEXT:    vrcp14pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x4c,0xc8]
5294; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
5295; CHECK-NEXT:    retq ## encoding: [0xc3]
5296  %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask)
5297  ret <2 x double> %res
5298}
5299
5300declare <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
5301declare <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double>, <2 x double>, i8) nounwind readnone
5302
5303declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float>, <8 x float>, i8)
5304
5305define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256(<4 x float> %x0, <8 x float> %x2, i8 %mask) {
5306; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256:
5307; CHECK:       ## BB#0:
5308; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
5309; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5310; CHECK-NEXT:    vshuff32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x23,0xd0,0x00]
5311; CHECK-NEXT:    ## ymm2 {%k1} {z} = ymm0[0,1,2,3,0,1,2,3]
5312; CHECK-NEXT:    vshuff32x4 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x23,0xc8,0x00]
5313; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[0,1,2,3,0,1,2,3]
5314; CHECK-NEXT:    vshuff32x4 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x23,0xc0,0x00]
5315; CHECK-NEXT:    ## ymm0 = ymm0[0,1,2,3,0,1,2,3]
5316; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1]
5317; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0]
5318; CHECK-NEXT:    retq ## encoding: [0xc3]
5319  %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 -1)
5320  %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 %mask)
5321  %res3 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %mask)
5322  %res4 = fadd <8 x float> %res1, %res2
5323  %res5 = fadd <8 x float> %res3, %res4
5324  ret <8 x float> %res5
5325}
5326
5327declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32>, <8 x i32>, i8)
5328
5329define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x4_256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask) {
5330; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256:
5331; CHECK:       ## BB#0:
5332; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
5333; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5334; CHECK-NEXT:    vshufi32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x43,0xd0,0x00]
5335; CHECK-NEXT:    ## ymm2 {%k1} {z} = ymm0[0,1,2,3,0,1,2,3]
5336; CHECK-NEXT:    vshufi32x4 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x43,0xc8,0x00]
5337; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[0,1,2,3,0,1,2,3]
5338; CHECK-NEXT:    vshufi32x4 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x43,0xc0,0x00]
5339; CHECK-NEXT:    ## ymm0 = ymm0[0,1,2,3,0,1,2,3]
5340; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1]
5341; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0]
5342; CHECK-NEXT:    retq ## encoding: [0xc3]
5343  %res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1)
5344  %res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask)
5345  %res3 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
5346  %res4 = add <8 x i32> %res1, %res2
5347  %res5 = add <8 x i32> %res3, %res4
5348  ret <8 x i32> %res5
5349}
5350
5351declare <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
5352
5353define <2 x i64>@test_int_x86_avx512_mask_psrl_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
5354; CHECK-LABEL: test_int_x86_avx512_mask_psrl_q_128:
5355; CHECK:       ## BB#0:
5356; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5357; CHECK-NEXT:    vpsrlq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xd3,0xd1]
5358; CHECK-NEXT:    vpsrlq %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xd3,0xd9]
5359; CHECK-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd3,0xc1]
5360; CHECK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0]
5361; CHECK-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0xc3]
5362; CHECK-NEXT:    retq ## encoding: [0xc3]
5363  %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
5364  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
5365  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
5366  %res3 = add <2 x i64> %res, %res1
5367  %res4 = add <2 x i64> %res3, %res2
5368  ret <2 x i64> %res4
5369}
5370
5371declare <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)
5372
5373define <4 x i64>@test_int_x86_avx512_mask_psrl_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
5374; CHECK-LABEL: test_int_x86_avx512_mask_psrl_q_256:
5375; CHECK:       ## BB#0:
5376; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5377; CHECK-NEXT:    vpsrlq %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xd3,0xd1]
5378; CHECK-NEXT:    vpsrlq %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xd3,0xd9]
5379; CHECK-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd3,0xc1]
5380; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0]
5381; CHECK-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc3]
5382; CHECK-NEXT:    retq ## encoding: [0xc3]
5383  %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
5384  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
5385  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
5386  %res3 = add <4 x i64> %res, %res1
5387  %res4 = add <4 x i64> %res3, %res2
5388  ret <4 x i64> %res4
5389}
5390
5391declare <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64>, i32, <2 x i64>, i8)
5392
5393define <2 x i64>@test_int_x86_avx512_mask_psrl_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
5394; CHECK-LABEL: test_int_x86_avx512_mask_psrl_qi_128:
5395; CHECK:       ## BB#0:
5396; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5397; CHECK-NEXT:    vpsrlq $255, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x73,0xd0,0xff]
5398; CHECK-NEXT:    vpsrlq $255, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0x89,0x73,0xd0,0xff]
5399; CHECK-NEXT:    vpsrlq $255, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x73,0xd0,0xff]
5400; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
5401; CHECK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0]
5402; CHECK-NEXT:    retq ## encoding: [0xc3]
5403  %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 255, <2 x i64> %x2, i8 %x3)
5404  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 255, <2 x i64> %x2, i8 -1)
5405  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 255, <2 x i64> zeroinitializer, i8 %x3)
5406  %res3 = add <2 x i64> %res, %res1
5407  %res4 = add <2 x i64> %res2, %res3
5408  ret <2 x i64> %res4
5409}
5410
5411declare <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64>, i32, <4 x i64>, i8)
5412
5413define <4 x i64>@test_int_x86_avx512_mask_psrl_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
5414; CHECK-LABEL: test_int_x86_avx512_mask_psrl_qi_256:
5415; CHECK:       ## BB#0:
5416; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5417; CHECK-NEXT:    vpsrlq $255, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x73,0xd0,0xff]
5418; CHECK-NEXT:    vpsrlq $255, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xa9,0x73,0xd0,0xff]
5419; CHECK-NEXT:    vpsrlq $255, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x73,0xd0,0xff]
5420; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
5421; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0]
5422; CHECK-NEXT:    retq ## encoding: [0xc3]
5423  %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 255, <4 x i64> %x2, i8 %x3)
5424  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 255, <4 x i64> %x2, i8 -1)
5425  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 255, <4 x i64> zeroinitializer, i8 %x3)
5426  %res3 = add <4 x i64> %res, %res1
5427  %res4 = add <4 x i64> %res2, %res3
5428  ret <4 x i64> %res4
5429}
5430declare <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
5431define <4 x i32>@test_int_x86_avx512_mask_psrl_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
5432; CHECK-LABEL: test_int_x86_avx512_mask_psrl_d_128:
5433; CHECK:       ## BB#0:
5434; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5435; CHECK-NEXT:    vpsrld %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd2,0xd1]
5436; CHECK-NEXT:    vpsrld %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd2,0xd9]
5437; CHECK-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd2,0xc1]
5438; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0]
5439; CHECK-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc3]
5440; CHECK-NEXT:    retq ## encoding: [0xc3]
5441  %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
5442  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
5443  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
5444  %res3 = add <4 x i32> %res, %res1
5445  %res4 = add <4 x i32> %res3, %res2
5446  ret <4 x i32> %res4
5447}
5448
5449declare <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)
5450
5451define <8 x i32>@test_int_x86_avx512_mask_psrl_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
5452; CHECK-LABEL: test_int_x86_avx512_mask_psrl_d_256:
5453; CHECK:       ## BB#0:
5454; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5455; CHECK-NEXT:    vpsrld %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd2,0xd1]
5456; CHECK-NEXT:    vpsrld %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd2,0xd9]
5457; CHECK-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd2,0xc1]
5458; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0]
5459; CHECK-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc0]
5460; CHECK-NEXT:    retq ## encoding: [0xc3]
5461  %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
5462  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
5463  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
5464  %res3 = add <8 x i32> %res, %res1
5465  %res4 = add <8 x i32> %res2, %res3
5466  ret <8 x i32> %res4
5467}
5468
5469declare <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32>, i32, <4 x i32>, i8)
5470
5471define <4 x i32>@test_int_x86_avx512_mask_psrl_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
5472; CHECK-LABEL: test_int_x86_avx512_mask_psrl_di_128:
5473; CHECK:       ## BB#0:
5474; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5475; CHECK-NEXT:    vpsrld $255, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x72,0xd0,0xff]
5476; CHECK-NEXT:    vpsrld $255, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0x89,0x72,0xd0,0xff]
5477; CHECK-NEXT:    vpsrld $255, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xd0,0xff]
5478; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
5479; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0]
5480; CHECK-NEXT:    retq ## encoding: [0xc3]
5481  %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 255, <4 x i32> %x2, i8 %x3)
5482  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 255, <4 x i32> %x2, i8 -1)
5483  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 255, <4 x i32> zeroinitializer, i8 %x3)
5484  %res3 = add <4 x i32> %res, %res1
5485  %res4 = add <4 x i32> %res2, %res3
5486  ret <4 x i32> %res4
5487}
5488
5489declare <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32>, i32, <8 x i32>, i8)
5490
5491define <8 x i32>@test_int_x86_avx512_mask_psrl_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
5492; CHECK-LABEL: test_int_x86_avx512_mask_psrl_di_256:
5493; CHECK:       ## BB#0:
5494; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5495; CHECK-NEXT:    vpsrld $255, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x72,0xd0,0xff]
5496; CHECK-NEXT:    vpsrld $255, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xd0,0xff]
5497; CHECK-NEXT:    vpsrld $255, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x72,0xd0,0xff]
5498; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
5499; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0]
5500; CHECK-NEXT:    retq ## encoding: [0xc3]
5501  %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 255, <8 x i32> %x2, i8 %x3)
5502  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 255, <8 x i32> %x2, i8 -1)
5503  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 255, <8 x i32> zeroinitializer, i8 %x3)
5504  %res3 = add <8 x i32> %res, %res1
5505  %res4 = add <8 x i32> %res2, %res3
5506  ret <8 x i32> %res4
5507}
5508
5509declare <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32>, i32, <16 x i32>, i16)
5510
5511define <16 x i32>@test_int_x86_avx512_mask_psrl_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
5512; CHECK-LABEL: test_int_x86_avx512_mask_psrl_di_512:
5513; CHECK:       ## BB#0:
5514; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5515; CHECK-NEXT:    vpsrld $255, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0xff]
5516; CHECK-NEXT:    vpsrld $255, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xd0,0xff]
5517; CHECK-NEXT:    vpsrld $255, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0xff]
5518; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
5519; CHECK-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
5520; CHECK-NEXT:    retq ## encoding: [0xc3]
5521  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 255, <16 x i32> %x2, i16 %x3)
5522  %res1 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 255, <16 x i32> %x2, i16 -1)
5523  %res2 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 255, <16 x i32> zeroinitializer, i16 %x3)
5524  %res3 = add <16 x i32> %res, %res1
5525  %res4 = add <16 x i32> %res2, %res3
5526  ret <16 x i32> %res4
5527}
5528
5529declare <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8)
5530
5531define <2 x i64>@test_int_x86_avx512_mask_psrlv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
5532; CHECK-LABEL: test_int_x86_avx512_mask_psrlv2_di:
5533; CHECK:       ## BB#0:
5534; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5535; CHECK-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x45,0xd1]
5536; CHECK-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x45,0xd9]
5537; CHECK-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x45,0xc1]
5538; CHECK-NEXT:    vpaddq %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xcb]
5539; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
5540; CHECK-NEXT:    retq ## encoding: [0xc3]
5541  %res = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
5542  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
5543  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
5544  %res3 = add <2 x i64> %res, %res1
5545  %res4 = add <2 x i64> %res3, %res2
5546  ret <2 x i64> %res4
5547}
5548
5549declare <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8)
5550
5551define <4 x i64>@test_int_x86_avx512_mask_psrlv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
5552; CHECK-LABEL: test_int_x86_avx512_mask_psrlv4_di:
5553; CHECK:       ## BB#0:
5554; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5555; CHECK-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x45,0xd1]
5556; CHECK-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x45,0xd9]
5557; CHECK-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x45,0xc1]
5558; CHECK-NEXT:    vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb]
5559; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
5560; CHECK-NEXT:    retq ## encoding: [0xc3]
5561  %res = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
5562  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
5563  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
5564  %res3 = add <4 x i64> %res, %res1
5565  %res4 = add <4 x i64> %res3, %res2
5566  ret <4 x i64> %res4
5567}
5568
5569declare <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)
5570
5571define <4 x i32>@test_int_x86_avx512_mask_psrlv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
5572; CHECK-LABEL: test_int_x86_avx512_mask_psrlv4_si:
5573; CHECK:       ## BB#0:
5574; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5575; CHECK-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x45,0xd1]
5576; CHECK-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x45,0xd9]
5577; CHECK-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x45,0xc1]
5578; CHECK-NEXT:    vpaddd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xcb]
5579; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
5580; CHECK-NEXT:    retq ## encoding: [0xc3]
5581  %res = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
5582  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
5583  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
5584  %res3 = add <4 x i32> %res, %res1
5585  %res4 = add <4 x i32> %res3, %res2
5586  ret <4 x i32> %res4
5587}
5588
5589declare <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)
5590
5591define <8 x i32>@test_int_x86_avx512_mask_psrlv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
5592; CHECK-LABEL: test_int_x86_avx512_mask_psrlv8_si:
5593; CHECK:       ## BB#0:
5594; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5595; CHECK-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x45,0xd1]
5596; CHECK-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x45,0xd9]
5597; CHECK-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x45,0xc1]
5598; CHECK-NEXT:    vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb]
5599; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
5600; CHECK-NEXT:    retq ## encoding: [0xc3]
5601  %res = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
5602  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
5603  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
5604  %res3 = add <8 x i32> %res, %res1
5605  %res4 = add <8 x i32> %res3, %res2
5606  ret <8 x i32> %res4
5607}
5608
5609declare <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
5610
5611define <4 x i32>@test_int_x86_avx512_mask_psra_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
5612; CHECK-LABEL: test_int_x86_avx512_mask_psra_d_128:
5613; CHECK:       ## BB#0:
5614; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5615; CHECK-NEXT:    vpsrad %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe2,0xd1]
5616; CHECK-NEXT:    vpsrad %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe2,0xd9]
5617; CHECK-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe2,0xc1]
5618; CHECK-NEXT:    vpaddd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xcb]
5619; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
5620; CHECK-NEXT:    retq ## encoding: [0xc3]
5621  %res = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
5622  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
5623  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
5624  %res3 = add <4 x i32> %res, %res1
5625  %res4 = add <4 x i32> %res3, %res2
5626  ret <4 x i32> %res4
5627}
5628
5629declare <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)
5630
5631define <8 x i32>@test_int_x86_avx512_mask_psra_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
5632; CHECK-LABEL: test_int_x86_avx512_mask_psra_d_256:
5633; CHECK:       ## BB#0:
5634; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5635; CHECK-NEXT:    vpsrad %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe2,0xd1]
5636; CHECK-NEXT:    vpsrad %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe2,0xd9]
5637; CHECK-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe2,0xc1]
5638; CHECK-NEXT:    vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb]
5639; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
5640; CHECK-NEXT:    retq ## encoding: [0xc3]
5641  %res = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
5642  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
5643  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
5644  %res3 = add <8 x i32> %res, %res1
5645  %res4 = add <8 x i32> %res3, %res2
5646  ret <8 x i32> %res4
5647}
5648
5649declare <4 x i32> @llvm.x86.avx512.mask.psra.di.128(<4 x i32>, i32, <4 x i32>, i8)
5650
5651define <4 x i32>@test_int_x86_avx512_mask_psra_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
5652; CHECK-LABEL: test_int_x86_avx512_mask_psra_di_128:
5653; CHECK:       ## BB#0:
5654; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5655; CHECK-NEXT:    vpsrad $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x72,0xe0,0x03]
5656; CHECK-NEXT:    vpsrad $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0x89,0x72,0xe0,0x03]
5657; CHECK-NEXT:    vpsrad $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xe0,0x03]
5658; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca]
5659; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
5660; CHECK-NEXT:    retq ## encoding: [0xc3]
5661  %res = call <4 x i32> @llvm.x86.avx512.mask.psra.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
5662  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psra.di.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
5663  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psra.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
5664  %res3 = add <4 x i32> %res, %res1
5665  %res4 = add <4 x i32> %res3, %res2
5666  ret <4 x i32> %res4
5667}
5668
5669declare <8 x i32> @llvm.x86.avx512.mask.psra.di.256(<8 x i32>, i32, <8 x i32>, i8)
5670
5671define <8 x i32>@test_int_x86_avx512_mask_psra_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
5672; CHECK-LABEL: test_int_x86_avx512_mask_psra_di_256:
5673; CHECK:       ## BB#0:
5674; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5675; CHECK-NEXT:    vpsrad $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x72,0xe0,0x03]
5676; CHECK-NEXT:    vpsrad $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xe0,0x03]
5677; CHECK-NEXT:    vpsrad $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x72,0xe0,0x03]
5678; CHECK-NEXT:    vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca]
5679; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
5680; CHECK-NEXT:    retq ## encoding: [0xc3]
5681  %res = call <8 x i32> @llvm.x86.avx512.mask.psra.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
5682  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psra.di.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
5683  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psra.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
5684  %res3 = add <8 x i32> %res, %res1
5685  %res4 = add <8 x i32> %res3, %res2
5686  ret <8 x i32> %res4
5687}
5688
5689declare <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
5690
5691define <2 x i64>@test_int_x86_avx512_mask_psra_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
5692; CHECK-LABEL: test_int_x86_avx512_mask_psra_q_128:
5693; CHECK:       ## BB#0:
5694; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5695; CHECK-NEXT:    vpsraq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
5696; CHECK-NEXT:    vpsraq %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xd9]
5697; CHECK-NEXT:    vpsraq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe2,0xc1]
5698; CHECK-NEXT:    vpaddq %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xcb]
5699; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
5700; CHECK-NEXT:    retq ## encoding: [0xc3]
5701  %res = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
5702  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
5703  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
5704  %res3 = add <2 x i64> %res, %res1
5705  %res4 = add <2 x i64> %res3, %res2
5706  ret <2 x i64> %res4
5707}
5708
5709declare <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)
5710
5711define <4 x i64>@test_int_x86_avx512_mask_psra_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
5712; CHECK-LABEL: test_int_x86_avx512_mask_psra_q_256:
5713; CHECK:       ## BB#0:
5714; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5715; CHECK-NEXT:    vpsraq %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1]
5716; CHECK-NEXT:    vpsraq %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xd9]
5717; CHECK-NEXT:    vpsraq %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xe2,0xc1]
5718; CHECK-NEXT:    vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb]
5719; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
5720; CHECK-NEXT:    retq ## encoding: [0xc3]
5721  %res = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
5722  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
5723  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
5724  %res3 = add <4 x i64> %res, %res1
5725  %res4 = add <4 x i64> %res3, %res2
5726  ret <4 x i64> %res4
5727}
5728
5729declare <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64>, i32, <2 x i64>, i8)
5730
5731define <2 x i64>@test_int_x86_avx512_mask_psra_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
5732; CHECK-LABEL: test_int_x86_avx512_mask_psra_qi_128:
5733; CHECK:       ## BB#0:
5734; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5735; CHECK-NEXT:    vpsraq $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x03]
5736; CHECK-NEXT:    vpsraq $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0x89,0x72,0xe0,0x03]
5737; CHECK-NEXT:    vpsraq $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x72,0xe0,0x03]
5738; CHECK-NEXT:    vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca]
5739; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
5740; CHECK-NEXT:    retq ## encoding: [0xc3]
5741  %res = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
5742  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
5743  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
5744  %res3 = add <2 x i64> %res, %res1
5745  %res4 = add <2 x i64> %res3, %res2
5746  ret <2 x i64> %res4
5747}
5748
5749declare <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64>, i32, <4 x i64>, i8)
5750
5751define <4 x i64>@test_int_x86_avx512_mask_psra_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
5752; CHECK-LABEL: test_int_x86_avx512_mask_psra_qi_256:
5753; CHECK:       ## BB#0:
5754; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5755; CHECK-NEXT:    vpsraq $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x03]
5756; CHECK-NEXT:    vpsraq $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xa9,0x72,0xe0,0x03]
5757; CHECK-NEXT:    vpsraq $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x72,0xe0,0x03]
5758; CHECK-NEXT:    vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca]
5759; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
5760; CHECK-NEXT:    retq ## encoding: [0xc3]
5761  %res = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
5762  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
5763  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
5764  %res3 = add <4 x i64> %res, %res1
5765  %res4 = add <4 x i64> %res3, %res2
5766  ret <4 x i64> %res4
5767}
5768
5769
5770declare <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
5771
5772define <4 x i32>@test_int_x86_avx512_mask_psll_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
5773; CHECK-LABEL: test_int_x86_avx512_mask_psll_d_128:
5774; CHECK:       ## BB#0:
5775; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5776; CHECK-NEXT:    vpslld %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf2,0xd1]
5777; CHECK-NEXT:    vpslld %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf2,0xd9]
5778; CHECK-NEXT:    vpslld %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf2,0xc1]
5779; CHECK-NEXT:    vpaddd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xcb]
5780; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
5781; CHECK-NEXT:    retq ## encoding: [0xc3]
5782  %res = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
5783  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
5784  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
5785  %res3 = add <4 x i32> %res, %res1
5786  %res4 = add <4 x i32> %res3, %res2
5787  ret <4 x i32> %res4
5788}
5789
5790declare <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)
5791
5792define <8 x i32>@test_int_x86_avx512_mask_psll_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
5793; CHECK-LABEL: test_int_x86_avx512_mask_psll_d_256:
5794; CHECK:       ## BB#0:
5795; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5796; CHECK-NEXT:    vpslld %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf2,0xd1]
5797; CHECK-NEXT:    vpslld %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf2,0xd9]
5798; CHECK-NEXT:    vpslld %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf2,0xc1]
5799; CHECK-NEXT:    vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb]
5800; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
5801; CHECK-NEXT:    retq ## encoding: [0xc3]
5802  %res = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
5803  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
5804  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
5805  %res3 = add <8 x i32> %res, %res1
5806  %res4 = add <8 x i32> %res3, %res2
5807  ret <8 x i32> %res4
5808}
5809
5810declare <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32>, i32, <4 x i32>, i8)
5811
5812define <4 x i32>@test_int_x86_avx512_mask_psll_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
5813; CHECK-LABEL: test_int_x86_avx512_mask_psll_di_128:
5814; CHECK:       ## BB#0:
5815; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5816; CHECK-NEXT:    vpslld $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x72,0xf0,0x03]
5817; CHECK-NEXT:    vpslld $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0x89,0x72,0xf0,0x03]
5818; CHECK-NEXT:    vpslld $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xf0,0x03]
5819; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca]
5820; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
5821; CHECK-NEXT:    retq ## encoding: [0xc3]
5822  %res = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
5823  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
5824  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
5825  %res3 = add <4 x i32> %res, %res1
5826  %res4 = add <4 x i32> %res3, %res2
5827  ret <4 x i32> %res4
5828}
5829
5830declare <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32>, i32, <8 x i32>, i8)
5831
5832define <8 x i32>@test_int_x86_avx512_mask_psll_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
5833; CHECK-LABEL: test_int_x86_avx512_mask_psll_di_256:
5834; CHECK:       ## BB#0:
5835; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5836; CHECK-NEXT:    vpslld $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x72,0xf0,0x03]
5837; CHECK-NEXT:    vpslld $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xf0,0x03]
5838; CHECK-NEXT:    vpslld $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x72,0xf0,0x03]
5839; CHECK-NEXT:    vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca]
5840; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
5841; CHECK-NEXT:    retq ## encoding: [0xc3]
5842  %res = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
5843  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
5844  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
5845  %res3 = add <8 x i32> %res, %res1
5846  %res4 = add <8 x i32> %res3, %res2
5847  ret <8 x i32> %res4
5848}
5849
5850declare <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)
5851
5852define <4 x i64>@test_int_x86_avx512_mask_psll_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
5853; CHECK-LABEL: test_int_x86_avx512_mask_psll_q_256:
5854; CHECK:       ## BB#0:
5855; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5856; CHECK-NEXT:    vpsllq %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf3,0xd1]
5857; CHECK-NEXT:    vpsllq %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf3,0xd9]
5858; CHECK-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xf3,0xc1]
5859; CHECK-NEXT:    vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb]
5860; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
5861; CHECK-NEXT:    retq ## encoding: [0xc3]
5862  %res = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
5863  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
5864  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
5865  %res3 = add <4 x i64> %res, %res1
5866  %res4 = add <4 x i64> %res3, %res2
5867  ret <4 x i64> %res4
5868}
5869
5870declare <2 x i64> @llvm.x86.avx512.mask.psll.qi.128(<2 x i64>, i32, <2 x i64>, i8)
5871
5872define <2 x i64>@test_int_x86_avx512_mask_psll_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
5873; CHECK-LABEL: test_int_x86_avx512_mask_psll_qi_128:
5874; CHECK:       ## BB#0:
5875; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5876; CHECK-NEXT:    vpsllq $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x73,0xf0,0x03]
5877; CHECK-NEXT:    vpsllq $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0x89,0x73,0xf0,0x03]
5878; CHECK-NEXT:    vpsllq $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x73,0xf0,0x03]
5879; CHECK-NEXT:    vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca]
5880; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
5881; CHECK-NEXT:    retq ## encoding: [0xc3]
5882  %res = call <2 x i64> @llvm.x86.avx512.mask.psll.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
5883  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psll.qi.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
5884  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psll.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
5885  %res3 = add <2 x i64> %res, %res1
5886  %res4 = add <2 x i64> %res3, %res2
5887  ret <2 x i64> %res4
5888}
5889
5890declare <4 x i64> @llvm.x86.avx512.mask.psll.qi.256(<4 x i64>, i32, <4 x i64>, i8)
5891
5892define <4 x i64>@test_int_x86_avx512_mask_psll_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
5893; CHECK-LABEL: test_int_x86_avx512_mask_psll_qi_256:
5894; CHECK:       ## BB#0:
5895; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5896; CHECK-NEXT:    vpsllq $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x73,0xf0,0x03]
5897; CHECK-NEXT:    vpsllq $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xa9,0x73,0xf0,0x03]
5898; CHECK-NEXT:    vpsllq $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x73,0xf0,0x03]
5899; CHECK-NEXT:    vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca]
5900; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
5901; CHECK-NEXT:    retq ## encoding: [0xc3]
5902  %res = call <4 x i64> @llvm.x86.avx512.mask.psll.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
5903  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psll.qi.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
5904  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psll.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
5905  %res3 = add <4 x i64> %res, %res1
5906  %res4 = add <4 x i64> %res3, %res2
5907 ret <4 x i64> %res4
5908}
5909
5910declare <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)
5911
5912define <4 x i32>@test_int_x86_avx512_mask_psrav4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
5913; CHECK-LABEL: test_int_x86_avx512_mask_psrav4_si:
5914; CHECK:       ## BB#0:
5915; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5916; CHECK-NEXT:    vpsravd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x46,0xd1]
5917; CHECK-NEXT:    vpsravd %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x46,0xd9]
5918; CHECK-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x46,0xc1]
5919; CHECK-NEXT:    vpaddd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xcb]
5920; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
5921; CHECK-NEXT:    retq ## encoding: [0xc3]
5922  %res = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
5923  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
5924  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
5925  %res3 = add <4 x i32> %res, %res1
5926  %res4 = add <4 x i32> %res3, %res2
5927  ret <4 x i32> %res4
5928}
5929
5930declare <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)
5931
5932define <8 x i32>@test_int_x86_avx512_mask_psrav8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
5933; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_si:
5934; CHECK:       ## BB#0:
5935; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5936; CHECK-NEXT:    vpsravd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x46,0xd1]
5937; CHECK-NEXT:    vpsravd %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x46,0xd9]
5938; CHECK-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0xc1]
5939; CHECK-NEXT:    vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb]
5940; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
5941; CHECK-NEXT:    retq ## encoding: [0xc3]
5942  %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
5943  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
5944  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
5945  %res3 = add <8 x i32> %res, %res1
5946  %res4 = add <8 x i32> %res3, %res2
5947  ret <8 x i32> %res4
5948}
5949
5950define <8 x i32>@test_int_x86_avx512_mask_psrav8_si_const() {
5951; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_si_const:
5952; CHECK:       ## BB#0:
5953; CHECK-NEXT:    vmovdqa32 {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
5954; CHECK-NEXT:    ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x05,A,A,A,A]
5955; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI371_0-4, kind: reloc_riprel_4byte
5956; CHECK-NEXT:    vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0x05,A,A,A,A]
5957; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI371_1-4, kind: reloc_riprel_4byte
5958; CHECK-NEXT:    retq ## encoding: [0xc3]
5959  %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>, <8 x i32> zeroinitializer, i8 -1)
5960  ret <8 x i32> %res
5961}
5962
5963declare <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
5964
5965define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
5966; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_128:
5967; CHECK:       ## BB#0:
5968; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5969; CHECK-NEXT:    vpsravq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1]
5970; CHECK-NEXT:    vpsravq %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x46,0xd9]
5971; CHECK-NEXT:    vpsravq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x46,0xc1]
5972; CHECK-NEXT:    vpaddq %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xcb]
5973; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
5974; CHECK-NEXT:    retq ## encoding: [0xc3]
5975  %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
5976  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
5977  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
5978  %res3 = add <2 x i64> %res, %res1
5979  %res4 = add <2 x i64> %res3, %res2
5980  ret <2 x i64> %res4
5981}
5982
5983define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128_const(i8 %x3) {
5984; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_128_const:
5985; CHECK:       ## BB#0:
5986; CHECK-NEXT:    vmovdqa64 {{.*#+}} xmm0 = [2,18446744073709551607]
5987; CHECK-NEXT:    ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x05,A,A,A,A]
5988; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI373_0-4, kind: reloc_riprel_4byte
5989; CHECK-NEXT:    vpsravq {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
5990; CHECK-NEXT:    ## fixup A - offset: 6, value: LCPI373_1-4, kind: reloc_riprel_4byte
5991; CHECK-NEXT:    retq ## encoding: [0xc3]
5992  %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> <i64 2, i64 -9>, <2 x i64> <i64 1, i64 90>, <2 x i64> zeroinitializer, i8 -1)
5993  ret <2 x i64> %res
5994}
5995
5996declare <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
5997
5998define <4 x i64>@test_int_x86_avx512_mask_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
5999; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_256:
6000; CHECK:       ## BB#0:
6001; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6002; CHECK-NEXT:    vpsravq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1]
6003; CHECK-NEXT:    vpsravq %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xd9]
6004; CHECK-NEXT:    vpsravq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x46,0xc1]
6005; CHECK-NEXT:    vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb]
6006; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
6007; CHECK-NEXT:    retq ## encoding: [0xc3]
6008  %res = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
6009  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
6010  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
6011  %res3 = add <4 x i64> %res, %res1
6012  %res4 = add <4 x i64> %res3, %res2
6013  ret <4 x i64> %res4
6014}
6015
6016declare <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8)
6017
6018define <2 x i64>@test_int_x86_avx512_mask_psllv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
6019; CHECK-LABEL: test_int_x86_avx512_mask_psllv2_di:
6020; CHECK:       ## BB#0:
6021; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6022; CHECK-NEXT:    vpsllvq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x47,0xd1]
6023; CHECK-NEXT:    vpsllvq %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x47,0xd9]
6024; CHECK-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x47,0xc1]
6025; CHECK-NEXT:    vpaddq %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xcb]
6026; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
6027; CHECK-NEXT:    retq ## encoding: [0xc3]
6028  %res = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
6029  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
6030  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
6031  %res3 = add <2 x i64> %res, %res1
6032  %res4 = add <2 x i64> %res3, %res2
6033  ret <2 x i64> %res4
6034}
6035
6036declare <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8)
6037
6038define <4 x i64>@test_int_x86_avx512_mask_psllv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
6039; CHECK-LABEL: test_int_x86_avx512_mask_psllv4_di:
6040; CHECK:       ## BB#0:
6041; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6042; CHECK-NEXT:    vpsllvq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x47,0xd1]
6043; CHECK-NEXT:    vpsllvq %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x47,0xd9]
6044; CHECK-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x47,0xc1]
6045; CHECK-NEXT:    vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb]
6046; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
6047; CHECK-NEXT:    retq ## encoding: [0xc3]
6048  %res = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
6049  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
6050  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
6051  %res3 = add <4 x i64> %res, %res1
6052  %res4 = add <4 x i64> %res3, %res2
6053  ret <4 x i64> %res4
6054}
6055
6056declare <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)
6057
6058define <4 x i32>@test_int_x86_avx512_mask_psllv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
6059; CHECK-LABEL: test_int_x86_avx512_mask_psllv4_si:
6060; CHECK:       ## BB#0:
6061; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6062; CHECK-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x47,0xd1]
6063; CHECK-NEXT:    vpsllvd %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x47,0xd9]
6064; CHECK-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x47,0xc1]
6065; CHECK-NEXT:    vpaddd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xcb]
6066; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
6067; CHECK-NEXT:    retq ## encoding: [0xc3]
6068  %res = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
6069  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
6070  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
6071  %res3 = add <4 x i32> %res, %res1
6072  %res4 = add <4 x i32> %res3, %res2
6073  ret <4 x i32> %res4
6074}
6075
6076declare <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)
6077
6078define <8 x i32>@test_int_x86_avx512_mask_psllv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
6079; CHECK-LABEL: test_int_x86_avx512_mask_psllv8_si:
6080; CHECK:       ## BB#0:
6081; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6082; CHECK-NEXT:    vpsllvd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x47,0xd1]
6083; CHECK-NEXT:    vpsllvd %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x47,0xd9]
6084; CHECK-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x47,0xc1]
6085; CHECK-NEXT:    vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb]
6086; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
6087; CHECK-NEXT:    retq ## encoding: [0xc3]
6088  %res = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
6089  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
6090  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
6091  %res3 = add <8 x i32> %res, %res1
6092  %res4 = add <8 x i32> %res3, %res2
6093  ret <8 x i32> %res4
6094}
6095
6096declare <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
6097
6098define <4 x i32>@test_int_x86_avx512_mask_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
6099; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_128:
6100; CHECK:       ## BB#0:
6101; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6102; CHECK-NEXT:    vprorvd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
6103; CHECK-NEXT:    vprorvd %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x14,0xd9]
6104; CHECK-NEXT:    vprorvd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x14,0xc1]
6105; CHECK-NEXT:    vpaddd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xcb]
6106; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
6107; CHECK-NEXT:    retq ## encoding: [0xc3]
6108  %res = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
6109  %res1 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
6110  %res2 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
6111  %res3 = add <4 x i32> %res, %res1
6112  %res4 = add <4 x i32> %res3, %res2
6113  ret <4 x i32> %res4
6114}
6115
6116declare <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
6117
6118define <8 x i32>@test_int_x86_avx512_mask_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
6119; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_256:
6120; CHECK:       ## BB#0:
6121; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6122; CHECK-NEXT:    vprorvd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
6123; CHECK-NEXT:    vprorvd %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xd9]
6124; CHECK-NEXT:    vprorvd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x14,0xc1]
6125; CHECK-NEXT:    vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb]
6126; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
6127; CHECK-NEXT:    retq ## encoding: [0xc3]
6128  %res = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
6129  %res1 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
6130  %res2 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
6131  %res3 = add <8 x i32> %res, %res1
6132  %res4 = add <8 x i32> %res3, %res2
6133  ret <8 x i32> %res4
6134}
6135
6136declare <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
6137
6138define <2 x i64>@test_int_x86_avx512_mask_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
6139; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_128:
6140; CHECK:       ## BB#0:
6141; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6142; CHECK-NEXT:    vprorvq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
6143; CHECK-NEXT:    vprorvq %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x14,0xd9]
6144; CHECK-NEXT:    vprorvq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x14,0xc1]
6145; CHECK-NEXT:    vpaddq %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xcb]
6146; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
6147; CHECK-NEXT:    retq ## encoding: [0xc3]
6148  %res = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
6149  %res1 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
6150  %res2 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
6151  %res3 = add <2 x i64> %res, %res1
6152  %res4 = add <2 x i64> %res3, %res2
6153  ret <2 x i64> %res4
6154}
6155
6156declare <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
6157
6158define <4 x i64>@test_int_x86_avx512_mask_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
6159; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_256:
6160; CHECK:       ## BB#0:
6161; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6162; CHECK-NEXT:    vprorvq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
6163; CHECK-NEXT:    vprorvq %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xd9]
6164; CHECK-NEXT:    vprorvq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x14,0xc1]
6165; CHECK-NEXT:    vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb]
6166; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
6167; CHECK-NEXT:    retq ## encoding: [0xc3]
6168  %res = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
6169  %res1 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
6170  %res2 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
6171  %res3 = add <4 x i64> %res, %res1
6172  %res4 = add <4 x i64> %res3, %res2
6173  ret <4 x i64> %res4
6174}
6175
6176declare <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32>, i32, <4 x i32>, i8)
6177
6178define <4 x i32>@test_int_x86_avx512_mask_prol_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
6179; CHECK-LABEL: test_int_x86_avx512_mask_prol_d_128:
6180; CHECK:       ## BB#0:
6181; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
6182; CHECK-NEXT:    vprold $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03]
6183; CHECK-NEXT:    vprold $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc8,0x03]
6184; CHECK-NEXT:    vprold $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc8,0x03]
6185; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca]
6186; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
6187; CHECK-NEXT:    retq ## encoding: [0xc3]
6188  %res = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
6189  %res1 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
6190  %res2 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
6191  %res3 = add <4 x i32> %res, %res1
6192  %res4 = add <4 x i32> %res3, %res2
6193  ret <4 x i32> %res4
6194}
6195
6196declare <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32>, i32, <8 x i32>, i8)
6197
6198define <8 x i32>@test_int_x86_avx512_mask_prol_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
6199; CHECK-LABEL: test_int_x86_avx512_mask_prol_d_256:
6200; CHECK:       ## BB#0:
6201; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
6202; CHECK-NEXT:    vprold $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03]
6203; CHECK-NEXT:    vprold $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc8,0x03]
6204; CHECK-NEXT:    vprold $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc8,0x03]
6205; CHECK-NEXT:    vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca]
6206; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
6207; CHECK-NEXT:    retq ## encoding: [0xc3]
6208  %res = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
6209  %res1 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
6210  %res2 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
6211  %res3 = add <8 x i32> %res, %res1
6212  %res4 = add <8 x i32> %res3, %res2
6213  ret <8 x i32> %res4
6214}
6215
6216declare <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64>, i32, <2 x i64>, i8)
6217
6218define <2 x i64>@test_int_x86_avx512_mask_prol_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
6219; CHECK-LABEL: test_int_x86_avx512_mask_prol_q_128:
6220; CHECK:       ## BB#0:
6221; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
6222; CHECK-NEXT:    vprolq $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03]
6223; CHECK-NEXT:    vprolq $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0x89,0x72,0xc8,0x03]
6224; CHECK-NEXT:    vprolq $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc8,0x03]
6225; CHECK-NEXT:    vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca]
6226; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
6227; CHECK-NEXT:    retq ## encoding: [0xc3]
6228  %res = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
6229  %res1 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
6230  %res2 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
6231  %res3 = add <2 x i64> %res, %res1
6232  %res4 = add <2 x i64> %res3, %res2
6233  ret <2 x i64> %res4
6234}
6235
6236declare <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64>, i32, <4 x i64>, i8)
6237
6238define <4 x i64>@test_int_x86_avx512_mask_prol_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
6239; CHECK-LABEL: test_int_x86_avx512_mask_prol_q_256:
6240; CHECK:       ## BB#0:
6241; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
6242; CHECK-NEXT:    vprolq $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03]
6243; CHECK-NEXT:    vprolq $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc8,0x03]
6244; CHECK-NEXT:    vprolq $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc8,0x03]
6245; CHECK-NEXT:    vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca]
6246; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
6247; CHECK-NEXT:    retq ## encoding: [0xc3]
6248  %res = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
6249  %res1 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
6250  %res2 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
6251  %res3 = add <4 x i64> %res, %res1
6252  %res4 = add <4 x i64> %res3, %res2
6253  ret <4 x i64> %res4
6254}
6255
6256declare <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
6257
6258define <4 x i32>@test_int_x86_avx512_mask_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
6259; CHECK-LABEL: test_int_x86_avx512_mask_prolv_d_128:
6260; CHECK:       ## BB#0:
6261; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6262; CHECK-NEXT:    vprolvd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
6263; CHECK-NEXT:    vprolvd %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x15,0xd9]
6264; CHECK-NEXT:    vprolvd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x15,0xc1]
6265; CHECK-NEXT:    vpaddd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xcb]
6266; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
6267; CHECK-NEXT:    retq ## encoding: [0xc3]
6268  %res = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
6269  %res1 = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
6270  %res2 = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
6271  %res3 = add <4 x i32> %res, %res1
6272  %res4 = add <4 x i32> %res3, %res2
6273  ret <4 x i32> %res4
6274}
6275
6276declare <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
6277
6278define <8 x i32>@test_int_x86_avx512_mask_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
6279; CHECK-LABEL: test_int_x86_avx512_mask_prolv_d_256:
6280; CHECK:       ## BB#0:
6281; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6282; CHECK-NEXT:    vprolvd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
6283; CHECK-NEXT:    vprolvd %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xd9]
6284; CHECK-NEXT:    vprolvd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x15,0xc1]
6285; CHECK-NEXT:    vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb]
6286; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
6287; CHECK-NEXT:    retq ## encoding: [0xc3]
6288  %res = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
6289  %res1 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
6290  %res2 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
6291  %res3 = add <8 x i32> %res, %res1
6292  %res4 = add <8 x i32> %res3, %res2
6293  ret <8 x i32> %res4
6294}
6295
6296declare <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
6297
6298define <2 x i64>@test_int_x86_avx512_mask_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
6299; CHECK-LABEL: test_int_x86_avx512_mask_prolv_q_128:
6300; CHECK:       ## BB#0:
6301; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6302; CHECK-NEXT:    vprolvq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
6303; CHECK-NEXT:    vprolvq %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x15,0xd9]
6304; CHECK-NEXT:    vprolvq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x15,0xc1]
6305; CHECK-NEXT:    vpaddq %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xcb]
6306; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
6307; CHECK-NEXT:    retq ## encoding: [0xc3]
6308  %res = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
6309  %res1 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
6310  %res2 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
6311  %res3 = add <2 x i64> %res, %res1
6312  %res4 = add <2 x i64> %res3, %res2
6313  ret <2 x i64> %res4
6314}
6315
6316declare <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
6317
6318define <4 x i64>@test_int_x86_avx512_mask_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
6319; CHECK-LABEL: test_int_x86_avx512_mask_prolv_q_256:
6320; CHECK:       ## BB#0:
6321; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6322; CHECK-NEXT:    vprolvq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
6323; CHECK-NEXT:    vprolvq %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xd9]
6324; CHECK-NEXT:    vprolvq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x15,0xc1]
6325; CHECK-NEXT:    vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb]
6326; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
6327; CHECK-NEXT:    retq ## encoding: [0xc3]
6328  %res = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
6329  %res1 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
6330  %res2 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
6331  %res3 = add <4 x i64> %res, %res1
6332  %res4 = add <4 x i64> %res3, %res2
6333  ret <4 x i64> %res4
6334}
6335
6336declare <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32>, i32, <4 x i32>, i8)
6337
6338define <4 x i32>@test_int_x86_avx512_mask_pror_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
6339; CHECK-LABEL: test_int_x86_avx512_mask_pror_d_128:
6340; CHECK:       ## BB#0:
6341; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
6342; CHECK-NEXT:    vprord $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03]
6343; CHECK-NEXT:    vprord $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc0,0x03]
6344; CHECK-NEXT:    vprord $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc0,0x03]
6345; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca]
6346; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
6347; CHECK-NEXT:    retq ## encoding: [0xc3]
6348  %res = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
6349  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
6350  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
6351  %res3 = add <4 x i32> %res, %res1
6352  %res4 = add <4 x i32> %res3, %res2
6353  ret <4 x i32> %res4
6354}
6355
6356declare <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32>, i32, <8 x i32>, i8)
6357
6358define <8 x i32>@test_int_x86_avx512_mask_pror_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
6359; CHECK-LABEL: test_int_x86_avx512_mask_pror_d_256:
6360; CHECK:       ## BB#0:
6361; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
6362; CHECK-NEXT:    vprord $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03]
6363; CHECK-NEXT:    vprord $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc0,0x03]
6364; CHECK-NEXT:    vprord $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc0,0x03]
6365; CHECK-NEXT:    vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca]
6366; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
6367; CHECK-NEXT:    retq ## encoding: [0xc3]
6368  %res = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
6369  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
6370  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
6371  %res3 = add <8 x i32> %res, %res1
6372  %res4 = add <8 x i32> %res3, %res2
6373  ret <8 x i32> %res4
6374}
6375
6376declare <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64>, i32, <2 x i64>, i8)
6377
6378define <2 x i64>@test_int_x86_avx512_mask_pror_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
6379; CHECK-LABEL: test_int_x86_avx512_mask_pror_q_128:
6380; CHECK:       ## BB#0:
6381; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
6382; CHECK-NEXT:    vprorq $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03]
6383; CHECK-NEXT:    vprorq $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0x89,0x72,0xc0,0x03]
6384; CHECK-NEXT:    vprorq $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc0,0x03]
6385; CHECK-NEXT:    vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca]
6386; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
6387; CHECK-NEXT:    retq ## encoding: [0xc3]
6388  %res = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
6389  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
6390  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
6391  %res3 = add <2 x i64> %res, %res1
6392  %res4 = add <2 x i64> %res3, %res2
6393  ret <2 x i64> %res4
6394}
6395
6396declare <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64>, i32, <4 x i64>, i8)
6397
6398define <4 x i64>@test_int_x86_avx512_mask_pror_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
6399; CHECK-LABEL: test_int_x86_avx512_mask_pror_q_256:
6400; CHECK:       ## BB#0:
6401; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
6402; CHECK-NEXT:    vprorq $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03]
6403; CHECK-NEXT:    vprorq $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc0,0x03]
6404; CHECK-NEXT:    vprorq $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc0,0x03]
6405; CHECK-NEXT:    vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca]
6406; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
6407; CHECK-NEXT:    retq ## encoding: [0xc3]
6408  %res = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
6409  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
6410  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
6411  %res3 = add <4 x i64> %res, %res1
6412  %res4 = add <4 x i64> %res3, %res2
6413  ret <4 x i64> %res4
6414}
6415
6416declare <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8>, <4 x i32>, i8)
6417
6418define <4 x i32>@test_int_x86_avx512_mask_pmovzxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
6419; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128:
6420; CHECK:       ## BB#0:
6421; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6422; CHECK-NEXT:    vpmovzxbd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x31,0xc8]
6423; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6424; CHECK-NEXT:    vpmovzxbd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x31,0xd0]
6425; CHECK-NEXT:    ## xmm2 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6426; CHECK-NEXT:    vpmovzxbd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x31,0xc0]
6427; CHECK-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6428; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca]
6429; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
6430; CHECK-NEXT:    retq ## encoding: [0xc3]
6431  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
6432  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
6433  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
6434  %res3 = add <4 x i32> %res, %res1
6435  %res4 = add <4 x i32> %res3, %res2
6436  ret <4 x i32> %res4
6437}
6438
6439declare <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8>, <8 x i32>, i8)
6440
6441define <8 x i32>@test_int_x86_avx512_mask_pmovzxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
6442; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256:
6443; CHECK:       ## BB#0:
6444; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6445; CHECK-NEXT:    vpmovzxbd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x31,0xc8]
6446; CHECK-NEXT:    ## ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
6447; CHECK-NEXT:    vpmovzxbd %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x31,0xd0]
6448; CHECK-NEXT:    ## ymm2 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
6449; CHECK-NEXT:    vpmovzxbd %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x31,0xc0]
6450; CHECK-NEXT:    ## ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
6451; CHECK-NEXT:    vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca]
6452; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
6453; CHECK-NEXT:    retq ## encoding: [0xc3]
6454  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
6455  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
6456  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
6457  %res3 = add <8 x i32> %res, %res1
6458  %res4 = add <8 x i32> %res3, %res2
6459  ret <8 x i32> %res4
6460}
6461
6462declare <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8>, <2 x i64>, i8)
6463
6464define <2 x i64>@test_int_x86_avx512_mask_pmovzxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
6465; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128:
6466; CHECK:       ## BB#0:
6467; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6468; CHECK-NEXT:    vpmovzxbq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x32,0xc8]
6469; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
6470; CHECK-NEXT:    vpmovzxbq %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x32,0xd0]
6471; CHECK-NEXT:    ## xmm2 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
6472; CHECK-NEXT:    vpmovzxbq %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x32,0xc0]
6473; CHECK-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
6474; CHECK-NEXT:    vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca]
6475; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
6476; CHECK-NEXT:    retq ## encoding: [0xc3]
6477  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
6478  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
6479  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
6480  %res3 = add <2 x i64> %res, %res1
6481  %res4 = add <2 x i64> %res3, %res2
6482  ret <2 x i64> %res4
6483}
6484
6485declare <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8>, <4 x i64>, i8)
6486
6487define <4 x i64>@test_int_x86_avx512_mask_pmovzxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
6488; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256:
6489; CHECK:       ## BB#0:
6490; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6491; CHECK-NEXT:    vpmovzxbq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x32,0xc8]
6492; CHECK-NEXT:    ## ymm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
6493; CHECK-NEXT:    vpmovzxbq %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x32,0xd0]
6494; CHECK-NEXT:    ## ymm2 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
6495; CHECK-NEXT:    vpmovzxbq %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x32,0xc0]
6496; CHECK-NEXT:    ## ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
6497; CHECK-NEXT:    vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca]
6498; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
6499; CHECK-NEXT:    retq ## encoding: [0xc3]
6500  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
6501  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
6502  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
6503  %res3 = add <4 x i64> %res, %res1
6504  %res4 = add <4 x i64> %res3, %res2
6505  ret <4 x i64> %res4
6506}
6507
6508declare <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32>, <2 x i64>, i8)
6509
6510define <2 x i64>@test_int_x86_avx512_mask_pmovzxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) {
6511; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128:
6512; CHECK:       ## BB#0:
6513; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6514; CHECK-NEXT:    vpmovzxdq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x35,0xc8]
6515; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero
6516; CHECK-NEXT:    vpmovzxdq %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x35,0xd0]
6517; CHECK-NEXT:    ## xmm2 {%k1} {z} = xmm0[0],zero,xmm0[1],zero
6518; CHECK-NEXT:    vpmovzxdq %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x35,0xc0]
6519; CHECK-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero
6520; CHECK-NEXT:    vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca]
6521; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
6522; CHECK-NEXT:    retq ## encoding: [0xc3]
6523  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2)
6524  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2)
6525  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1)
6526  %res3 = add <2 x i64> %res, %res1
6527  %res4 = add <2 x i64> %res3, %res2
6528  ret <2 x i64> %res4
6529}
6530
6531declare <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32>, <4 x i64>, i8)
6532
6533define <4 x i64>@test_int_x86_avx512_mask_pmovzxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) {
6534; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256:
6535; CHECK:       ## BB#0:
6536; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6537; CHECK-NEXT:    vpmovzxdq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x35,0xc8]
6538; CHECK-NEXT:    ## ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6539; CHECK-NEXT:    vpmovzxdq %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x35,0xd0]
6540; CHECK-NEXT:    ## ymm2 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6541; CHECK-NEXT:    vpmovzxdq %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x35,0xc0]
6542; CHECK-NEXT:    ## ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6543; CHECK-NEXT:    vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca]
6544; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
6545; CHECK-NEXT:    retq ## encoding: [0xc3]
6546  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
6547  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
6548  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1)
6549  %res3 = add <4 x i64> %res, %res1
6550  %res4 = add <4 x i64> %res3, %res2
6551  ret <4 x i64> %res4
6552}
6553
6554declare <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16>, <4 x i32>, i8)
6555
6556define <4 x i32>@test_int_x86_avx512_mask_pmovzxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
6557; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128:
6558; CHECK:       ## BB#0:
6559; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6560; CHECK-NEXT:    vpmovzxwd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x33,0xc8]
6561; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6562; CHECK-NEXT:    vpmovzxwd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x33,0xd0]
6563; CHECK-NEXT:    ## xmm2 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6564; CHECK-NEXT:    vpmovzxwd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x33,0xc0]
6565; CHECK-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6566; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca]
6567; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
6568; CHECK-NEXT:    retq ## encoding: [0xc3]
6569  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
6570  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
6571  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
6572  %res3 = add <4 x i32> %res, %res1
6573  %res4 = add <4 x i32> %res3, %res2
6574  ret <4 x i32> %res4
6575}
6576
6577declare <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16>, <8 x i32>, i8)
6578
6579define <8 x i32>@test_int_x86_avx512_mask_pmovzxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
6580; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256:
6581; CHECK:       ## BB#0:
6582; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6583; CHECK-NEXT:    vpmovzxwd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x33,0xc8]
6584; CHECK-NEXT:    ## ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
6585; CHECK-NEXT:    vpmovzxwd %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x33,0xd0]
6586; CHECK-NEXT:    ## ymm2 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
6587; CHECK-NEXT:    vpmovzxwd %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x33,0xc0]
6588; CHECK-NEXT:    ## ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
6589; CHECK-NEXT:    vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca]
6590; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
6591; CHECK-NEXT:    retq ## encoding: [0xc3]
6592  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
6593  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
6594  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
6595  %res3 = add <8 x i32> %res, %res1
6596  %res4 = add <8 x i32> %res3, %res2
6597  ret <8 x i32> %res4
6598}
6599
6600declare <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16>, <2 x i64>, i8)
6601
6602define <2 x i64>@test_int_x86_avx512_mask_pmovzxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
6603; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128:
6604; CHECK:       ## BB#0:
6605; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6606; CHECK-NEXT:    vpmovzxwq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x34,0xc8]
6607; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
6608; CHECK-NEXT:    vpmovzxwq %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x34,0xd0]
6609; CHECK-NEXT:    ## xmm2 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
6610; CHECK-NEXT:    vpmovzxwq %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x34,0xc0]
6611; CHECK-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
6612; CHECK-NEXT:    vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca]
6613; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
6614; CHECK-NEXT:    retq ## encoding: [0xc3]
6615  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
6616  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
6617  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
6618  %res3 = add <2 x i64> %res, %res1
6619  %res4 = add <2 x i64> %res3, %res2
6620  ret <2 x i64> %res4
6621}
6622
6623declare <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16>, <4 x i64>, i8)
6624
6625define <4 x i64>@test_int_x86_avx512_mask_pmovzxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
6626; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256:
6627; CHECK:       ## BB#0:
6628; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6629; CHECK-NEXT:    vpmovzxwq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x34,0xc8]
6630; CHECK-NEXT:    ## ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6631; CHECK-NEXT:    vpmovzxwq %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x34,0xd0]
6632; CHECK-NEXT:    ## ymm2 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6633; CHECK-NEXT:    vpmovzxwq %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x34,0xc0]
6634; CHECK-NEXT:    ## ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6635; CHECK-NEXT:    vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca]
6636; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
6637; CHECK-NEXT:    retq ## encoding: [0xc3]
6638  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
6639  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
6640  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
6641  %res3 = add <4 x i64> %res, %res1
6642  %res4 = add <4 x i64> %res3, %res2
6643  ret <4 x i64> %res4
6644}
6645
6646declare <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8>, <4 x i32>, i8)
6647
6648define <4 x i32>@test_int_x86_avx512_mask_pmovsxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
6649; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128:
6650; CHECK:       ## BB#0:
6651; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6652; CHECK-NEXT:    vpmovsxbd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x21,0xc8]
6653; CHECK-NEXT:    vpmovsxbd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x21,0xd0]
6654; CHECK-NEXT:    vpmovsxbd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x21,0xc0]
6655; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca]
6656; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
6657; CHECK-NEXT:    retq ## encoding: [0xc3]
6658  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
6659  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
6660  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
6661  %res3 = add <4 x i32> %res, %res1
6662  %res4 = add <4 x i32> %res3, %res2
6663  ret <4 x i32> %res4
6664}
6665
6666declare <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8>, <8 x i32>, i8)
6667
6668define <8 x i32>@test_int_x86_avx512_mask_pmovsxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
6669; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256:
6670; CHECK:       ## BB#0:
6671; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6672; CHECK-NEXT:    vpmovsxbd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x21,0xc8]
6673; CHECK-NEXT:    vpmovsxbd %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x21,0xd0]
6674; CHECK-NEXT:    vpmovsxbd %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x21,0xc0]
6675; CHECK-NEXT:    vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca]
6676; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
6677; CHECK-NEXT:    retq ## encoding: [0xc3]
6678  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
6679  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
6680  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
6681  %res3 = add <8 x i32> %res, %res1
6682  %res4 = add <8 x i32> %res3, %res2
6683  ret <8 x i32> %res4
6684}
6685
6686declare <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8>, <2 x i64>, i8)
6687
6688define <2 x i64>@test_int_x86_avx512_mask_pmovsxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
6689; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128:
6690; CHECK:       ## BB#0:
6691; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6692; CHECK-NEXT:    vpmovsxbq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x22,0xc8]
6693; CHECK-NEXT:    vpmovsxbq %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x22,0xd0]
6694; CHECK-NEXT:    vpmovsxbq %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x22,0xc0]
6695; CHECK-NEXT:    vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca]
6696; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
6697; CHECK-NEXT:    retq ## encoding: [0xc3]
6698  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
6699  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
6700  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
6701  %res3 = add <2 x i64> %res, %res1
6702  %res4 = add <2 x i64> %res3, %res2
6703  ret <2 x i64> %res4
6704}
6705
6706declare <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8>, <4 x i64>, i8)
6707
6708define <4 x i64>@test_int_x86_avx512_mask_pmovsxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
6709; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256:
6710; CHECK:       ## BB#0:
6711; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6712; CHECK-NEXT:    vpmovsxbq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x22,0xc8]
6713; CHECK-NEXT:    vpmovsxbq %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x22,0xd0]
6714; CHECK-NEXT:    vpmovsxbq %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x22,0xc0]
6715; CHECK-NEXT:    vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca]
6716; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
6717; CHECK-NEXT:    retq ## encoding: [0xc3]
6718  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
6719  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
6720  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
6721  %res3 = add <4 x i64> %res, %res1
6722  %res4 = add <4 x i64> %res3, %res2
6723  ret <4 x i64> %res4
6724}
6725
6726declare <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16>, <4 x i32>, i8)
6727
6728define <4 x i32>@test_int_x86_avx512_mask_pmovsxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
6729; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128:
6730; CHECK:       ## BB#0:
6731; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6732; CHECK-NEXT:    vpmovsxwd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x23,0xc8]
6733; CHECK-NEXT:    vpmovsxwd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x23,0xd0]
6734; CHECK-NEXT:    vpmovsxwd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x23,0xc0]
6735; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca]
6736; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
6737; CHECK-NEXT:    retq ## encoding: [0xc3]
6738  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
6739  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
6740  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
6741  %res3 = add <4 x i32> %res, %res1
6742  %res4 = add <4 x i32> %res3, %res2
6743  ret <4 x i32> %res4
6744}
6745
6746declare <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16>, <8 x i32>, i8)
6747
6748define <8 x i32>@test_int_x86_avx512_mask_pmovsxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
6749; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256:
6750; CHECK:       ## BB#0:
6751; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6752; CHECK-NEXT:    vpmovsxwd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x23,0xc8]
6753; CHECK-NEXT:    vpmovsxwd %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x23,0xd0]
6754; CHECK-NEXT:    vpmovsxwd %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x23,0xc0]
6755; CHECK-NEXT:    vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca]
6756; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
6757; CHECK-NEXT:    retq ## encoding: [0xc3]
6758  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
6759  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
6760  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
6761  %res3 = add <8 x i32> %res, %res1
6762  %res4 = add <8 x i32> %res3, %res2
6763  ret <8 x i32> %res4
6764}
6765
6766declare <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16>, <2 x i64>, i8)
6767
6768define <2 x i64>@test_int_x86_avx512_mask_pmovsxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
6769; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128:
6770; CHECK:       ## BB#0:
6771; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6772; CHECK-NEXT:    vpmovsxwq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x24,0xc8]
6773; CHECK-NEXT:    vpmovsxwq %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x24,0xd0]
6774; CHECK-NEXT:    vpmovsxwq %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x24,0xc0]
6775; CHECK-NEXT:    vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca]
6776; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
6777; CHECK-NEXT:    retq ## encoding: [0xc3]
6778  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
6779  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
6780  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
6781  %res3 = add <2 x i64> %res, %res1
6782  %res4 = add <2 x i64> %res3, %res2
6783  ret <2 x i64> %res4
6784}
6785
6786declare <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16>, <4 x i64>, i8)
6787
6788define <4 x i64>@test_int_x86_avx512_mask_pmovsxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
6789; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256:
6790; CHECK:       ## BB#0:
6791; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6792; CHECK-NEXT:    vpmovsxwq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x24,0xc8]
6793; CHECK-NEXT:    vpmovsxwq %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x24,0xd0]
6794; CHECK-NEXT:    vpmovsxwq %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x24,0xc0]
6795; CHECK-NEXT:    vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca]
6796; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
6797; CHECK-NEXT:    retq ## encoding: [0xc3]
6798  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
6799  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
6800  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
6801  %res3 = add <4 x i64> %res, %res1
6802  %res4 = add <4 x i64> %res3, %res2
6803  ret <4 x i64> %res4
6804}
6805
6806declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8)
6807
6808define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
6809; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_256:
6810; CHECK:       ## BB#0:
6811; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6812; CHECK-NEXT:    vpermpd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
6813; CHECK-NEXT:    vpermpd %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xd8]
6814; CHECK-NEXT:    vpermpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0x16,0xc0]
6815; CHECK-NEXT:    vaddpd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xcb]
6816; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
6817; CHECK-NEXT:    retq ## encoding: [0xc3]
6818  %res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
6819  %res1 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
6820  %res2 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
6821  %res3 = fadd <4 x double> %res, %res1
6822  %res4 = fadd <4 x double> %res3, %res2
6823  ret <4 x double> %res4
6824}
6825
6826declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
6827
6828define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
6829; CHECK-LABEL: test_int_x86_avx512_mask_permvar_di_256:
6830; CHECK:       ## BB#0:
6831; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6832; CHECK-NEXT:    vpermq %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
6833; CHECK-NEXT:    vpermq %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xd8]
6834; CHECK-NEXT:    vpermq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0x36,0xc0]
6835; CHECK-NEXT:    vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb]
6836; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
6837; CHECK-NEXT:    retq ## encoding: [0xc3]
6838  %res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
6839  %res1 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
6840  %res2 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
6841  %res3 = add <4 x i64> %res, %res1
6842  %res4 = add <4 x i64> %res3, %res2
6843  ret <4 x i64> %res4
6844}
6845
6846declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8)
6847
6848define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
6849; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_256:
6850; CHECK:       ## BB#0:
6851; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6852; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0]
6853; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x16,0xd8]
6854; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0x16,0xc0]
6855; CHECK-NEXT:    vaddps %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xcb]
6856; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
6857; CHECK-NEXT:    retq ## encoding: [0xc3]
6858  %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
6859  %res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
6860  %res2 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
6861  %res3 = fadd <8 x float> %res, %res1
6862  %res4 = fadd <8 x float> %res3, %res2
6863  ret <8 x float> %res4
6864}
6865
6866declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
6867
6868define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
6869; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_256:
6870; CHECK:       ## BB#0:
6871; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6872; CHECK-NEXT:    vpermd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0]
6873; CHECK-NEXT:    vpermd %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x36,0xd8]
6874; CHECK-NEXT:    vpermd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0x36,0xc0]
6875; CHECK-NEXT:    vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb]
6876; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
6877; CHECK-NEXT:    retq ## encoding: [0xc3]
6878  %res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
6879  %res1 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
6880  %res2 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
6881  %res3 = add <8 x i32> %res, %res1
6882  %res4 = add <8 x i32> %res3, %res2
6883  ret <8 x i32> %res4
6884}
6885
6886declare <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8)
6887
6888define <2 x double>@test_int_x86_avx512_mask_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {
6889; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_128:
6890; CHECK:       ## BB#0:
6891; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6892; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
6893; CHECK-NEXT:    vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x54,0xda,0x05]
6894; CHECK-NEXT:    vpxord %xmm4, %xmm4, %xmm4 ## encoding: [0x62,0xf1,0x5d,0x08,0xef,0xe4]
6895; CHECK-NEXT:    vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xe2,0x04]
6896; CHECK-NEXT:    vfixupimmpd $3, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0xf5,0x08,0x54,0xc2,0x03]
6897; CHECK-NEXT:    vaddpd %xmm4, %xmm3, %xmm1 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xcc]
6898; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
6899; CHECK-NEXT:    retq ## encoding: [0xc3]
6900  %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1,<2 x i64> %x2, i32 5, i8 %x4)
6901  %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> zeroinitializer, <2 x double> %x1, <2 x i64> %x2, i32 4, i8 %x4)
6902  %res2 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 3, i8 -1)
6903  %res3 = fadd <2 x double> %res, %res1
6904  %res4 = fadd <2 x double> %res3, %res2
6905  ret <2 x double> %res4
6906}
6907
6908declare <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8)
6909
6910define <2 x double>@test_int_x86_avx512_maskz_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {
6911; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_128:
6912; CHECK:       ## BB#0:
6913; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6914; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
6915; CHECK-NEXT:    vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xda,0x05]
6916; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
6917; CHECK-NEXT:    vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xc2,0x03]
6918; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0]
6919; CHECK-NEXT:    retq ## encoding: [0xc3]
6920  %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4)
6921  %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 3, i8 %x4)
6922  ;%res2 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 4, i8 -1)
6923  %res3 = fadd <2 x double> %res, %res1
6924  ;%res4 = fadd <2 x double> %res3, %res2
6925  ret <2 x double> %res3
6926}
6927
6928declare <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double>, <4 x double>, <4 x i64>, i32, i8)
6929
6930define <4 x double>@test_int_x86_avx512_mask_fixupimm_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i8 %x4) {
6931; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_256:
6932; CHECK:       ## BB#0:
6933; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6934; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
6935; CHECK-NEXT:    vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x54,0xda,0x04]
6936; CHECK-NEXT:    vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4]
6937; CHECK-NEXT:    vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xe2,0x05]
6938; CHECK-NEXT:    vfixupimmpd $3, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
6939; CHECK-NEXT:    vaddpd %ymm4, %ymm3, %ymm1 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xcc]
6940; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
6941; CHECK-NEXT:    retq ## encoding: [0xc3]
6942  %res = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 4, i8 %x4)
6943  %res1 = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> zeroinitializer, <4 x double> %x1, <4 x i64> %x2 , i32 5, i8 %x4)
6944  %res2 = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 3, i8 -1)
6945  %res3 = fadd <4 x double> %res, %res1
6946  %res4 = fadd <4 x double> %res3, %res2
6947  ret <4 x double> %res4
6948}
6949
6950declare <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double>, <4 x double>, <4 x i64>, i32, i8)
6951
6952define <4 x double>@test_int_x86_avx512_maskz_fixupimm_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i8 %x4) {
6953; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_256:
6954; CHECK:       ## BB#0:
6955; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6956; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
6957; CHECK-NEXT:    vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xda,0x05]
6958; CHECK-NEXT:    vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4]
6959; CHECK-NEXT:    vmovaps %zmm0, %zmm5 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe8]
6960; CHECK-NEXT:    vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xec,0x04]
6961; CHECK-NEXT:    vfixupimmpd $3, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
6962; CHECK-NEXT:    vaddpd %ymm5, %ymm3, %ymm1 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xcd]
6963; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
6964; CHECK-NEXT:    retq ## encoding: [0xc3]
6965  %res = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 5, i8 %x4)
6966  %res1 = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> zeroinitializer, i32 4, i8 %x4)
6967  %res2 = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 3, i8 -1)
6968  %res3 = fadd <4 x double> %res, %res1
6969  %res4 = fadd <4 x double> %res3, %res2
6970  ret <4 x double> %res4
6971}
6972
6973declare <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float>, <4 x float>, <4 x i32>, i32, i8)
6974
6975define <4 x float>@test_int_x86_avx512_mask_fixupimm_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) {
6976; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_128:
6977; CHECK:       ## BB#0:
6978; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6979; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
6980; CHECK-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x54,0xda,0x05]
6981; CHECK-NEXT:    vmovaps %zmm0, %zmm4 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
6982; CHECK-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05]
6983; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
6984; CHECK-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x54,0xc2,0x05]
6985; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0]
6986; CHECK-NEXT:    vaddps %xmm4, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc4]
6987; CHECK-NEXT:    retq ## encoding: [0xc3]
6988  %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4)
6989  %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4)
6990  %res2 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1)
6991  %res3 = fadd <4 x float> %res, %res1
6992  %res4 = fadd <4 x float> %res3, %res2
6993  ret <4 x float> %res4
6994}
6995
6996declare <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float>, <4 x float>, <4 x i32>, i32, i8)
6997
6998define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) {
6999; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_128:
7000; CHECK:       ## BB#0:
7001; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
7002; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
7003; CHECK-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x54,0xda,0x05]
7004; CHECK-NEXT:    vmovaps %zmm0, %zmm4 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
7005; CHECK-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05]
7006; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
7007; CHECK-NEXT:    vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x54,0xc2,0x05]
7008; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0]
7009; CHECK-NEXT:    vaddps %xmm4, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc4]
7010; CHECK-NEXT:    retq ## encoding: [0xc3]
7011  %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4)
7012  %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4)
7013  %res2 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1)
7014  %res3 = fadd <4 x float> %res, %res1
7015  %res4 = fadd <4 x float> %res3, %res2
7016  ret <4 x float> %res4
7017}
7018
7019declare <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float>, <8 x float>, <8 x i32>, i32, i8)
7020
7021define <8 x float>@test_int_x86_avx512_mask_fixupimm_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i8 %x4) {
7022; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_256:
7023; CHECK:       ## BB#0:
7024; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
7025; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
7026; CHECK-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xda,0x05]
7027; CHECK-NEXT:    vmovaps %zmm0, %zmm4 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
7028; CHECK-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm4 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05]
7029; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
7030; CHECK-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xc2,0x05]
7031; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0]
7032; CHECK-NEXT:    vaddps %ymm4, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc4]
7033; CHECK-NEXT:    retq ## encoding: [0xc3]
7034  %res = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 %x4)
7035  %res1 = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> zeroinitializer, i32 5, i8 %x4)
7036  %res2 = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 -1)
7037  %res3 = fadd <8 x float> %res, %res1
7038  %res4 = fadd <8 x float> %res3, %res2
7039  ret <8 x float> %res4
7040}
7041
7042declare <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float>, <8 x float>, <8 x i32>, i32, i8)
7043
7044define <8 x float>@test_int_x86_avx512_maskz_fixupimm_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i8 %x4) {
7045; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_256:
7046; CHECK:       ## BB#0:
7047; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
7048; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
7049; CHECK-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xda,0x05]
7050; CHECK-NEXT:    vmovaps %zmm0, %zmm4 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
7051; CHECK-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm4 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05]
7052; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
7053; CHECK-NEXT:    vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xc2,0x05]
7054; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0]
7055; CHECK-NEXT:    vaddps %ymm4, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc4]
7056; CHECK-NEXT:    retq ## encoding: [0xc3]
7057  %res = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 %x4)
7058  %res1 = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> zeroinitializer, i32 5, i8 %x4)
7059  %res2 = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 -1)
7060  %res3 = fadd <8 x float> %res, %res1
7061  %res4 = fadd <8 x float> %res3, %res2
7062  ret <8 x float> %res4
7063}
7064
7065declare i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32>, <4 x i32>,i8)
7066
7067define i8@test_int_x86_avx512_ptestm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
7068; CHECK-LABEL: test_int_x86_avx512_ptestm_d_128:
7069; CHECK:       ## BB#0:
7070; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
7071; CHECK-NEXT:    vptestmd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x27,0xc1]
7072; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
7073; CHECK-NEXT:    vptestmd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1]
7074; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
7075; CHECK-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
7076; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
7077; CHECK-NEXT:    retq ## encoding: [0xc3]
7078  %res = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
7079  %res1 = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1)
7080  %res2 = add i8 %res, %res1
7081  ret i8 %res2
7082}
7083
7084declare i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32>, <8 x i32>, i8)
7085
7086define i8@test_int_x86_avx512_ptestm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
7087; CHECK-LABEL: test_int_x86_avx512_ptestm_d_256:
7088; CHECK:       ## BB#0:
7089; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
7090; CHECK-NEXT:    vptestmd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x27,0xc1]
7091; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
7092; CHECK-NEXT:    vptestmd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1]
7093; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
7094; CHECK-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
7095; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
7096; CHECK-NEXT:    retq ## encoding: [0xc3]
7097  %res = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
7098  %res1 = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1)
7099  %res2 = add i8 %res, %res1
7100  ret i8 %res2
7101}
7102
7103declare i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64>, <2 x i64>, i8)
7104
7105define i8@test_int_x86_avx512_ptestm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
7106; CHECK-LABEL: test_int_x86_avx512_ptestm_q_128:
7107; CHECK:       ## BB#0:
7108; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
7109; CHECK-NEXT:    vptestmq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x27,0xc1]
7110; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
7111; CHECK-NEXT:    vptestmq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1]
7112; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
7113; CHECK-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
7114; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
7115; CHECK-NEXT:    retq ## encoding: [0xc3]
7116  %res = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
7117  %res1 = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1)
7118  %res2 = add i8 %res, %res1
7119  ret i8 %res2
7120}
7121
7122declare i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64>, <4 x i64>, i8)
7123
7124define i8@test_int_x86_avx512_ptestm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
7125; CHECK-LABEL: test_int_x86_avx512_ptestm_q_256:
7126; CHECK:       ## BB#0:
7127; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
7128; CHECK-NEXT:    vptestmq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x27,0xc1]
7129; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
7130; CHECK-NEXT:    vptestmq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1]
7131; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
7132; CHECK-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
7133; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
7134; CHECK-NEXT:    retq ## encoding: [0xc3]
7135  %res = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
7136  %res1 = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1)
7137  %res2 = add i8 %res, %res1
7138  ret i8 %res2
7139}
7140
7141declare i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32>, <4 x i32>, i8 %x2)
7142
7143define i8@test_int_x86_avx512_ptestnm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
7144; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_128:
7145; CHECK:       ## BB#0:
7146; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
7147; CHECK-NEXT:    vptestnmd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x27,0xc1]
7148; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
7149; CHECK-NEXT:    vptestnmd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1]
7150; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
7151; CHECK-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
7152; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
7153; CHECK-NEXT:    retq ## encoding: [0xc3]
7154  %res = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
7155  %res1 = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1)
7156  %res2 = add i8 %res, %res1
7157  ret i8 %res2
7158}
7159
7160declare i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32>, <8 x i32>, i8 %x2)
7161
7162define i8@test_int_x86_avx512_ptestnm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
7163; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_256:
7164; CHECK:       ## BB#0:
7165; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
7166; CHECK-NEXT:    vptestnmd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x27,0xc1]
7167; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
7168; CHECK-NEXT:    vptestnmd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1]
7169; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
7170; CHECK-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
7171; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
7172; CHECK-NEXT:    retq ## encoding: [0xc3]
7173  %res = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
7174  %res1 = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1)
7175  %res2 = add i8 %res, %res1
7176  ret i8 %res2
7177}
7178
7179declare i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64>, <2 x i64>, i8 %x2)
7180
7181define i8@test_int_x86_avx512_ptestnm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
7182; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_128:
7183; CHECK:       ## BB#0:
7184; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
7185; CHECK-NEXT:    vptestnmq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x09,0x27,0xc1]
7186; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
7187; CHECK-NEXT:    vptestnmq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1]
7188; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
7189; CHECK-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
7190; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
7191; CHECK-NEXT:    retq ## encoding: [0xc3]
7192  %res = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
7193  %res1 = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1)
7194  %res2 = add i8 %res, %res1
7195  ret i8 %res2
7196}
7197
7198declare i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64>, <4 x i64>, i8 %x2)
7199
7200define i8@test_int_x86_avx512_ptestnm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
7201; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_256:
7202; CHECK:       ## BB#0:
7203; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
7204; CHECK-NEXT:    vptestnmq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x29,0x27,0xc1]
7205; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
7206; CHECK-NEXT:    vptestnmq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1]
7207; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
7208; CHECK-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
7209; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
7210; CHECK-NEXT:    retq ## encoding: [0xc3]
7211  %res = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
7212  %res1 = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1)
7213  %res2 = add i8 %res, %res1
7214  ret i8 %res2
7215}
7216
7217declare <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32, <8 x i32>, i8)
7218
7219define <8 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_256(i32 %x0, <8 x i32> %x1, i8 %mask) {
7220; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256:
7221; CHECK:       ## BB#0:
7222; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
7223; CHECK-NEXT:    vpbroadcastd %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7c,0xc7]
7224; CHECK-NEXT:    vpbroadcastd %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7c,0xcf]
7225; CHECK-NEXT:    vpbroadcastd %edi, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7c,0xd7]
7226; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0]
7227; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
7228; CHECK-NEXT:    retq ## encoding: [0xc3]
7229  %res = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 -1)
7230  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 %mask)
7231  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> zeroinitializer, i8 %mask)
7232  %res3 = add <8 x i32> %res, %res1
7233  %res4 = add <8 x i32> %res2, %res3
7234  ret <8 x i32> %res4
7235}
7236
7237declare <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32, <4 x i32>, i8)
7238
7239define <4 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_128(i32 %x0, <4 x i32> %x1, i8 %mask) {
7240; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128:
7241; CHECK:       ## BB#0:
7242; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
7243; CHECK-NEXT:    vpbroadcastd %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7c,0xc7]
7244; CHECK-NEXT:    vpbroadcastd %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7c,0xcf]
7245; CHECK-NEXT:    vpbroadcastd %edi, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xd7]
7246; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0]
7247; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
7248; CHECK-NEXT:    retq ## encoding: [0xc3]
7249  %res = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 -1)
7250  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 %mask)
7251  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> zeroinitializer, i8 %mask)
7252  %res3 = add <4 x i32> %res, %res1
7253  %res4 = add <4 x i32> %res2, %res3
7254  ret <4 x i32> %res4
7255}
7256
7257declare <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64, <4 x i64>, i8)
7258
7259define <4 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_256(i64 %x0, <4 x i64> %x1, i8 %mask) {
7260; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
7261; CHECK:       ## BB#0:
7262; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
7263; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x7c,0xc7]
7264; CHECK-NEXT:    vpbroadcastq %rdi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x7c,0xcf]
7265; CHECK-NEXT:    vpbroadcastq %rdi, %ymm2 ## encoding: [0x62,0xf2,0xfd,0x28,0x7c,0xd7]
7266; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0]
7267; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
7268; CHECK-NEXT:    retq ## encoding: [0xc3]
7269  %res = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 -1)
7270  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 %mask)
7271  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> zeroinitializer,i8 %mask)
7272  %res3 = add <4 x i64> %res, %res1
7273  %res4 = add <4 x i64> %res2, %res3
7274  ret <4 x i64> %res4
7275}
7276
7277declare <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64, <2 x i64>, i8)
7278
7279define <2 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_128(i64 %x0, <2 x i64> %x1, i8 %mask) {
7280; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
7281; CHECK:       ## BB#0:
7282; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
7283; CHECK-NEXT:    vpbroadcastq %rdi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x7c,0xc7]
7284; CHECK-NEXT:    vpbroadcastq %rdi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x7c,0xcf]
7285; CHECK-NEXT:    vpbroadcastq %rdi, %xmm2 ## encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xd7]
7286; CHECK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0]
7287; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
7288; CHECK-NEXT:    retq ## encoding: [0xc3]
7289  %res = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 -1)
7290  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 %mask)
7291  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> zeroinitializer,i8 %mask)
7292  %res3 = add <2 x i64> %res, %res1
7293  %res4 = add <2 x i64> %res2, %res3
7294  ret <2 x i64> %res4
7295}
7296