; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver4 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 | FileCheck %s --check-prefixes=CHECK,AVX512

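; Splat shift pair (x >> 1) | (x << 31) should combine into a single immediate rotate
; (XOP vprotd / AVX512 vprold).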
define <4 x i32> @rot_v4i32_splat(<4 x i32> %x) {
; XOP-LABEL: rot_v4i32_splat:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotd $31, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX512-LABEL: rot_v4i32_splat:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vprold $31, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %2 = shl <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

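; Per-element shift amounts still combine into a variable rotate, with the
; amounts loaded from a constant pool.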
define <4 x i32> @rot_v4i32_non_splat(<4 x i32> %x) {
; XOP-LABEL: rot_v4i32_non_splat:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotd {{.*}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX512-LABEL: rot_v4i32_non_splat:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vprolvd {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = lshr <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
  %2 = shl <4 x i32> %x, <i32 31, i32 30, i32 29, i32 28>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

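; Masks applied to both halves of the splat shift pair are merged into a single
; vpand after the rotate.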
define <4 x i32> @rot_v4i32_splat_2masks(<4 x i32> %x) {
; XOP-LABEL: rot_v4i32_splat_2masks:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotd $31, %xmm0, %xmm0
; XOP-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX512-LABEL: rot_v4i32_splat_2masks:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vprold $31, %xmm0, %xmm0
; AVX512-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %2 = and <4 x i32> %1, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>

  %3 = shl <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  %4 = and <4 x i32> %3, <i32 0, i32 4294901760, i32 0, i32 4294901760>
  %5 = or <4 x i32> %2, %4
  ret <4 x i32> %5
}

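; As above, but with non-uniform rotate amounts: a variable rotate followed by a
; single vpand.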
define <4 x i32> @rot_v4i32_non_splat_2masks(<4 x i32> %x) {
; XOP-LABEL: rot_v4i32_non_splat_2masks:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotd {{.*}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX512-LABEL: rot_v4i32_non_splat_2masks:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vprolvd {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = lshr <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
  %2 = and <4 x i32> %1, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>

  %3 = shl <4 x i32> %x, <i32 31, i32 30, i32 29, i32 28>
  %4 = and <4 x i32> %3, <i32 0, i32 4294901760, i32 0, i32 4294901760>
  %5 = or <4 x i32> %2, %4
  ret <4 x i32> %5
}

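; Lane 0 is rotated by 0 and the shuffle broadcasts lane 0, so the whole
; sequence reduces to a broadcast of the input.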
define <4 x i32> @rot_v4i32_zero_non_splat(<4 x i32> %x) {
; XOPAVX1-LABEL: rot_v4i32_zero_non_splat:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: rot_v4i32_zero_non_splat:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vbroadcastss %xmm0, %xmm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: rot_v4i32_zero_non_splat:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vbroadcastss %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 3>)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %2
}

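; Every element of an ashr-by-31 result is all-zeros or all-ones, so the rotate
; is a no-op and only the arithmetic shift remains.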
define <4 x i32> @rot_v4i32_allsignbits(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: rot_v4i32_allsignbits:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrad $31, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  %2 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %1, <4 x i32> %1, <4 x i32> %y)
  ret <4 x i32> %2
}

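; Rotate by 1 sandwiched between arithmetic shifts and a sign-bit mask; the ashr
; after the rotate is combined away, leaving one variable shift, the rotate and
; a vpand.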
define <4 x i32> @rot_v4i32_mask_ashr0(<4 x i32> %a0) {
; XOPAVX1-LABEL: rot_v4i32_mask_ashr0:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpshad {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT:    vprotd $1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: rot_v4i32_mask_ashr0:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX2-NEXT:    vprotd $1, %xmm0, %xmm0
; XOPAVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: rot_v4i32_mask_ashr0:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vprold $1, %xmm0, %xmm0
; AVX512-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 28>
  %2 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  %3 = ashr <4 x i32> %2, <i32 1, i32 2, i32 3, i32 4>
  %4 = and <4 x i32> %3, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
  ret <4 x i32> %4
}

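; Only lane 0 feeds the broadcast, so the leading variable ashr becomes an
; immediate vpsrad $25; after the rotate by 1 and the broadcast, only a single
; vpand remains.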
define <4 x i32> @rot_v4i32_mask_ashr1(<4 x i32> %a0) {
; XOPAVX1-LABEL: rot_v4i32_mask_ashr1:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
; XOPAVX1-NEXT:    vprotd $1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; XOPAVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: rot_v4i32_mask_ashr1:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpsrad $25, %xmm0, %xmm0
; XOPAVX2-NEXT:    vprotd $1, %xmm0, %xmm0
; XOPAVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; XOPAVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: rot_v4i32_mask_ashr1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrad $25, %xmm0, %xmm0
; AVX512-NEXT:    vprold $1, %xmm0, %xmm0
; AVX512-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX512-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 28>
  %2 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 2, i32 3, i32 4>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
  %4 = ashr <4 x i32> %3, <i32 1, i32 2, i32 3, i32 4>
  %5 = and <4 x i32> %4, <i32 -4096, i32 -8192, i32 -4096, i32 -8192>
  ret <4 x i32> %5
}

declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)