; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+sha -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc < %s -mattr=+sha,+avx2 -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=AVX

; SHA1 rounds intrinsic: takes two <4 x i32> vectors and an i8 selector and
; returns a <4 x i32>. The tests in this file always pass the constant 3,
; which shows up as the $3 immediate in the expected assembly.
declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone

; Register-register form of sha1rnds4: both vector operands are passed by
; value and the expected code is a single sha1rnds4 with immediate 3 that
; writes its result into xmm0.
define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; CHECK-LABEL: test_sha1rnds4rr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1rnds4 $3, %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3)
  ret <4 x i32> %0
}

; Register-memory form of sha1rnds4: the second operand is loaded from %b and
; the expected code folds that load into the instruction's memory operand
; ((%rdi)) instead of emitting a separate vector load.
define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; CHECK-LABEL: test_sha1rnds4rm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
  ret <4 x i32> %1
}

; sha1nexte intrinsic: takes two <4 x i32> vectors and returns a <4 x i32>.
declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone

; Register-register form of sha1nexte: both operands are passed by value and
; the expected code is a single sha1nexte from xmm1 into xmm0.
define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; CHECK-LABEL: test_sha1nexterr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1nexte %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

; Register-memory form of sha1nexte: the second operand is loaded from %b and
; the expected code folds that load into the instruction's memory operand.
define <4 x i32> @test_sha1nexterm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; CHECK-LABEL: test_sha1nexterm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1nexte (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}

; sha1msg1 intrinsic: takes two <4 x i32> vectors and returns a <4 x i32>.
declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone

; Register-register form of sha1msg1: both operands are passed by value and
; the expected code is a single sha1msg1 from xmm1 into xmm0.
define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; CHECK-LABEL: test_sha1msg1rr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1msg1 %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

; Register-memory form of sha1msg1: the second operand is loaded from %b and
; the expected code folds that load into the instruction's memory operand.
define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; CHECK-LABEL: test_sha1msg1rm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1msg1 (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}

; sha1msg2 intrinsic: takes two <4 x i32> vectors and returns a <4 x i32>.
declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone

; Register-register form of sha1msg2: both operands are passed by value and
; the expected code is a single sha1msg2 from xmm1 into xmm0.
define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; CHECK-LABEL: test_sha1msg2rr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1msg2 %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

; Register-memory form of sha1msg2: the second operand is loaded from %b and
; the expected code folds that load into the instruction's memory operand.
define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; CHECK-LABEL: test_sha1msg2rm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1msg2 (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}

; sha256rnds2 intrinsic: takes three <4 x i32> vectors and returns a
; <4 x i32>. In the expected assembly of the tests below, the third argument
; is always placed in xmm0 before the instruction is issued.
declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

; Register-register form of sha256rnds2. The expected code first moves %a out
; of xmm0 into xmm3, copies %c (xmm2) into xmm0, issues sha256rnds2 with xmm0
; as its first printed operand, and finally copies the result back to xmm0
; for the return. The run with AVX2 enabled differs only in using vmovaps for
; the register copies; the sha256rnds2 instruction itself is unchanged.
define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind uwtable {
; SSE-LABEL: test_sha256rnds2rr:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha256rnds2rr:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, %xmm3
; AVX-NEXT:    vmovaps %xmm2, %xmm0
; AVX-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3
; AVX-NEXT:    vmovaps %xmm3, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
  ret <4 x i32> %0
}

; Register-memory form of sha256rnds2: the second operand is loaded from %b
; and the expected code folds that load into the instruction ((%rdi)). As in
; the register-register test, %a is moved out of xmm0 (here into xmm2), %c
; (xmm1) is copied into xmm0 before the instruction, and the result is copied
; back to xmm0 for the return. The run with AVX2 enabled uses vmovaps for the
; register copies.
define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, <4 x i32>* %b, <4 x i32> %c) nounwind uwtable {
; SSE-LABEL: test_sha256rnds2rm:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha256rnds2rm:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, %xmm2
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2
; AVX-NEXT:    vmovaps %xmm2, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c)
  ret <4 x i32> %1
}

; sha256msg1 intrinsic: takes two <4 x i32> vectors and returns a <4 x i32>.
declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone

; Register-register form of sha256msg1: both operands are passed by value and
; the expected code is a single sha256msg1 from xmm1 into xmm0.
define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; CHECK-LABEL: test_sha256msg1rr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha256msg1 %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

; Register-memory form of sha256msg1: the second operand is loaded from %b and
; the expected code folds that load into the instruction's memory operand.
define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; CHECK-LABEL: test_sha256msg1rm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha256msg1 (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}

; sha256msg2 intrinsic: takes two <4 x i32> vectors and returns a <4 x i32>.
declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) nounwind readnone

; Register-register form of sha256msg2: both operands are passed by value and
; the expected code is a single sha256msg2 from xmm1 into xmm0.
define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; CHECK-LABEL: test_sha256msg2rr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha256msg2 %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

; Register-memory form of sha256msg2: the second operand is loaded from %b and
; the expected code folds that load into the instruction's memory operand.
define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; CHECK-LABEL: test_sha256msg2rm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha256msg2 (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}

; Make sure we don't forget that the SHA instructions have no VEX-encoded
; equivalents and therefore do not zero the upper bits of YMM/ZMM registers.
; The function widens the sha1rnds4 result to <8 x i32> with zeros in the
; upper four lanes. With AVX2 enabled, the expected code keeps an explicit
; "vmovaps %xmm0, %xmm0" after sha1rnds4 to produce those zeros; without it,
; the upper half is returned as a zeroed xmm1 (xorps).
define <8 x i32> @test_sha1rnds4_zero_extend(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; SSE-LABEL: test_sha1rnds4_zero_extend:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha1rnds4_zero_extend:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
; AVX-NEXT:    vmovaps %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
  %2 = shufflevector <4 x i32> %1, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %2
}
