; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+sha -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc < %s -mattr=+sha,+avx2 -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=AVX

declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone

define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; CHECK-LABEL: test_sha1rnds4rr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1rnds4 $3, %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3)
  ret <4 x i32> %0
}

define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; CHECK-LABEL: test_sha1rnds4rm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
  ret <4 x i32> %1
}

declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; CHECK-LABEL: test_sha1nexterr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1nexte %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

define <4 x i32> @test_sha1nexterm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; CHECK-LABEL: test_sha1nexterm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1nexte (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}

declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; CHECK-LABEL: test_sha1msg1rr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1msg1 %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; CHECK-LABEL: test_sha1msg1rm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1msg1 (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}

declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; CHECK-LABEL: test_sha1msg2rr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1msg2 %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; CHECK-LABEL: test_sha1msg2rm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1msg2 (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}

declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
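; Note: sha256rnds2 reads its round-key operand implicitly from %xmm0, so the
; register allocator has to shuffle the arguments into place with (v)movaps
; around the instruction in the checks below.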
define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind uwtable {
; SSE-LABEL: test_sha256rnds2rr:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha256rnds2rr:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, %xmm3
; AVX-NEXT:    vmovaps %xmm2, %xmm0
; AVX-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3
; AVX-NEXT:    vmovaps %xmm3, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
  ret <4 x i32> %0
}

define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, <4 x i32>* %b, <4 x i32> %c) nounwind uwtable {
; SSE-LABEL: test_sha256rnds2rm:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha256rnds2rm:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, %xmm2
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2
; AVX-NEXT:    vmovaps %xmm2, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c)
  ret <4 x i32> %1
}

declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; CHECK-LABEL: test_sha256msg1rr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha256msg1 %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; CHECK-LABEL: test_sha256msg1rm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha256msg1 (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}

declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; CHECK-LABEL: test_sha256msg2rr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha256msg2 %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; CHECK-LABEL: test_sha256msg2rm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha256msg2 (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}

; Make sure we don't forget that sha instructions have no VEX equivalents and thus don't zero YMM/ZMM.
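; On SSE targets the upper half of the <8 x i32> result is cleared with an explicit
; xorps of %xmm1; on AVX targets a vmovaps of %xmm0 to itself is emitted to zero the
; upper lanes of %ymm0.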
define <8 x i32> @test_sha1rnds4_zero_extend(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; SSE-LABEL: test_sha1rnds4_zero_extend:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha1rnds4_zero_extend:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
; AVX-NEXT:    vmovaps %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
  %2 = shufflevector <4 x i32> %1, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %2
}