; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=znver1 | FileCheck %s

define win64cc void @opaque() {
; CHECK-LABEL: opaque:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  ret void
}

; We need xmm6 to be live from the loop header across all iterations of the loop.
; We shouldn't clobber ymm6 inside the loop.
define i32 @main() {
; CHECK-LABEL: main:
; CHECK:       # %bb.0: # %start
; CHECK-NEXT:    subq $584, %rsp # imm = 0x248
; CHECK-NEXT:    .cfi_def_cfa_offset 592
; CHECK-NEXT:    vmovaps {{.*#+}} xmm6 = [1010101010101010101,2020202020202020202]
; CHECK-NEXT:    xorl %esi, %esi
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  .LBB1_1: # %fake-loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm1
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm7
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm2
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm3
; CHECK-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm1
; CHECK-NEXT:    vmovups %ymm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm2, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm2, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm5
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm4
; CHECK-NEXT:    vmovups %ymm5, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm4, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq opaque
; CHECK-NEXT:    vmovaps %xmm6, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    testb %sil, %sil
; CHECK-NEXT:    jne .LBB1_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    movabsq $1010101010101010101, %rcx # imm = 0xE04998456557EB5
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    cmpq %rcx, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    setne %al
; CHECK-NEXT:    negl %eax
; CHECK-NEXT:    addq $584, %rsp # imm = 0x248
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
start:
  %dummy0 = alloca [22 x i64], align 8
  %dummy1 = alloca [22 x i64], align 8
  %dummy2 = alloca [22 x i64], align 8

  %data = alloca <2 x i64>, align 8

  br label %fake-loop

fake-loop:                                        ; preds = %fake-loop, %start
  %dummy0.cast = bitcast [22 x i64]* %dummy0 to i8*
  %dummy1.cast = bitcast [22 x i64]* %dummy1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 %dummy1.cast, i8* nonnull align 8 %dummy0.cast, i64 176, i1 false)

  %dummy1.cast.copy = bitcast [22 x i64]* %dummy1 to i8*
  %dummy2.cast = bitcast [22 x i64]* %dummy2 to i8*
  call void @llvm.lifetime.start.p0i8(i64 176, i8* nonnull %dummy2.cast)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 %dummy2.cast, i8* nonnull align 8 %dummy1.cast.copy, i64 176, i1 false)

  call win64cc void @opaque()

  store <2 x i64> <i64 1010101010101010101, i64 2020202020202020202>, <2 x i64>* %data, align 8

  %opaque-false = icmp eq i8 0, 1
  br i1 %opaque-false, label %fake-loop, label %exit

exit:                                             ; preds = %fake-loop
  %data.cast = bitcast <2 x i64>* %data to i64*
  %0 = load i64, i64* %data.cast, align 8
  %1 = icmp eq i64 %0, 1010101010101010101
  %2 = select i1 %1, i32 0, i32 -1
  ret i32 %2
}

; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #0

; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #0

attributes #0 = { argmemonly nounwind }