; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -instcombine -mtriple=thumbv8.1m.main -S %s | FileCheck --check-prefix=IR %s
; RUN: opt -instcombine -mtriple=thumbv8.1m.main %s | llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -O3 -o - | FileCheck --check-prefix=ASM %s

%struct.foo = type { [2 x <4 x i32>] }

define arm_aapcs_vfpcc i32 @test_vadciq_multiple(%struct.foo %a, %struct.foo %b, i32 %carry) {
entry:
  %a.0 = extractvalue %struct.foo %a, 0, 0
  %a.1 = extractvalue %struct.foo %a, 0, 1
  %b.0 = extractvalue %struct.foo %b, 0, 0
  %b.1 = extractvalue %struct.foo %b, 0, 1

  %fpscr.in.0 = shl i32 %carry, 29
  %outpair.0 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> %a.0, <4 x i32> %b.0, i32 %fpscr.in.0)
  %fpscr.out.0 = extractvalue { <4 x i32>, i32 } %outpair.0, 1
  %shifted.out.0 = lshr i32 %fpscr.out.0, 29
  %carry.out.0 = and i32 1, %shifted.out.0
  %fpscr.in.1 = shl i32 %carry.out.0, 29
  %outpair.1 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> %a.1, <4 x i32> %b.1, i32 %fpscr.in.1)
  %fpscr.out.1 = extractvalue { <4 x i32>, i32 } %outpair.1, 1
  %shifted.out.1 = lshr i32 %fpscr.out.1, 29
  %carry.out.1 = and i32 1, %shifted.out.1
  ret i32 %carry.out.1
}

define arm_aapcs_vfpcc i32 @test_vadciq_pred_multiple(%struct.foo %a, %struct.foo %b, i32 %ipred, i32 %carry) {
entry:
  %a.0 = extractvalue %struct.foo %a, 0, 0
  %a.1 = extractvalue %struct.foo %a, 0, 1
  %b.0 = extractvalue %struct.foo %b, 0, 0
  %b.1 = extractvalue %struct.foo %b, 0, 1

  %vpred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %ipred)
  %fpscr.in.0 = shl i32 %carry, 29
  %outpair.0 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> undef, <4 x i32> %a.0, <4 x i32> %b.0, i32 %fpscr.in.0, <4 x i1> %vpred)
  %fpscr.out.0 = extractvalue { <4 x i32>, i32 } %outpair.0, 1
  %shifted.out.0 = lshr i32 %fpscr.out.0, 29
  %carry.out.0 = and i32 1, %shifted.out.0
  %fpscr.in.1 = shl i32 %carry.out.0, 29
  %outpair.1 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> undef, <4 x i32> %a.1, <4 x i32> %b.1, i32 %fpscr.in.1, <4 x i1> %vpred)
  %fpscr.out.1 = extractvalue { <4 x i32>, i32 } %outpair.1, 1
  %shifted.out.1 = lshr i32 %fpscr.out.1, 29
  %carry.out.1 = and i32 1, %shifted.out.1
  ret i32 %carry.out.1
}

declare { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32>, <4 x i32>, i32)
declare { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i32>, i32, <4 x i1>)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
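
; For reference, this is the carry round trip that each function above
; builds between its two intrinsic calls (an illustrative sketch with
; hypothetical value names, not a FileCheck pattern): the carry is moved
; from FPSCR bit 29 down to bit 0 and then straight back up again.
;
;   %shifted  = lshr i32 %fpscr.out, 29   ; carry from bit 29 down to bit 0
;   %carry    = and i32 1, %shifted       ; isolate the carry bit
;   %fpscr.in = shl i32 %carry, 29        ; carry back up to bit 29 for the next call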

; We expect that round trip between the two intrinsics, where the
; FPSCR-formatted output value is turned back into just the carry bit
; at bit 0 and then back again for the next call, to be optimized away
; completely by InstCombine, so that the FPSCR output from one
; intrinsic is passed straight on to the next:

; IR: %outpair.0 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> %a.0, <4 x i32> %b.0, i32 %fpscr.in.0)
; IR: %fpscr.out.0 = extractvalue { <4 x i32>, i32 } %outpair.0, 1
; IR: %outpair.1 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> %a.1, <4 x i32> %b.1, i32 %fpscr.out.0)

; IR: %outpair.0 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> undef, <4 x i32> %a.0, <4 x i32> %b.0, i32 %fpscr.in.0, <4 x i1> %vpred)
; IR: %fpscr.out.0 = extractvalue { <4 x i32>, i32 } %outpair.0, 1
; IR: %outpair.1 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> undef, <4 x i32> %a.1, <4 x i32> %b.1, i32 %fpscr.out.0, <4 x i1> %vpred)

; And this is the assembly we expect at the end of it, with the two
; vadc.i32 instructions right next to each other and the second one
; implicitly reusing the FPSCR value written by the first.

; ASM: test_vadciq_multiple:
; ASM: lsls r0, r0, #29
; ASM-NEXT: vmsr fpscr_nzcvqc, r0
; ASM-NEXT: vadc.i32 q0, q0, q2
; ASM-NEXT: vadc.i32 q0, q1, q3
; ASM-NEXT: vmrs r0, fpscr_nzcvqc
; ASM-NEXT: ubfx r0, r0, #29, #1
; ASM-NEXT: bx lr

; ASM: test_vadciq_pred_multiple:
; ASM: lsls r1, r1, #29
; ASM-NEXT: vmsr p0, r0
; ASM-NEXT: vmsr fpscr_nzcvqc, r1
; ASM-NEXT: vpstt
; ASM-NEXT: vadct.i32 q0, q0, q2
; ASM-NEXT: vadct.i32 q0, q1, q3
; ASM-NEXT: vmrs r0, fpscr_nzcvqc
; ASM-NEXT: ubfx r0, r0, #29, #1
; ASM-NEXT: bx lr