; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-BE

; Every function in this file follows the same pattern for one vector type:
;   1. compare %src against zero to produce a vector predicate %c,
;   2. use %c in a select whose result is passed to an external function,
;   3. use the same %c again in a second select after the call returns.
; %c is therefore live across the call. The expected assembly stores p0 to the
; stack before the `bl` and loads it back afterwards, in both the little-endian
; (thumb) and big-endian (thumbeb) invocations.

; External callees, one per element width. They are only declared here, so
; each call site remains a real call in the generated code.
declare arm_aapcs_vfpcc <4 x i32> @ext_i32(<4 x i32> %c)
declare arm_aapcs_vfpcc <8 x i16> @ext_i16(<8 x i16> %c)
declare arm_aapcs_vfpcc <16 x i8> @ext_i8(<16 x i8> %c)

; <4 x i32> variant: predicate from a 32-bit lane compare, call to @ext_i32.
define arm_aapcs_vfpcc <4 x i32> @shuffle1_v4i32(<4 x i32> %src, <4 x i32> %a) {
; CHECK-LE-LABEL: shuffle1_v4i32:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .save {r7, lr}
; CHECK-LE-NEXT:    push {r7, lr}
; CHECK-LE-NEXT:    .vsave {d8, d9}
; CHECK-LE-NEXT:    vpush {d8, d9}
; CHECK-LE-NEXT:    .pad #8
; CHECK-LE-NEXT:    sub sp, #8
; CHECK-LE-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-LE-NEXT:    vmov.i32 q0, #0x0
; CHECK-LE-NEXT:    vpsel q0, q1, q0
; CHECK-LE-NEXT:    vmov q4, q1
; CHECK-LE-NEXT:    vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-LE-NEXT:    bl ext_i32
; CHECK-LE-NEXT:    vldr p0, [sp, #4] @ 4-byte Reload
; CHECK-LE-NEXT:    vpsel q0, q4, q0
; CHECK-LE-NEXT:    add sp, #8
; CHECK-LE-NEXT:    vpop {d8, d9}
; CHECK-LE-NEXT:    pop {r7, pc}
;
; CHECK-BE-LABEL: shuffle1_v4i32:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .save {r7, lr}
; CHECK-BE-NEXT:    push {r7, lr}
; CHECK-BE-NEXT:    .vsave {d8, d9}
; CHECK-BE-NEXT:    vpush {d8, d9}
; CHECK-BE-NEXT:    .pad #8
; CHECK-BE-NEXT:    sub sp, #8
; CHECK-BE-NEXT:    vrev64.32 q4, q1
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vcmp.i32 eq, q1, zr
; CHECK-BE-NEXT:    vmov.i32 q0, #0x0
; CHECK-BE-NEXT:    vpsel q1, q4, q0
; CHECK-BE-NEXT:    vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bl ext_i32
; CHECK-BE-NEXT:    vldr p0, [sp, #4] @ 4-byte Reload
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpsel q1, q4, q1
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    add sp, #8
; CHECK-BE-NEXT:    vpop {d8, d9}
; CHECK-BE-NEXT:    pop {r7, pc}
entry:
  %c = icmp eq <4 x i32> %src, zeroinitializer
  %s1 = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer
  %ext = call arm_aapcs_vfpcc <4 x i32> @ext_i32(<4 x i32> %s1)
  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ext
  ret <4 x i32> %s
}

; <8 x i16> variant: predicate from a 16-bit lane compare, call to @ext_i16.
define arm_aapcs_vfpcc <8 x i16> @shuffle1_v8i16(<8 x i16> %src, <8 x i16> %a) {
; CHECK-LE-LABEL: shuffle1_v8i16:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .save {r7, lr}
; CHECK-LE-NEXT:    push {r7, lr}
; CHECK-LE-NEXT:    .vsave {d8, d9}
; CHECK-LE-NEXT:    vpush {d8, d9}
; CHECK-LE-NEXT:    .pad #8
; CHECK-LE-NEXT:    sub sp, #8
; CHECK-LE-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-LE-NEXT:    vmov.i32 q0, #0x0
; CHECK-LE-NEXT:    vpsel q0, q1, q0
; CHECK-LE-NEXT:    vmov q4, q1
; CHECK-LE-NEXT:    vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-LE-NEXT:    bl ext_i16
; CHECK-LE-NEXT:    vldr p0, [sp, #4] @ 4-byte Reload
; CHECK-LE-NEXT:    vpsel q0, q4, q0
; CHECK-LE-NEXT:    add sp, #8
; CHECK-LE-NEXT:    vpop {d8, d9}
; CHECK-LE-NEXT:    pop {r7, pc}
;
; CHECK-BE-LABEL: shuffle1_v8i16:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .save {r7, lr}
; CHECK-BE-NEXT:    push {r7, lr}
; CHECK-BE-NEXT:    .vsave {d8, d9}
; CHECK-BE-NEXT:    vpush {d8, d9}
; CHECK-BE-NEXT:    .pad #8
; CHECK-BE-NEXT:    sub sp, #8
; CHECK-BE-NEXT:    vrev64.16 q4, q1
; CHECK-BE-NEXT:    vmov.i32 q1, #0x0
; CHECK-BE-NEXT:    vrev64.16 q2, q0
; CHECK-BE-NEXT:    vrev32.16 q1, q1
; CHECK-BE-NEXT:    vcmp.i16 eq, q2, zr
; CHECK-BE-NEXT:    vpsel q1, q4, q1
; CHECK-BE-NEXT:    vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-BE-NEXT:    vrev64.16 q0, q1
; CHECK-BE-NEXT:    bl ext_i16
; CHECK-BE-NEXT:    vldr p0, [sp, #4] @ 4-byte Reload
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpsel q1, q4, q1
; CHECK-BE-NEXT:    vrev64.16 q0, q1
; CHECK-BE-NEXT:    add sp, #8
; CHECK-BE-NEXT:    vpop {d8, d9}
; CHECK-BE-NEXT:    pop {r7, pc}
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %s1 = select <8 x i1> %c, <8 x i16> %a, <8 x i16> zeroinitializer
  %ext = call arm_aapcs_vfpcc <8 x i16> @ext_i16(<8 x i16> %s1)
  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ext
  ret <8 x i16> %s
}

; <16 x i8> variant: predicate from an 8-bit lane compare, call to @ext_i8.
define arm_aapcs_vfpcc <16 x i8> @shuffle1_v16i8(<16 x i8> %src, <16 x i8> %a) {
; CHECK-LE-LABEL: shuffle1_v16i8:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .save {r7, lr}
; CHECK-LE-NEXT:    push {r7, lr}
; CHECK-LE-NEXT:    .vsave {d8, d9}
; CHECK-LE-NEXT:    vpush {d8, d9}
; CHECK-LE-NEXT:    .pad #8
; CHECK-LE-NEXT:    sub sp, #8
; CHECK-LE-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-LE-NEXT:    vmov.i32 q0, #0x0
; CHECK-LE-NEXT:    vpsel q0, q1, q0
; CHECK-LE-NEXT:    vmov q4, q1
; CHECK-LE-NEXT:    vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-LE-NEXT:    bl ext_i8
; CHECK-LE-NEXT:    vldr p0, [sp, #4] @ 4-byte Reload
; CHECK-LE-NEXT:    vpsel q0, q4, q0
; CHECK-LE-NEXT:    add sp, #8
; CHECK-LE-NEXT:    vpop {d8, d9}
; CHECK-LE-NEXT:    pop {r7, pc}
;
; CHECK-BE-LABEL: shuffle1_v16i8:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .save {r7, lr}
; CHECK-BE-NEXT:    push {r7, lr}
; CHECK-BE-NEXT:    .vsave {d8, d9}
; CHECK-BE-NEXT:    vpush {d8, d9}
; CHECK-BE-NEXT:    .pad #8
; CHECK-BE-NEXT:    sub sp, #8
; CHECK-BE-NEXT:    vrev64.8 q4, q1
; CHECK-BE-NEXT:    vmov.i32 q1, #0x0
; CHECK-BE-NEXT:    vrev64.8 q2, q0
; CHECK-BE-NEXT:    vrev32.8 q1, q1
; CHECK-BE-NEXT:    vcmp.i8 eq, q2, zr
; CHECK-BE-NEXT:    vpsel q1, q4, q1
; CHECK-BE-NEXT:    vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    bl ext_i8
; CHECK-BE-NEXT:    vldr p0, [sp, #4] @ 4-byte Reload
; CHECK-BE-NEXT:    vrev64.8 q1, q0
; CHECK-BE-NEXT:    vpsel q1, q4, q1
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    add sp, #8
; CHECK-BE-NEXT:    vpop {d8, d9}
; CHECK-BE-NEXT:    pop {r7, pc}
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %s1 = select <16 x i1> %c, <16 x i8> %a, <16 x i8> zeroinitializer
  %ext = call arm_aapcs_vfpcc <16 x i8> @ext_i8(<16 x i8> %s1)
  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ext
  ret <16 x i8> %s
}