; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a8 | FileCheck %s

define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: v_andi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vand d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = and <8 x i8> %tmp1, %tmp2
  ret <8 x i8> %tmp3
}

define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: v_andi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vand d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = and <4 x i16> %tmp1, %tmp2
  ret <4 x i16> %tmp3
}

define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: v_andi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vand d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = and <2 x i32> %tmp1, %tmp2
  ret <2 x i32> %tmp3
}

define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
; CHECK-LABEL: v_andi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vand d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = load <1 x i64>, <1 x i64>* %B
  %tmp3 = and <1 x i64> %tmp1, %tmp2
  ret <1 x i64> %tmp3
}

define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: v_andQi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vand q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = and <16 x i8> %tmp1, %tmp2
  ret <16 x i8> %tmp3
}

define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: v_andQi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vand q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = and <8 x i16> %tmp1, %tmp2
  ret <8 x i16> %tmp3
}

define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: v_andQi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vand q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = and <4 x i32> %tmp1, %tmp2
  ret <4 x i32> %tmp3
}

define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
; CHECK-LABEL: v_andQi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vand q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = and <2 x i64> %tmp1, %tmp2
  ret <2 x i64> %tmp3
}

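; Bit clear: (and a, (xor b, -1)) should be selected as VBIC, for both d- and q-registers.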
define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: v_bici8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vbic d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  %tmp4 = and <8 x i8> %tmp1, %tmp3
  ret <8 x i8> %tmp4
}

define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: v_bici16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vbic d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
  %tmp4 = and <4 x i16> %tmp1, %tmp3
  ret <4 x i16> %tmp4
}

define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: v_bici32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vbic d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
  %tmp4 = and <2 x i32> %tmp1, %tmp3
  ret <2 x i32> %tmp4
}

define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
; CHECK-LABEL: v_bici64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vbic d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = load <1 x i64>, <1 x i64>* %B
  %tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
  %tmp4 = and <1 x i64> %tmp1, %tmp3
  ret <1 x i64> %tmp4
}

define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: v_bicQi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vbic q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  %tmp4 = and <16 x i8> %tmp1, %tmp3
  ret <16 x i8> %tmp4
}

define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: v_bicQi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vbic q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
  %tmp4 = and <8 x i16> %tmp1, %tmp3
  ret <8 x i16> %tmp4
}

define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: v_bicQi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vbic q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
  %tmp4 = and <4 x i32> %tmp1, %tmp3
  ret <4 x i32> %tmp4
}

define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
; CHECK-LABEL: v_bicQi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vbic q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
  %tmp4 = and <2 x i64> %tmp1, %tmp3
  ret <2 x i64> %tmp4
}

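; Exclusive or should be selected as VEOR.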
define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: v_eori8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    veor d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = xor <8 x i8> %tmp1, %tmp2
  ret <8 x i8> %tmp3
}

define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: v_eori16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    veor d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = xor <4 x i16> %tmp1, %tmp2
  ret <4 x i16> %tmp3
}

define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: v_eori32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    veor d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = xor <2 x i32> %tmp1, %tmp2
  ret <2 x i32> %tmp3
}

define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
; CHECK-LABEL: v_eori64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    veor d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = load <1 x i64>, <1 x i64>* %B
  %tmp3 = xor <1 x i64> %tmp1, %tmp2
  ret <1 x i64> %tmp3
}

define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: v_eorQi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    veor q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = xor <16 x i8> %tmp1, %tmp2
  ret <16 x i8> %tmp3
}

define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: v_eorQi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    veor q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = xor <8 x i16> %tmp1, %tmp2
  ret <8 x i16> %tmp3
}

define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: v_eorQi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    veor q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = xor <4 x i32> %tmp1, %tmp2
  ret <4 x i32> %tmp3
}

define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
; CHECK-LABEL: v_eorQi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    veor q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = xor <2 x i64> %tmp1, %tmp2
  ret <2 x i64> %tmp3
}

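; A NOT (xor with all ones) should be selected as VMVN.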
define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind {
; CHECK-LABEL: v_mvni8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmvn d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  ret <8 x i8> %tmp2
}

define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind {
; CHECK-LABEL: v_mvni16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmvn d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
  ret <4 x i16> %tmp2
}

define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind {
; CHECK-LABEL: v_mvni32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmvn d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
  ret <2 x i32> %tmp2
}

define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind {
; CHECK-LABEL: v_mvni64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmvn d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = xor <1 x i64> %tmp1, < i64 -1 >
  ret <1 x i64> %tmp2
}

define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind {
; CHECK-LABEL: v_mvnQi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vmvn q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  ret <16 x i8> %tmp2
}

define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind {
; CHECK-LABEL: v_mvnQi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vmvn q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
  ret <8 x i16> %tmp2
}

define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind {
; CHECK-LABEL: v_mvnQi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vmvn q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
  ret <4 x i32> %tmp2
}

define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind {
; CHECK-LABEL: v_mvnQi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vmvn q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
  ret <2 x i64> %tmp2
}

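; Inclusive or should be selected as VORR.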
define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: v_orri8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vorr d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = or <8 x i8> %tmp1, %tmp2
  ret <8 x i8> %tmp3
}

define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: v_orri16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vorr d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = or <4 x i16> %tmp1, %tmp2
  ret <4 x i16> %tmp3
}

define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: v_orri32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vorr d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = or <2 x i32> %tmp1, %tmp2
  ret <2 x i32> %tmp3
}

define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
; CHECK-LABEL: v_orri64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vorr d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = load <1 x i64>, <1 x i64>* %B
  %tmp3 = or <1 x i64> %tmp1, %tmp2
  ret <1 x i64> %tmp3
}

define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: v_orrQi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vorr q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = or <16 x i8> %tmp1, %tmp2
  ret <16 x i8> %tmp3
}

define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: v_orrQi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vorr q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = or <8 x i16> %tmp1, %tmp2
  ret <8 x i16> %tmp3
}

define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: v_orrQi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vorr q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = or <4 x i32> %tmp1, %tmp2
  ret <4 x i32> %tmp3
}

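; Or-not: (or a, (xor b, -1)) should be selected as VORN.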
define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
; CHECK-LABEL: v_orrQi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vorr q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = or <2 x i64> %tmp1, %tmp2
  ret <2 x i64> %tmp3
}

define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: v_orni8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vorn d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  %tmp4 = or <8 x i8> %tmp1, %tmp3
  ret <8 x i8> %tmp4
}

define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: v_orni16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vorn d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
  %tmp4 = or <4 x i16> %tmp1, %tmp3
  ret <4 x i16> %tmp4
}

define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: v_orni32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vorn d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
  %tmp4 = or <2 x i32> %tmp1, %tmp3
  ret <2 x i32> %tmp4
}

define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
; CHECK-LABEL: v_orni64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vorn d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = load <1 x i64>, <1 x i64>* %B
  %tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
  %tmp4 = or <1 x i64> %tmp1, %tmp3
  ret <1 x i64> %tmp4
}

define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: v_ornQi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vorn q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  %tmp4 = or <16 x i8> %tmp1, %tmp3
  ret <16 x i8> %tmp4
}

define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: v_ornQi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vorn q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
  %tmp4 = or <8 x i16> %tmp1, %tmp3
  ret <8 x i16> %tmp4
}

define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: v_ornQi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vorn q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
  %tmp4 = or <4 x i32> %tmp1, %tmp3
  ret <4 x i32> %tmp4
}

define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
; CHECK-LABEL: v_ornQi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vorn q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
  %tmp4 = or <2 x i64> %tmp1, %tmp3
  ret <2 x i64> %tmp4
}

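; VTST sets each result lane to all ones when the AND of the corresponding
; source lanes is nonzero, matching sext(icmp ne (and a, b), 0).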
define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vtsti8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vtst.8 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = and <8 x i8> %tmp1, %tmp2
  %tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer
  %tmp5 = sext <8 x i1> %tmp4 to <8 x i8>
  ret <8 x i8> %tmp5
}

define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vtsti16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vtst.16 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = and <4 x i16> %tmp1, %tmp2
  %tmp4 = icmp ne <4 x i16> %tmp3, zeroinitializer
  %tmp5 = sext <4 x i1> %tmp4 to <4 x i16>
  ret <4 x i16> %tmp5
}

define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: vtsti32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vtst.32 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = and <2 x i32> %tmp1, %tmp2
  %tmp4 = icmp ne <2 x i32> %tmp3, zeroinitializer
  %tmp5 = sext <2 x i1> %tmp4 to <2 x i32>
  ret <2 x i32> %tmp5
}

define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vtstQi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vtst.8 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = and <16 x i8> %tmp1, %tmp2
  %tmp4 = icmp ne <16 x i8> %tmp3, zeroinitializer
  %tmp5 = sext <16 x i1> %tmp4 to <16 x i8>
  ret <16 x i8> %tmp5
}

define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vtstQi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vtst.16 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = and <8 x i16> %tmp1, %tmp2
  %tmp4 = icmp ne <8 x i16> %tmp3, zeroinitializer
  %tmp5 = sext <8 x i1> %tmp4 to <8 x i16>
  ret <8 x i16> %tmp5
}

define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vtstQi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vtst.32 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = and <4 x i32> %tmp1, %tmp2
  %tmp4 = icmp ne <4 x i32> %tmp3, zeroinitializer
  %tmp5 = sext <4 x i1> %tmp4 to <4 x i32>
  ret <4 x i32> %tmp5
}

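; Vector constants that form an i32 splat should be folded into the
; immediate forms of VORR/VBIC.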
define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind {
; CHECK-LABEL: v_orrimm:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vorr.i32 d16, #0x1000000
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = or <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
  ret <8 x i8> %tmp3
}

define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind {
; CHECK-LABEL: v_orrimmQ:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vorr.i32 q8, #0x1000000
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp3 = or <16 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
  ret <16 x i8> %tmp3
}

define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind {
; CHECK-LABEL: v_bicimm:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vbic.i32 d16, #0xff000000
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = and <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
  ret <8 x i8> %tmp3
}

define <16 x i8> @v_bicimmQ(<16 x i8>* %A) nounwind {
; CHECK-LABEL: v_bicimmQ:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vbic.i32 q8, #0xff000000
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp3 = and <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
  ret <16 x i8> %tmp3
}

define <4 x i32> @hidden_not_v4i32(<4 x i32> %x) nounwind {
; CHECK-LABEL: hidden_not_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d19, r2, r3
; CHECK-NEXT:    vmov.i32 q8, #0x6
; CHECK-NEXT:    vmov d18, r0, r1
; CHECK-NEXT:    vbic q8, q8, q9
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %xor = xor <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
  %and = and <4 x i32> %xor, <i32 6, i32 6, i32 6, i32 6>
  ret <4 x i32> %and
}