; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi soft %s -o - | FileCheck %s -check-prefix CHECK -check-prefix SOFT
; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi hard %s -o - | FileCheck %s -check-prefix CHECK -check-prefix HARD

; CHECK-LABEL: test_i64_f64:
declare i64 @test_i64_f64_helper(double %p)
define void @test_i64_f64(double* %p, i64* %q) {
; SOFT: vadd.f64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.f64 d0
    %1 = load double, double* %p
    %2 = fadd double %1, %1
    %3 = call i64 @test_i64_f64_helper(double %2)
    %4 = add i64 %3, %3
    store i64 %4, i64* %q
    ret void
; CHECK: adds r1
; CHECK: adc r0
}

; CHECK-LABEL: test_i64_v1i64:
declare i64 @test_i64_v1i64_helper(<1 x i64> %p)
define void @test_i64_v1i64(<1 x i64>* %p, i64* %q) {
; SOFT: vadd.i64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.i64 d0
    %1 = load <1 x i64>, <1 x i64>* %p
    %2 = add <1 x i64> %1, %1
    %3 = call i64 @test_i64_v1i64_helper(<1 x i64> %2)
    %4 = add i64 %3, %3
    store i64 %4, i64* %q
    ret void
; CHECK: adds r1
; CHECK: adc r0
}

; CHECK-LABEL: test_i64_v2f32:
declare i64 @test_i64_v2f32_helper(<2 x float> %p)
define void @test_i64_v2f32(<2 x float>* %p, i64* %q) {
; SOFT: vrev64.32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.32 d0
    %1 = load <2 x float>, <2 x float>* %p
    %2 = fadd <2 x float> %1, %1
    %3 = call i64 @test_i64_v2f32_helper(<2 x float> %2)
    %4 = add i64 %3, %3
    store i64 %4, i64* %q
    ret void
; CHECK: adds r1
; CHECK: adc r0
}

; CHECK-LABEL: test_i64_v2i32:
declare i64 @test_i64_v2i32_helper(<2 x i32> %p)
define void @test_i64_v2i32(<2 x i32>* %p, i64* %q) {
; SOFT: vrev64.32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.32 d0
    %1 = load <2 x i32>, <2 x i32>* %p
    %2 = add <2 x i32> %1, %1
    %3 = call i64 @test_i64_v2i32_helper(<2 x i32> %2)
    %4 = add i64 %3, %3
    store i64 %4, i64* %q
    ret void
; CHECK: adds r1
; CHECK: adc r0
}

; CHECK-LABEL: test_i64_v4i16:
declare i64 @test_i64_v4i16_helper(<4 x i16> %p)
define void @test_i64_v4i16(<4 x i16>* %p, i64* %q) {
; SOFT: vrev64.16 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.16 d0
    %1 = load <4 x i16>, <4 x i16>* %p
    %2 = add <4 x i16> %1, %1
    %3 = call i64 @test_i64_v4i16_helper(<4 x i16> %2)
    %4 = add i64 %3, %3
    store i64 %4, i64* %q
    ret void
; CHECK: adds r1
; CHECK: adc r0
}

; CHECK-LABEL: test_i64_v8i8:
declare i64 @test_i64_v8i8_helper(<8 x i8> %p)
define void @test_i64_v8i8(<8 x i8>* %p, i64* %q) {
; SOFT: vrev64.8 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.8 d0
    %1 = load <8 x i8>, <8 x i8>* %p
    %2 = add <8 x i8> %1, %1
    %3 = call i64 @test_i64_v8i8_helper(<8 x i8> %2)
    %4 = add i64 %3, %3
    store i64 %4, i64* %q
    ret void
; CHECK: adds r1
; CHECK: adc r0
}

; CHECK-LABEL: test_f64_i64:
declare double @test_f64_i64_helper(i64 %p)
define void @test_f64_i64(i64* %p, double* %q) {
; CHECK: adds r1
; CHECK: adc r0
    %1 = load i64, i64* %p
    %2 = add i64 %1, %1
    %3 = call double @test_f64_i64_helper(i64 %2)
    %4 = fadd double %3, %3
    store double %4, double* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.f64 [[REG]]
; HARD: vadd.f64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_f64_v1i64:
declare double @test_f64_v1i64_helper(<1 x i64> %p)
define void @test_f64_v1i64(<1 x i64>* %p, double* %q) {
; SOFT: vadd.i64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.i64 d0
    %1 = load <1 x i64>, <1 x i64>* %p
    %2 = add <1 x i64> %1, %1
    %3 = call double @test_f64_v1i64_helper(<1 x i64> %2)
    %4 = fadd double %3, %3
    store double %4, double* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.f64 [[REG]]
; HARD: vadd.f64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_f64_v2f32:
declare double @test_f64_v2f32_helper(<2 x float> %p)
define void @test_f64_v2f32(<2 x float>* %p, double* %q) {
; SOFT: vrev64.32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.32 d0
    %1 = load <2 x float>, <2 x float>* %p
    %2 = fadd <2 x float> %1, %1
    %3 = call double @test_f64_v2f32_helper(<2 x float> %2)
    %4 = fadd double %3, %3
    store double %4, double* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.f64 [[REG]]
; HARD: vadd.f64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_f64_v2i32:
declare double @test_f64_v2i32_helper(<2 x i32> %p)
define void @test_f64_v2i32(<2 x i32>* %p, double* %q) {
; SOFT: vrev64.32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.32 d0
    %1 = load <2 x i32>, <2 x i32>* %p
    %2 = add <2 x i32> %1, %1
    %3 = call double @test_f64_v2i32_helper(<2 x i32> %2)
    %4 = fadd double %3, %3
    store double %4, double* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.f64 [[REG]]
; HARD: vadd.f64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_f64_v4i16:
declare double @test_f64_v4i16_helper(<4 x i16> %p)
define void @test_f64_v4i16(<4 x i16>* %p, double* %q) {
; SOFT: vrev64.16 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.16 d0
    %1 = load <4 x i16>, <4 x i16>* %p
    %2 = add <4 x i16> %1, %1
    %3 = call double @test_f64_v4i16_helper(<4 x i16> %2)
    %4 = fadd double %3, %3
    store double %4, double* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.f64 [[REG]]
; HARD: vadd.f64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_f64_v8i8:
declare double @test_f64_v8i8_helper(<8 x i8> %p)
define void @test_f64_v8i8(<8 x i8>* %p, double* %q) {
; SOFT: vrev64.8 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.8 d0
    %1 = load <8 x i8>, <8 x i8>* %p
    %2 = add <8 x i8> %1, %1
    %3 = call double @test_f64_v8i8_helper(<8 x i8> %2)
    %4 = fadd double %3, %3
    store double %4, double* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.f64 [[REG]]
; HARD: vadd.f64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v1i64_i64:
declare <1 x i64> @test_v1i64_i64_helper(i64 %p)
define void @test_v1i64_i64(i64* %p, <1 x i64>* %q) {
; CHECK: adds r1
; CHECK: adc r0
    %1 = load i64, i64* %p
    %2 = add i64 %1, %1
    %3 = call <1 x i64> @test_v1i64_i64_helper(i64 %2)
    %4 = add <1 x i64> %3, %3
    store <1 x i64> %4, <1 x i64>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.i64 [[REG]]
; HARD: vadd.i64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v1i64_f64:
declare <1 x i64> @test_v1i64_f64_helper(double %p)
define void @test_v1i64_f64(double* %p, <1 x i64>* %q) {
; SOFT: vadd.f64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.f64 d0
    %1 = load double, double* %p
    %2 = fadd double %1, %1
    %3 = call <1 x i64> @test_v1i64_f64_helper(double %2)
    %4 = add <1 x i64> %3, %3
    store <1 x i64> %4, <1 x i64>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.i64 [[REG]]
; HARD: vadd.i64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v1i64_v2f32:
declare <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %p)
define void @test_v1i64_v2f32(<2 x float>* %p, <1 x i64>* %q) {
; HARD: vrev64.32 d0
; SOFT: vadd.f32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
    %1 = load <2 x float>, <2 x float>* %p
    %2 = fadd <2 x float> %1, %1
    %3 = call <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %2)
    %4 = add <1 x i64> %3, %3
    store <1 x i64> %4, <1 x i64>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.i64 [[REG]]
; HARD: vadd.i64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v1i64_v2i32:
declare <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %p)
define void @test_v1i64_v2i32(<2 x i32>* %p, <1 x i64>* %q) {
; HARD: vrev64.32 d0
; SOFT: vadd.i32 [[REG:d[0-9]+]]
; SOFT: vrev64.32 [[REG]]
; SOFT: vmov r1, r0, [[REG]]
    %1 = load <2 x i32>, <2 x i32>* %p
    %2 = add <2 x i32> %1, %1
    %3 = call <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %2)
    %4 = add <1 x i64> %3, %3
    store <1 x i64> %4, <1 x i64>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.i64 [[REG]]
; HARD: vadd.i64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v1i64_v4i16:
declare <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %p)
define void @test_v1i64_v4i16(<4 x i16>* %p, <1 x i64>* %q) {
; SOFT: vrev64.16 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.16 d0
    %1 = load <4 x i16>, <4 x i16>* %p
    %2 = add <4 x i16> %1, %1
    %3 = call <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %2)
    %4 = add <1 x i64> %3, %3
    store <1 x i64> %4, <1 x i64>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.i64 [[REG]]
; HARD: vadd.i64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v1i64_v8i8:
declare <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %p)
define void @test_v1i64_v8i8(<8 x i8>* %p, <1 x i64>* %q) {
; SOFT: vrev64.8 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.8 d0
    %1 = load <8 x i8>, <8 x i8>* %p
    %2 = add <8 x i8> %1, %1
    %3 = call <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %2)
    %4 = add <1 x i64> %3, %3
    store <1 x i64> %4, <1 x i64>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.i64 [[REG]]
; HARD: vadd.i64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2f32_i64:
declare <2 x float> @test_v2f32_i64_helper(i64 %p)
define void @test_v2f32_i64(i64* %p, <2 x float>* %q) {
; CHECK: adds r1
; CHECK: adc r0
    %1 = load i64, i64* %p
    %2 = add i64 %1, %1
    %3 = call <2 x float> @test_v2f32_i64_helper(i64 %2)
    %4 = fadd <2 x float> %3, %3
    store <2 x float> %4, <2 x float>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2f32_f64:
declare <2 x float> @test_v2f32_f64_helper(double %p)
define void @test_v2f32_f64(double* %p, <2 x float>* %q) {
; SOFT: vadd.f64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.f64 d0
    %1 = load double, double* %p
    %2 = fadd double %1, %1
    %3 = call <2 x float> @test_v2f32_f64_helper(double %2)
    %4 = fadd <2 x float> %3, %3
    store <2 x float> %4, <2 x float>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2f32_v1i64:
declare <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %p)
define void @test_v2f32_v1i64(<1 x i64>* %p, <2 x float>* %q) {
; SOFT: vadd.i64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.i64 d0
    %1 = load <1 x i64>, <1 x i64>* %p
    %2 = add <1 x i64> %1, %1
    %3 = call <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %2)
    %4 = fadd <2 x float> %3, %3
    store <2 x float> %4, <2 x float>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2f32_v2i32:
declare <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %p)
define void @test_v2f32_v2i32(<2 x i32>* %p, <2 x float>* %q) {
; HARD: vrev64.32 d0
; SOFT: vadd.i32 [[REG:d[0-9]+]]
; SOFT: vrev64.32 [[REG]]
; SOFT: vmov r1, r0, [[REG]]
    %1 = load <2 x i32>, <2 x i32>* %p
    %2 = add <2 x i32> %1, %1
    %3 = call <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %2)
    %4 = fadd <2 x float> %3, %3
    store <2 x float> %4, <2 x float>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2f32_v4i16:
declare <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %p)
define void @test_v2f32_v4i16(<4 x i16>* %p, <2 x float>* %q) {
; SOFT: vrev64.16 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.16 d0
    %1 = load <4 x i16>, <4 x i16>* %p
    %2 = add <4 x i16> %1, %1
    %3 = call <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %2)
    %4 = fadd <2 x float> %3, %3
    store <2 x float> %4, <2 x float>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2f32_v8i8:
declare <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %p)
define void @test_v2f32_v8i8(<8 x i8>* %p, <2 x float>* %q) {
; SOFT: vrev64.8 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.8 d0
    %1 = load <8 x i8>, <8 x i8>* %p
    %2 = add <8 x i8> %1, %1
    %3 = call <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %2)
    %4 = fadd <2 x float> %3, %3
    store <2 x float> %4, <2 x float>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2i32_i64:
declare <2 x i32> @test_v2i32_i64_helper(i64 %p)
define void @test_v2i32_i64(i64* %p, <2 x i32>* %q) {
; CHECK: adds r1
; CHECK: adc r0
    %1 = load i64, i64* %p
    %2 = add i64 %1, %1
    %3 = call <2 x i32> @test_v2i32_i64_helper(i64 %2)
    %4 = add <2 x i32> %3, %3
    store <2 x i32> %4, <2 x i32>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2i32_f64:
declare <2 x i32> @test_v2i32_f64_helper(double %p)
define void @test_v2i32_f64(double* %p, <2 x i32>* %q) {
; SOFT: vadd.f64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.f64 d0
    %1 = load double, double* %p
    %2 = fadd double %1, %1
    %3 = call <2 x i32> @test_v2i32_f64_helper(double %2)
    %4 = add <2 x i32> %3, %3
    store <2 x i32> %4, <2 x i32>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2i32_v1i64:
declare <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %p)
define void @test_v2i32_v1i64(<1 x i64>* %p, <2 x i32>* %q) {
; SOFT: vadd.i64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.i64 d0
    %1 = load <1 x i64>, <1 x i64>* %p
    %2 = add <1 x i64> %1, %1
    %3 = call <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %2)
    %4 = add <2 x i32> %3, %3
    store <2 x i32> %4, <2 x i32>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2i32_v2f32:
declare <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %p)
define void @test_v2i32_v2f32(<2 x float>* %p, <2 x i32>* %q) {
; HARD: vadd.f32 [[REG:d[0-9]+]]
; HARD: vrev64.32 d0, [[REG]]
; SOFT: vadd.f32 [[REG:d[0-9]+]]
; SOFT: vrev64.32 [[REG]]
; SOFT: vmov r1, r0, [[REG]]
    %1 = load <2 x float>, <2 x float>* %p
    %2 = fadd <2 x float> %1, %1
    %3 = call <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %2)
    %4 = add <2 x i32> %3, %3
    store <2 x i32> %4, <2 x i32>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2i32_v4i16:
declare <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %p)
define void @test_v2i32_v4i16(<4 x i16>* %p, <2 x i32>* %q) {
; SOFT: vrev64.16 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.16 d0
    %1 = load <4 x i16>, <4 x i16>* %p
    %2 = add <4 x i16> %1, %1
    %3 = call <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %2)
    %4 = add <2 x i32> %3, %3
    store <2 x i32> %4, <2 x i32>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2i32_v8i8:
declare <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %p)
define void @test_v2i32_v8i8(<8 x i8>* %p, <2 x i32>* %q) {
; SOFT: vrev64.8 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.8 d0
    %1 = load <8 x i8>, <8 x i8>* %p
    %2 = add <8 x i8> %1, %1
    %3 = call <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %2)
    %4 = add <2 x i32> %3, %3
    store <2 x i32> %4, <2 x i32>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v4i16_i64:
declare <4 x i16> @test_v4i16_i64_helper(i64 %p)
define void @test_v4i16_i64(i64* %p, <4 x i16>* %q) {
; CHECK: adds r1
; CHECK: adc r0
    %1 = load i64, i64* %p
    %2 = add i64 %1, %1
    %3 = call <4 x i16> @test_v4i16_i64_helper(i64 %2)
    %4 = add <4 x i16> %3, %3
    store <4 x i16> %4, <4 x i16>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.16 [[REG]]
; HARD: vrev64.16 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v4i16_f64:
declare <4 x i16> @test_v4i16_f64_helper(double %p)
define void @test_v4i16_f64(double* %p, <4 x i16>* %q) {
; SOFT: vadd.f64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.f64 d0
    %1 = load double, double* %p
    %2 = fadd double %1, %1
    %3 = call <4 x i16> @test_v4i16_f64_helper(double %2)
    %4 = add <4 x i16> %3, %3
    store <4 x i16> %4, <4 x i16>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.16 [[REG]]
; HARD: vrev64.16 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v4i16_v1i64:
declare <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %p)
define void @test_v4i16_v1i64(<1 x i64>* %p, <4 x i16>* %q) {
; SOFT: vadd.i64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.i64 d0
    %1 = load <1 x i64>, <1 x i64>* %p
    %2 = add <1 x i64> %1, %1
    %3 = call <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %2)
    %4 = add <4 x i16> %3, %3
    store <4 x i16> %4, <4 x i16>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.16 [[REG]]
; HARD: vrev64.16 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v4i16_v2f32:
declare <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %p)
define void @test_v4i16_v2f32(<2 x float>* %p, <4 x i16>* %q) {
; HARD: vadd.f32 [[REG:d[0-9]+]]
; HARD: vrev64.32 d0, [[REG]]
; SOFT: vadd.f32 [[REG:d[0-9]+]]
; SOFT: vrev64.32 [[REG]]
; SOFT: vmov r1, r0, [[REG]]
    %1 = load <2 x float>, <2 x float>* %p
    %2 = fadd <2 x float> %1, %1
    %3 = call <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %2)
    %4 = add <4 x i16> %3, %3
    store <4 x i16> %4, <4 x i16>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.16 [[REG]]
; HARD: vrev64.16 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v4i16_v2i32:
declare <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %p)
define void @test_v4i16_v2i32(<2 x i32>* %p, <4 x i16>* %q) {
; HARD: vadd.i32 [[REG:d[0-9]+]]
; HARD: vrev64.32 d0, [[REG]]
; SOFT: vadd.i32 [[REG:d[0-9]+]]
; SOFT: vrev64.32 [[REG]]
; SOFT: vmov r1, r0, [[REG]]
    %1 = load <2 x i32>, <2 x i32>* %p
    %2 = add <2 x i32> %1, %1
    %3 = call <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %2)
    %4 = add <4 x i16> %3, %3
    store <4 x i16> %4, <4 x i16>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.16 [[REG]]
; HARD: vrev64.16 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v4i16_v8i8:
declare <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %p)
define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) {
; SOFT: vrev64.8 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.8 d0
    %1 = load <8 x i8>, <8 x i8>* %p
    %2 = add <8 x i8> %1, %1
    %3 = call <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %2)
    %4 = add <4 x i16> %3, %3
    store <4 x i16> %4, <4 x i16>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.16 [[REG]]
; HARD: vrev64.16 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v8i8_i64:
declare <8 x i8> @test_v8i8_i64_helper(i64 %p)
define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) {
; CHECK: adds r1
; CHECK: adc r0
    %1 = load i64, i64* %p
    %2 = add i64 %1, %1
    %3 = call <8 x i8> @test_v8i8_i64_helper(i64 %2)
    %4 = add <8 x i8> %3, %3
    store <8 x i8> %4, <8 x i8>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.8 [[REG]]
; HARD: vrev64.8 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v8i8_f64:
declare <8 x i8> @test_v8i8_f64_helper(double %p)
define void @test_v8i8_f64(double* %p, <8 x i8>* %q) {
; SOFT: vadd.f64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.f64 d0
    %1 = load double, double* %p
    %2 = fadd double %1, %1
    %3 = call <8 x i8> @test_v8i8_f64_helper(double %2)
    %4 = add <8 x i8> %3, %3
    store <8 x i8> %4, <8 x i8>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.8 [[REG]]
; HARD: vrev64.8 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v8i8_v1i64:
declare <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %p)
define void @test_v8i8_v1i64(<1 x i64>* %p, <8 x i8>* %q) {
; SOFT: vadd.i64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.i64 d0
    %1 = load <1 x i64>, <1 x i64>* %p
    %2 = add <1 x i64> %1, %1
    %3 = call <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %2)
    %4 = add <8 x i8> %3, %3
    store <8 x i8> %4, <8 x i8>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.8 [[REG]]
; HARD: vrev64.8 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v8i8_v2f32:
declare <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %p)
define void @test_v8i8_v2f32(<2 x float>* %p, <8 x i8>* %q) {
; SOFT: vrev64.32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.32 d0
    %1 = load <2 x float>, <2 x float>* %p
    %2 = fadd <2 x float> %1, %1
    %3 = call <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %2)
    %4 = add <8 x i8> %3, %3
    store <8 x i8> %4, <8 x i8>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.8 [[REG]]
; HARD: vrev64.8 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v8i8_v2i32:
declare <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %p)
define void @test_v8i8_v2i32(<2 x i32>* %p, <8 x i8>* %q) {
; SOFT: vrev64.32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.32 d0
    %1 = load <2 x i32>, <2 x i32>* %p
    %2 = add <2 x i32> %1, %1
    %3 = call <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %2)
    %4 = add <8 x i8> %3, %3
    store <8 x i8> %4, <8 x i8>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.8 [[REG]]
; HARD: vrev64.8 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v8i8_v4i16:
declare <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %p)
define void @test_v8i8_v4i16(<4 x i16>* %p, <8 x i8>* %q) {
; SOFT: vrev64.16 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.16 d0
    %1 = load <4 x i16>, <4 x i16>* %p
    %2 = add <4 x i16> %1, %1
    %3 = call <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %2)
    %4 = add <8 x i8> %3, %3
    store <8 x i8> %4, <8 x i8>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.8 [[REG]]
; HARD: vrev64.8 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_f128_v2f64:
declare fp128 @test_f128_v2f64_helper(<2 x double> %p)
define void @test_f128_v2f64(<2 x double>* %p, fp128* %q) {
; SOFT: vadd.f64 [[REG2:d[0-9]+]]
; SOFT: vadd.f64 [[REG1:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG1]]
; SOFT: vmov r3, r2, [[REG2]]
; HARD: vadd.f64 d1
; HARD: vadd.f64 d0
    %1 = load <2 x double>, <2 x double>* %p
    %2 = fadd <2 x double> %1, %1
    %3 = call fp128 @test_f128_v2f64_helper(<2 x double> %2)
    %4 = fadd fp128 %3, %3
    store fp128 %4, fp128* %q
    ret void
; CHECK: stm sp, {r0, r1, r2, r3}
}

; CHECK-LABEL: test_f128_v2i64:
declare fp128 @test_f128_v2i64_helper(<2 x i64> %p)
define void @test_f128_v2i64(<2 x i64>* %p, fp128* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.i64 q0
    %1 = load <2 x i64>, <2 x i64>* %p
    %2 = add <2 x i64> %1, %1
    %3 = call fp128 @test_f128_v2i64_helper(<2 x i64> %2)
    %4 = fadd fp128 %3, %3
    store fp128 %4, fp128* %q
    ret void
; CHECK: stm sp, {r0, r1, r2, r3}
}

; CHECK-LABEL: test_f128_v4f32:
declare fp128 @test_f128_v4f32_helper(<4 x float> %p)
define void @test_f128_v4f32(<4 x float>* %p, fp128* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x float>, <4 x float>* %p
    %2 = fadd <4 x float> %1, %1
    %3 = call fp128 @test_f128_v4f32_helper(<4 x float> %2)
    %4 = fadd fp128 %3, %3
    store fp128 %4, fp128* %q
    ret void
; CHECK: stm sp, {r0, r1, r2, r3}
}

; CHECK-LABEL: test_f128_v4i32:
declare fp128 @test_f128_v4i32_helper(<4 x i32> %p)
define void @test_f128_v4i32(<4 x i32>* %p, fp128* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x i32>, <4 x i32>* %p
    %2 = add <4 x i32> %1, %1
    %3 = call fp128 @test_f128_v4i32_helper(<4 x i32> %2)
    %4 = fadd fp128 %3, %3
    store fp128 %4, fp128* %q
    ret void
; CHECK: stm sp, {r0, r1, r2, r3}
}

; CHECK-LABEL: test_f128_v8i16:
declare fp128 @test_f128_v8i16_helper(<8 x i16> %p)
define void @test_f128_v8i16(<8 x i16>* %p, fp128* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.16 q0
    %1 = load <8 x i16>, <8 x i16>* %p
    %2 = add <8 x i16> %1, %1
    %3 = call fp128 @test_f128_v8i16_helper(<8 x i16> %2)
    %4 = fadd fp128 %3, %3
    store fp128 %4, fp128* %q
    ret void
; CHECK: stm sp, {r0, r1, r2, r3}
}

; CHECK-LABEL: test_f128_v16i8:
declare fp128 @test_f128_v16i8_helper(<16 x i8> %p)
define void @test_f128_v16i8(<16 x i8>* %p, fp128* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.8 q0
    %1 = load <16 x i8>, <16 x i8>* %p
    %2 = add <16 x i8> %1, %1
    %3 = call fp128 @test_f128_v16i8_helper(<16 x i8> %2)
    %4 = fadd fp128 %3, %3
    store fp128 %4, fp128* %q
    ret void
; CHECK: stm sp, {r0, r1, r2, r3}
}

; CHECK-LABEL: test_v2f64_f128:
declare <2 x double> @test_v2f64_f128_helper(fp128 %p)
define void @test_v2f64_f128(fp128* %p, <2 x double>* %q) {
    %1 = load fp128, fp128* %p
    %2 = fadd fp128 %1, %1
    %3 = call <2 x double> @test_v2f64_f128_helper(fp128 %2)
    %4 = fadd <2 x double> %3, %3
    store <2 x double> %4, <2 x double>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2f64_v2i64:
declare <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %p)
define void @test_v2f64_v2i64(<2 x i64>* %p, <2 x double>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.i64 q0
    %1 = load <2 x i64>, <2 x i64>* %p
    %2 = add <2 x i64> %1, %1
    %3 = call <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %2)
    %4 = fadd <2 x double> %3, %3
    store <2 x double> %4, <2 x double>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2f64_v4f32:
declare <2 x double> @test_v2f64_v4f32_helper(<4 x float> %p)
define void @test_v2f64_v4f32(<4 x float>* %p, <2 x double>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x float>, <4 x float>* %p
    %2 = fadd <4 x float> %1, %1
    %3 = call <2 x double> @test_v2f64_v4f32_helper(<4 x float> %2)
    %4 = fadd <2 x double> %3, %3
    store <2 x double> %4, <2 x double>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2f64_v4i32:
declare <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %p)
define void @test_v2f64_v4i32(<4 x i32>* %p, <2 x double>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x i32>, <4 x i32>* %p
    %2 = add <4 x i32> %1, %1
    %3 = call <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %2)
    %4 = fadd <2 x double> %3, %3
    store <2 x double> %4, <2 x double>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2f64_v8i16:
declare <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %p)
define void @test_v2f64_v8i16(<8 x i16>* %p, <2 x double>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.16 q0
    %1 = load <8 x i16>, <8 x i16>* %p
    %2 = add <8 x i16> %1, %1
    %3 = call <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %2)
    %4 = fadd <2 x double> %3, %3
    store <2 x double> %4, <2 x double>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2f64_v16i8:
declare <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %p)
define void @test_v2f64_v16i8(<16 x i8>* %p, <2 x double>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.8 q0
    %1 = load <16 x i8>, <16 x i8>* %p
    %2 = add <16 x i8> %1, %1
    %3 = call <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %2)
    %4 = fadd <2 x double> %3, %3
    store <2 x double> %4, <2 x double>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2i64_f128:
declare <2 x i64> @test_v2i64_f128_helper(fp128 %p)
define void @test_v2i64_f128(fp128* %p, <2 x i64>* %q) {
    %1 = load fp128, fp128* %p
    %2 = fadd fp128 %1, %1
    %3 = call <2 x i64> @test_v2i64_f128_helper(fp128 %2)
    %4 = add <2 x i64> %3, %3
    store <2 x i64> %4, <2 x i64>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2i64_v2f64:
declare <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %p)
define void @test_v2i64_v2f64(<2 x double>* %p, <2 x i64>* %q) {
; SOFT: vmov r1, r0, [[REG1]]
; SOFT: vmov r3, r2, [[REG2]]
; HARD: vadd.f64 d1
; HARD: vadd.f64 d0
    %1 = load <2 x double>, <2 x double>* %p
    %2 = fadd <2 x double> %1, %1
    %3 = call <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %2)
    %4 = add <2 x i64> %3, %3
    store <2 x i64> %4, <2 x i64>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2i64_v4f32:
declare <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %p)
define void @test_v2i64_v4f32(<4 x float>* %p, <2 x i64>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x float>, <4 x float>* %p
    %2 = fadd <4 x float> %1, %1
    %3 = call <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %2)
    %4 = add <2 x i64> %3, %3
    store <2 x i64> %4, <2 x i64>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2i64_v4i32:
declare <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %p)
define void @test_v2i64_v4i32(<4 x i32>* %p, <2 x i64>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x i32>, <4 x i32>* %p
    %2 = add <4 x i32> %1, %1
    %3 = call <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %2)
    %4 = add <2 x i64> %3, %3
    store <2 x i64> %4, <2 x i64>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2i64_v8i16:
declare <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %p)
define void @test_v2i64_v8i16(<8 x i16>* %p, <2 x i64>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.16 q0
    %1 = load <8 x i16>, <8 x i16>* %p
    %2 = add <8 x i16> %1, %1
    %3 = call <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %2)
    %4 = add <2 x i64> %3, %3
    store <2 x i64> %4, <2 x i64>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2i64_v16i8:
declare <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %p)
define void @test_v2i64_v16i8(<16 x i8>* %p, <2 x i64>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.8 q0
    %1 = load <16 x i8>, <16 x i8>* %p
    %2 = add <16 x i8> %1, %1
    %3 = call <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %2)
    %4 = add <2 x i64> %3, %3
    store <2 x i64> %4, <2 x i64>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4f32_f128:
declare <4 x float> @test_v4f32_f128_helper(fp128 %p)
define void @test_v4f32_f128(fp128* %p, <4 x float>* %q) {
    %1 = load fp128, fp128* %p
    %2 = fadd fp128 %1, %1
    %3 = call <4 x float> @test_v4f32_f128_helper(fp128 %2)
    %4 = fadd <4 x float> %3, %3
    store <4 x float> %4, <4 x float>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4f32_v2f64:
declare <4 x float> @test_v4f32_v2f64_helper(<2 x double> %p)
define void @test_v4f32_v2f64(<2 x double>* %p, <4 x float>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.f64 d1
; HARD: vadd.f64 d0
    %1 = load <2 x double>, <2 x double>* %p
    %2 = fadd <2 x double> %1, %1
    %3 = call <4 x float> @test_v4f32_v2f64_helper(<2 x double> %2)
    %4 = fadd <4 x float> %3, %3
    store <4 x float> %4, <4 x float>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4f32_v2i64:
declare <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %p)
define void @test_v4f32_v2i64(<2 x i64>* %p, <4 x float>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.i64 q0
    %1 = load <2 x i64>, <2 x i64>* %p
    %2 = add <2 x i64> %1, %1
    %3 = call <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %2)
    %4 = fadd <4 x float> %3, %3
    store <4 x float> %4, <4 x float>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4f32_v4i32:
declare <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %p)
define void @test_v4f32_v4i32(<4 x i32>* %p, <4 x float>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x i32>, <4 x i32>* %p
    %2 = add <4 x i32> %1, %1
    %3 = call <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %2)
    %4 = fadd <4 x float> %3, %3
    store <4 x float> %4, <4 x float>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4f32_v8i16:
declare <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %p)
define void @test_v4f32_v8i16(<8 x i16>* %p, <4 x float>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.16 q0
    %1 = load <8 x i16>, <8 x i16>* %p
    %2 = add <8 x i16> %1, %1
    %3 = call <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %2)
    %4 = fadd <4 x float> %3, %3
    store <4 x float> %4, <4 x float>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4f32_v16i8:
declare <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %p)
define void @test_v4f32_v16i8(<16 x i8>* %p, <4 x float>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.8 q0
    %1 = load <16 x i8>, <16 x i8>* %p
    %2 = add <16 x i8> %1, %1
    %3 = call <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %2)
    %4 = fadd <4 x float> %3, %3
    store <4 x float> %4, <4 x float>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4i32_f128:
declare <4 x i32> @test_v4i32_f128_helper(fp128 %p)
define void @test_v4i32_f128(fp128* %p, <4 x i32>* %q) {
    %1 = load fp128, fp128* %p
    %2 = fadd fp128 %1, %1
    %3 = call <4 x i32> @test_v4i32_f128_helper(fp128 %2)
    %4 = add <4 x i32> %3, %3
    store <4 x i32> %4, <4 x i32>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4i32_v2f64:
declare <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %p)
define void @test_v4i32_v2f64(<2 x double>* %p, <4 x i32>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.f64 d1
; HARD: vadd.f64 d0
    %1 = load <2 x double>, <2 x double>* %p
    %2 = fadd <2 x double> %1, %1
    %3 = call <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %2)
    %4 = add <4 x i32> %3, %3
    store <4 x i32> %4, <4 x i32>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4i32_v2i64:
declare <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %p)
define void @test_v4i32_v2i64(<2 x i64>* %p, <4 x i32>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.i64 q0
    %1 = load <2 x i64>, <2 x i64>* %p
    %2 = add <2 x i64> %1, %1
    %3 = call <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %2)
    %4 = add <4 x i32> %3, %3
    store <4 x i32> %4, <4 x i32>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4i32_v4f32:
declare <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %p)
define void @test_v4i32_v4f32(<4 x float>* %p, <4 x i32>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x float>, <4 x float>* %p
    %2 = fadd <4 x float> %1, %1
    %3 = call <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %2)
    %4 = add <4 x i32> %3, %3
    store <4 x i32> %4, <4 x i32>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4i32_v8i16:
declare <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %p)
define void @test_v4i32_v8i16(<8 x i16>* %p, <4 x i32>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.16 q0
    %1 = load <8 x i16>, <8 x i16>* %p
    %2 = add <8 x i16> %1, %1
    %3 = call <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %2)
    %4 = add <4 x i32> %3, %3
    store <4 x i32> %4, <4 x i32>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4i32_v16i8:
declare <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %p)
define void @test_v4i32_v16i8(<16 x i8>* %p, <4 x i32>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.8 q0
    %1 = load <16 x i8>, <16 x i8>* %p
    %2 = add <16 x i8> %1, %1
    %3 = call <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %2)
    %4 = add <4 x i32> %3, %3
    store <4 x i32> %4, <4 x i32>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v8i16_f128:
declare <8 x i16> @test_v8i16_f128_helper(fp128 %p)
define void @test_v8i16_f128(fp128* %p, <8 x i16>* %q) {
    %1 = load fp128, fp128* %p
    %2 = fadd fp128 %1, %1
    %3 = call <8 x i16> @test_v8i16_f128_helper(fp128 %2)
    %4 = add <8 x i16> %3, %3
    store <8 x i16> %4, <8 x i16>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v8i16_v2f64:
declare <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %p)
define void @test_v8i16_v2f64(<2 x double>* %p, <8 x i16>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.f64 d1
; HARD: vadd.f64 d0
    %1 = load <2 x double>, <2 x double>* %p
    %2 = fadd <2 x double> %1, %1
    %3 = call <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %2)
    %4 = add <8 x i16> %3, %3
    store <8 x i16> %4, <8 x i16>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v8i16_v2i64:
declare <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %p)
define void @test_v8i16_v2i64(<2 x i64>* %p, <8 x i16>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.i64 q0
    %1 = load <2 x i64>, <2 x i64>* %p
    %2 = add <2 x i64> %1, %1
    %3 = call <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %2)
    %4 = add <8 x i16> %3, %3
    store <8 x i16> %4, <8 x i16>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v8i16_v4f32:
declare <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %p)
define void @test_v8i16_v4f32(<4 x float>* %p, <8 x i16>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x float>, <4 x float>* %p
    %2 = fadd <4 x float> %1, %1
    %3 = call <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %2)
    %4 = add <8 x i16> %3, %3
    store <8 x i16> %4, <8 x i16>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v8i16_v4i32:
declare <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %p)
define void @test_v8i16_v4i32(<4 x i32>* %p, <8 x i16>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x i32>, <4 x i32>* %p
    %2 = add <4 x i32> %1, %1
    %3 = call <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %2)
    %4 = add <8 x i16> %3, %3
    store <8 x i16> %4, <8 x i16>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v8i16_v16i8:
declare <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %p)
define void @test_v8i16_v16i8(<16 x i8>* %p, <8 x i16>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.8 q0
    %1 = load <16 x i8>, <16 x i8>* %p
    %2 = add <16 x i8> %1, %1
    %3 = call <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %2)
    %4 = add <8 x i16> %3, %3
    store <8 x i16> %4, <8 x i16>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v16i8_f128:
declare <16 x i8> @test_v16i8_f128_helper(fp128 %p)
define void @test_v16i8_f128(fp128* %p, <16 x i8>* %q) {
    %1 = load fp128, fp128* %p
    %2 = fadd fp128 %1, %1
    %3 = call <16 x i8> @test_v16i8_f128_helper(fp128 %2)
    %4 = add <16 x i8> %3, %3
    store <16 x i8> %4, <16 x i8>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v16i8_v2f64:
declare <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %p)
define void @test_v16i8_v2f64(<2 x double>* %p, <16 x i8>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.f64 d1
; HARD: vadd.f64 d0
    %1 = load <2 x double>, <2 x double>* %p
    %2 = fadd <2 x double> %1, %1
    %3 = call <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %2)
    %4 = add <16 x i8> %3, %3
    store <16 x i8> %4, <16 x i8>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v16i8_v2i64:
declare <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %p)
define void @test_v16i8_v2i64(<2 x i64>* %p, <16 x i8>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.i64 q0
    %1 = load <2 x i64>, <2 x i64>* %p
    %2 = add <2 x i64> %1, %1
    %3 = call <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %2)
    %4 = add <16 x i8> %3, %3
    store <16 x i8> %4, <16 x i8>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v16i8_v4f32:
declare <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %p)
define void @test_v16i8_v4f32(<4 x float>* %p, <16 x i8>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x float>, <4 x float>* %p
    %2 = fadd <4 x float> %1, %1
    %3 = call <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %2)
    %4 = add <16 x i8> %3, %3
    store <16 x i8> %4, <16 x i8>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v16i8_v4i32:
declare <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %p)
define void @test_v16i8_v4i32(<4 x i32>* %p, <16 x i8>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x i32>, <4 x i32>* %p
    %2 = add <4 x i32> %1, %1
    %3 = call <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %2)
    %4 = add <16 x i8> %3, %3
    store <16 x i8> %4, <16 x i8>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v16i8_v8i16:
declare <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %p)
define void @test_v16i8_v8i16(<8 x i16>* %p, <16 x i8>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.16 q0
    %1 = load <8 x i16>, <8 x i16>* %p
    %2 = add <8 x i16> %1, %1
    %3 = call <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %2)
    %4 = add <16 x i8> %3, %3
    store <16 x i8> %4, <16 x i8>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}