; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s

; Integer splat constants returned in 64-bit vectors. The expected
; instruction for each is a vmov/vmvn immediate written to a D register.

define <8 x i8> @v_movi8() nounwind {
;CHECK-LABEL: v_movi8:
;CHECK: vmov.i8 d{{.*}}, #0x8
  ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
}

define <4 x i16> @v_movi16a() nounwind {
;CHECK-LABEL: v_movi16a:
;CHECK: vmov.i16 d{{.*}}, #0x10
  ret <4 x i16> < i16 16, i16 16, i16 16, i16 16 >
}

define <4 x i16> @v_movi16b() nounwind {
;CHECK-LABEL: v_movi16b:
;CHECK: vmov.i16 d{{.*}}, #0x1000
  ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 >
}

; 65519 = 0xffef, the 16-bit complement of 0x0010.
define <4 x i16> @v_mvni16a() nounwind {
;CHECK-LABEL: v_mvni16a:
;CHECK: vmvn.i16 d{{.*}}, #0x10
  ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 >
}

; 61439 = 0xefff, the 16-bit complement of 0x1000.
define <4 x i16> @v_mvni16b() nounwind {
;CHECK-LABEL: v_mvni16b:
;CHECK: vmvn.i16 d{{.*}}, #0x1000
  ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 >
}

; 32-bit splats: the non-zero byte 0x20 is placed in each of the four byte
; positions in turn (a-d), then followed by one or two 0xff bytes (e, f).

define <2 x i32> @v_movi32a() nounwind {
;CHECK-LABEL: v_movi32a:
;CHECK: vmov.i32 d{{.*}}, #0x20
  ret <2 x i32> < i32 32, i32 32 >
}

define <2 x i32> @v_movi32b() nounwind {
;CHECK-LABEL: v_movi32b:
;CHECK: vmov.i32 d{{.*}}, #0x2000
  ret <2 x i32> < i32 8192, i32 8192 >
}

define <2 x i32> @v_movi32c() nounwind {
;CHECK-LABEL: v_movi32c:
;CHECK: vmov.i32 d{{.*}}, #0x200000
  ret <2 x i32> < i32 2097152, i32 2097152 >
}

define <2 x i32> @v_movi32d() nounwind {
;CHECK-LABEL: v_movi32d:
;CHECK: vmov.i32 d{{.*}}, #0x20000000
  ret <2 x i32> < i32 536870912, i32 536870912 >
}

; 8447 = 0x20ff.
define <2 x i32> @v_movi32e() nounwind {
;CHECK-LABEL: v_movi32e:
;CHECK: vmov.i32 d{{.*}}, #0x20ff
  ret <2 x i32> < i32 8447, i32 8447 >
}

; 2162687 = 0x20ffff.
define <2 x i32> @v_movi32f() nounwind {
;CHECK-LABEL: v_movi32f:
;CHECK: vmov.i32 d{{.*}}, #0x20ffff
  ret <2 x i32> < i32 2162687, i32 2162687 >
}

; The vmvn tests below return the 32-bit complement of the immediate that
; is expected in the instruction.

; 4294967263 = 0xffffffdf = ~0x20.
define <2 x i32> @v_mvni32a() nounwind {
;CHECK-LABEL: v_mvni32a:
;CHECK: vmvn.i32 d{{.*}}, #0x20
  ret <2 x i32> < i32 4294967263, i32 4294967263 >
}

; 4294959103 = 0xffffdfff = ~0x2000.
define <2 x i32> @v_mvni32b() nounwind {
;CHECK-LABEL: v_mvni32b:
;CHECK: vmvn.i32 d{{.*}}, #0x2000
  ret <2 x i32> < i32 4294959103, i32 4294959103 >
}

; 4292870143 = 0xffdfffff = ~0x200000.
define <2 x i32> @v_mvni32c() nounwind {
;CHECK-LABEL: v_mvni32c:
;CHECK: vmvn.i32 d{{.*}}, #0x200000
  ret <2 x i32> < i32 4292870143, i32 4292870143 >
}

; 3758096383 = 0xdfffffff = ~0x20000000.
define <2 x i32> @v_mvni32d() nounwind {
;CHECK-LABEL: v_mvni32d:
;CHECK: vmvn.i32 d{{.*}}, #0x20000000
  ret <2 x i32> < i32 3758096383, i32 3758096383 >
}

; 4294958848 = 0xffffdf00 = ~0x20ff.
define <2 x i32> @v_mvni32e() nounwind {
;CHECK-LABEL: v_mvni32e:
;CHECK: vmvn.i32 d{{.*}}, #0x20ff
  ret <2 x i32> < i32 4294958848, i32 4294958848 >
}

; 4292804608 = 0xffdf0000 = ~0x20ffff.
define <2 x i32> @v_mvni32f() nounwind {
;CHECK-LABEL: v_mvni32f:
;CHECK: vmvn.i32 d{{.*}}, #0x20ffff
  ret <2 x i32> < i32 4292804608, i32 4292804608 >
}

; 18374687574888349695 = 0xff0000ff0000ffff (every byte is 0x00 or 0xff).
define <1 x i64> @v_movi64() nounwind {
;CHECK-LABEL: v_movi64:
;CHECK: vmov.i64 d{{.*}}, #0xff0000ff0000ffff
  ret <1 x i64> < i64 18374687574888349695 >
}

; The same splat constants returned in 128-bit vectors; the expected
; destination is a Q register.

define <16 x i8> @v_movQi8() nounwind {
;CHECK-LABEL: v_movQi8:
;CHECK: vmov.i8 q{{.*}}, #0x8
  ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
}

define <8 x i16> @v_movQi16a() nounwind {
;CHECK-LABEL: v_movQi16a:
;CHECK: vmov.i16 q{{.*}}, #0x10
  ret <8 x i16> < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
}

define <8 x i16> @v_movQi16b() nounwind {
;CHECK-LABEL: v_movQi16b:
;CHECK: vmov.i16 q{{.*}}, #0x1000
  ret <8 x i16> < i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096 >
}

define <4 x i32> @v_movQi32a() nounwind {
;CHECK-LABEL: v_movQi32a:
;CHECK: vmov.i32 q{{.*}}, #0x20
  ret <4 x i32> < i32 32, i32 32, i32 32, i32 32 >
}

define <4 x i32> @v_movQi32b() nounwind {
;CHECK-LABEL: v_movQi32b:
;CHECK: vmov.i32 q{{.*}}, #0x2000
  ret <4 x i32> < i32 8192, i32 8192, i32 8192, i32 8192 >
}

define <4 x i32> @v_movQi32c() nounwind {
;CHECK-LABEL: v_movQi32c:
;CHECK: vmov.i32 q{{.*}}, #0x200000
  ret <4 x i32> < i32 2097152, i32 2097152, i32 2097152, i32 2097152 >
}

define <4 x i32> @v_movQi32d() nounwind {
;CHECK-LABEL: v_movQi32d:
;CHECK: vmov.i32 q{{.*}}, #0x20000000
  ret <4 x i32> < i32 536870912, i32 536870912, i32 536870912, i32 536870912 >
}

define <4 x i32> @v_movQi32e() nounwind {
;CHECK-LABEL: v_movQi32e:
;CHECK: vmov.i32 q{{.*}}, #0x20ff
  ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 >
}

define <4 x i32> @v_movQi32f() nounwind {
;CHECK-LABEL: v_movQi32f:
;CHECK: vmov.i32 q{{.*}}, #0x20ffff
  ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 >
}

define <2 x i64> @v_movQi64() nounwind {
;CHECK-LABEL: v_movQi64:
;CHECK: vmov.i64 q{{.*}}, #0xff0000ff0000ffff
  ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
}

; Check for correct assembler printing for immediate values.
%struct.int8x8_t = type { <8 x i8> }

; Splats of negative i8 values stored through an sret pointer. The expected
; immediates are the unsigned hex forms: -128 -> 0x80 and -75 -> 0xb5.

define void @vdupn128(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
entry:
;CHECK-LABEL: vdupn128:
;CHECK: vmov.i8 d{{.*}}, #0x80
  %0 = getelementptr inbounds %struct.int8x8_t, %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
  store <8 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>, <8 x i8>* %0, align 8
  ret void
}

define void @vdupnneg75(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
entry:
;CHECK-LABEL: vdupnneg75:
;CHECK: vmov.i8 d{{.*}}, #0xb5
  %0 = getelementptr inbounds %struct.int8x8_t, %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
  store <8 x i8> <i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75>, <8 x i8>* %0, align 8
  ret void
}

; Sign extension of a loaded 64-bit vector to 128 bits; expected vmovl.s<N>.

define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vmovls8:
;CHECK: vmovl.s8
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
  ret <8 x i16> %tmp2
}

define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vmovls16:
;CHECK: vmovl.s16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
  ret <4 x i32> %tmp2
}

define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vmovls32:
;CHECK: vmovl.s32
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
  ret <2 x i64> %tmp2
}

; Zero extension of a loaded 64-bit vector to 128 bits; expected vmovl.u<N>.

define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vmovlu8:
;CHECK: vmovl.u8
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
  ret <8 x i16> %tmp2
}

define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vmovlu16:
;CHECK: vmovl.u16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
  ret <4 x i32> %tmp2
}

define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vmovlu32:
;CHECK: vmovl.u32
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
  ret <2 x i64> %tmp2
}

; Truncation of a loaded 128-bit vector to 64 bits; expected vmovn.i<N>.

define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vmovni16:
;CHECK: vmovn.i16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = trunc <8 x i16> %tmp1 to <8 x i8>
  ret <8 x i8> %tmp2
}

define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vmovni32:
;CHECK: vmovn.i32
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
  ret <4 x i16> %tmp2
}

define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vmovni64:
;CHECK: vmovn.i64
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = trunc <2 x i64> %tmp1 to <2 x i32>
  ret <2 x i32> %tmp2
}

; Calls to the llvm.arm.neon.vqmovns intrinsics; expected vqmovn.s<N>.

define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqmovns16:
;CHECK: vqmovn.s16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1)
  ret <8 x i8> %tmp2
}

define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqmovns32:
;CHECK: vqmovn.s32
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1)
  ret <4 x i16> %tmp2
}

define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vqmovns64:
;CHECK: vqmovn.s64
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1)
  ret <2 x i32> %tmp2
}

; Calls to the llvm.arm.neon.vqmovnu intrinsics; expected vqmovn.u<N>.

define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqmovnu16:
;CHECK: vqmovn.u16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1)
  ret <8 x i8> %tmp2
}

define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqmovnu32:
;CHECK: vqmovn.u32
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1)
  ret <4 x i16> %tmp2
}

define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vqmovnu64:
;CHECK: vqmovn.u64
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1)
  ret <2 x i32> %tmp2
}

; Calls to the llvm.arm.neon.vqmovnsu intrinsics; expected vqmovun.s<N>.

define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqmovuns16:
;CHECK: vqmovun.s16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1)
  ret <8 x i8> %tmp2
}

define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqmovuns32:
;CHECK: vqmovun.s32
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1)
  ret <4 x i16> %tmp2
}

define <2 x i32> @vqmovuns64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vqmovuns64:
;CHECK: vqmovun.s64
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1)
  ret <2 x i32> %tmp2
}

declare <8 x i8>  @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) nounwind readnone

; Truncating vector stores are not supported.  The following should not crash.
; Radar 8598391.
; Truncate a loaded <4 x i32> to <4 x i16> and store it; the expected output
; contains a vmovn. The label directive pins that match to this function's
; output, consistent with every other test in this file.
define void @noTruncStore(<4 x i32>* %a, <4 x i16>* %b) nounwind {
;CHECK-LABEL: noTruncStore:
;CHECK: vmovn
  %tmp1 = load <4 x i32>, <4 x i32>* %a, align 16
  %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
  store <4 x i16> %tmp2, <4 x i16>* %b, align 8
  ret void
}

; Use vmov.f32 to materialize f32 immediate splats
; rdar://10437054
define void @v_mov_v2f32(<2 x float>* nocapture %p) nounwind {
entry:
;CHECK-LABEL: v_mov_v2f32:
;CHECK: vmov.f32 d{{.*}}, #-1.600000e+01
  store <2 x float> <float -1.600000e+01, float -1.600000e+01>, <2 x float>* %p, align 4
  ret void
}

define void @v_mov_v4f32(<4 x float>* nocapture %p) nounwind {
entry:
;CHECK-LABEL: v_mov_v4f32:
;CHECK: vmov.f32 q{{.*}}, #3.100000e+01
  store <4 x float> <float 3.100000e+01, float 3.100000e+01, float 3.100000e+01, float 3.100000e+01>, <4 x float>* %p, align 4
  ret void
}

; One lane of the fadd constant operand is undef; the expected output is
; still a vmov.f32 splat of 1.0 into a Q register.
define void @v_mov_v4f32_undef(<4 x float>* nocapture %p) nounwind {
entry:
;CHECK-LABEL: v_mov_v4f32_undef:
;CHECK: vmov.f32 q{{.*}}, #1.000000e+00
  %a = load <4 x float>, <4 x float>* %p
  %b = fadd <4 x float> %a, <float undef, float 1.0, float 1.0, float 1.0>
  store <4 x float> %b, <4 x float>* %p
  ret void
}

; Vector any_extends must be selected as either vmovl.u or vmovl.s.
; rdar://10723651
define void @any_extend(<4 x i1> %x, <4 x i32> %y) nounwind ssp {
entry:
;CHECK-LABEL: any_extend:
;CHECK: vmovl
  %and.i186 = zext <4 x i1> %x to <4 x i32>
  %add.i185 = sub <4 x i32> %and.i186, %y
  %sub.i = sub <4 x i32> %add.i185, zeroinitializer
  %add.i = add <4 x i32> %sub.i, zeroinitializer
  %vmovn.i = trunc <4 x i32> %add.i to <4 x i16>
  tail call void @llvm.arm.neon.vst1.v4i16(i8* undef, <4 x i16> %vmovn.i, i32 2)
  unreachable
}

declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind