; RUN: llc < %s -mtriple aarch64-apple-darwin -asm-verbose=false -disable-post-ra | FileCheck %s

; Every function in this file stores its %v argument with "align 1" and
; !nontemporal metadata. The FileCheck patterns attached to each function
; describe the expected lowering: the value is split into a low and a high
; half and both halves are written by one stnp (two stnp for <4 x i64>).

;------------------------------------------------------------------------------
; Plain stores through the incoming pointer.
; Expected register class per value type: d-register pairs for the 256- and
; 128-bit vectors, s-register pairs for the 64-bit vectors, and w-register
; pairs for a scalar i64.
;------------------------------------------------------------------------------

define void @test_stnp_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 {
; CHECK-LABEL: test_stnp_v4i64:
; CHECK-NEXT: mov d[[HI1:[0-9]+]], v1[1]
; CHECK-NEXT: mov d[[HI0:[0-9]+]], v0[1]
; CHECK-NEXT: stnp d1, d[[HI1]], [x0, #16]
; CHECK-NEXT: stnp d0, d[[HI0]], [x0]
; CHECK-NEXT: ret
  store <4 x i64> %v, <4 x i64>* %p, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v4i32(<4 x i32>* %p, <4 x i32> %v) #0 {
; CHECK-LABEL: test_stnp_v4i32:
; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp d0, d[[HI]], [x0]
; CHECK-NEXT: ret
  store <4 x i32> %v, <4 x i32>* %p, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v8i16(<8 x i16>* %p, <8 x i16> %v) #0 {
; CHECK-LABEL: test_stnp_v8i16:
; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp d0, d[[HI]], [x0]
; CHECK-NEXT: ret
  store <8 x i16> %v, <8 x i16>* %p, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
; CHECK-LABEL: test_stnp_v16i8:
; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp d0, d[[HI]], [x0]
; CHECK-NEXT: ret
  store <16 x i8> %v, <16 x i8>* %p, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v2i32(<2 x i32>* %p, <2 x i32> %v) #0 {
; CHECK-LABEL: test_stnp_v2i32:
; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp s0, s[[HI]], [x0]
; CHECK-NEXT: ret
  store <2 x i32> %v, <2 x i32>* %p, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v4i16(<4 x i16>* %p, <4 x i16> %v) #0 {
; CHECK-LABEL: test_stnp_v4i16:
; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp s0, s[[HI]], [x0]
; CHECK-NEXT: ret
  store <4 x i16> %v, <4 x i16>* %p, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v8i8(<8 x i8>* %p, <8 x i8> %v) #0 {
; CHECK-LABEL: test_stnp_v8i8:
; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp s0, s[[HI]], [x0]
; CHECK-NEXT: ret
  store <8 x i8> %v, <8 x i8>* %p, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v2f64(<2 x double>* %p, <2 x double> %v) #0 {
; CHECK-LABEL: test_stnp_v2f64:
; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp d0, d[[HI]], [x0]
; CHECK-NEXT: ret
  store <2 x double> %v, <2 x double>* %p, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v4f32(<4 x float>* %p, <4 x float> %v) #0 {
; CHECK-LABEL: test_stnp_v4f32:
; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp d0, d[[HI]], [x0]
; CHECK-NEXT: ret
  store <4 x float> %v, <4 x float>* %p, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v2f32(<2 x float>* %p, <2 x float> %v) #0 {
; CHECK-LABEL: test_stnp_v2f32:
; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp s0, s[[HI]], [x0]
; CHECK-NEXT: ret
  store <2 x float> %v, <2 x float>* %p, align 1, !nontemporal !0
  ret void
}

; Single-element 64-bit vectors: the expected output still uses an s-register
; pair, i.e. the one element is written as two 32-bit halves.

define void @test_stnp_v1f64(<1 x double>* %p, <1 x double> %v) #0 {
; CHECK-LABEL: test_stnp_v1f64:
; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp s0, s[[HI]], [x0]
; CHECK-NEXT: ret
  store <1 x double> %v, <1 x double>* %p, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v1i64(<1 x i64>* %p, <1 x i64> %v) #0 {
; CHECK-LABEL: test_stnp_v1i64:
; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp s0, s[[HI]], [x0]
; CHECK-NEXT: ret
  store <1 x i64> %v, <1 x i64>* %p, align 1, !nontemporal !0
  ret void
}

; Scalar i64: the high word is expected to be produced by a 32-bit right
; shift, and the pair is written from w registers.

define void @test_stnp_i64(i64* %p, i64 %v) #0 {
; CHECK-LABEL: test_stnp_i64:
; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32
; CHECK-NEXT: stnp w1, w[[HI]], [x0]
; CHECK-NEXT: ret
  store i64 %v, i64* %p, align 1, !nontemporal !0
  ret void
}


;------------------------------------------------------------------------------
; Stores one element before / after %p.
; The element-sized displacement is expected to appear as the immediate
; offset of the stnp itself.
;------------------------------------------------------------------------------

define void @test_stnp_v2f64_offset(<2 x double>* %p, <2 x double> %v) #0 {
; CHECK-LABEL: test_stnp_v2f64_offset:
; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp d0, d[[HI]], [x0, #16]
; CHECK-NEXT: ret
  %slot = getelementptr <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v, <2 x double>* %slot, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v2f64_offset_neg(<2 x double>* %p, <2 x double> %v) #0 {
; CHECK-LABEL: test_stnp_v2f64_offset_neg:
; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp d0, d[[HI]], [x0, #-16]
; CHECK-NEXT: ret
  %slot = getelementptr <2 x double>, <2 x double>* %p, i32 -1
  store <2 x double> %v, <2 x double>* %slot, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v2f32_offset(<2 x float>* %p, <2 x float> %v) #0 {
; CHECK-LABEL: test_stnp_v2f32_offset:
; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp s0, s[[HI]], [x0, #8]
; CHECK-NEXT: ret
  %slot = getelementptr <2 x float>, <2 x float>* %p, i32 1
  store <2 x float> %v, <2 x float>* %slot, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v2f32_offset_neg(<2 x float>* %p, <2 x float> %v) #0 {
; CHECK-LABEL: test_stnp_v2f32_offset_neg:
; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp s0, s[[HI]], [x0, #-8]
; CHECK-NEXT: ret
  %slot = getelementptr <2 x float>, <2 x float>* %p, i32 -1
  store <2 x float> %v, <2 x float>* %slot, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 {
; CHECK-LABEL: test_stnp_i64_offset:
; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32
; CHECK-NEXT: stnp w1, w[[HI]], [x0, #8]
; CHECK-NEXT: ret
  %slot = getelementptr i64, i64* %p, i32 1
  store i64 %v, i64* %slot, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_i64_offset_neg(i64* %p, i64 %v) #0 {
; CHECK-LABEL: test_stnp_i64_offset_neg:
; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32
; CHECK-NEXT: stnp w1, w[[HI]], [x0, #-8]
; CHECK-NEXT: ret
  %slot = getelementptr i64, i64* %p, i32 -1
  store i64 %v, i64* %slot, align 1, !nontemporal !0
  ret void
}

;------------------------------------------------------------------------------
; <4 x float> stores at byte displacements from an i8* base.
; Functions named "invalid_offset" expect the displacement to be applied by a
; separate add/sub into a scratch pointer, with the stnp using no immediate.
; The remaining functions (504 and -512) expect the displacement to be used
; directly as the stnp immediate.
;------------------------------------------------------------------------------

define void @test_stnp_v4f32_invalid_offset_4(i8* %p, <4 x float> %v) #0 {
; CHECK-LABEL: test_stnp_v4f32_invalid_offset_4:
; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #4
; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]]
; CHECK-NEXT: ret
  %byteaddr = getelementptr i8, i8* %p, i32 4
  %vecaddr = bitcast i8* %byteaddr to <4 x float>*
  store <4 x float> %v, <4 x float>* %vecaddr, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v4f32_invalid_offset_neg_4(i8* %p, <4 x float> %v) #0 {
; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_4:
; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #4
; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]]
; CHECK-NEXT: ret
  %byteaddr = getelementptr i8, i8* %p, i32 -4
  %vecaddr = bitcast i8* %byteaddr to <4 x float>*
  store <4 x float> %v, <4 x float>* %vecaddr, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v4f32_invalid_offset_512(i8* %p, <4 x float> %v) #0 {
; CHECK-LABEL: test_stnp_v4f32_invalid_offset_512:
; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #512
; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]]
; CHECK-NEXT: ret
  %byteaddr = getelementptr i8, i8* %p, i32 512
  %vecaddr = bitcast i8* %byteaddr to <4 x float>*
  store <4 x float> %v, <4 x float>* %vecaddr, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v4f32_offset_504(i8* %p, <4 x float> %v) #0 {
; CHECK-LABEL: test_stnp_v4f32_offset_504:
; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp d0, d[[HI]], [x0, #504]
; CHECK-NEXT: ret
  %byteaddr = getelementptr i8, i8* %p, i32 504
  %vecaddr = bitcast i8* %byteaddr to <4 x float>*
  store <4 x float> %v, <4 x float>* %vecaddr, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v4f32_invalid_offset_508(i8* %p, <4 x float> %v) #0 {
; CHECK-LABEL: test_stnp_v4f32_invalid_offset_508:
; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #508
; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]]
; CHECK-NEXT: ret
  %byteaddr = getelementptr i8, i8* %p, i32 508
  %vecaddr = bitcast i8* %byteaddr to <4 x float>*
  store <4 x float> %v, <4 x float>* %vecaddr, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v4f32_invalid_offset_neg_520(i8* %p, <4 x float> %v) #0 {
; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_520:
; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #520
; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]]
; CHECK-NEXT: ret
  %byteaddr = getelementptr i8, i8* %p, i32 -520
  %vecaddr = bitcast i8* %byteaddr to <4 x float>*
  store <4 x float> %v, <4 x float>* %vecaddr, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v4f32_offset_neg_512(i8* %p, <4 x float> %v) #0 {
; CHECK-LABEL: test_stnp_v4f32_offset_neg_512:
; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp d0, d[[HI]], [x0, #-512]
; CHECK-NEXT: ret
  %byteaddr = getelementptr i8, i8* %p, i32 -512
  %vecaddr = bitcast i8* %byteaddr to <4 x float>*
  store <4 x float> %v, <4 x float>* %vecaddr, align 1, !nontemporal !0
  ret void
}


;------------------------------------------------------------------------------
; <2 x float> stores at byte displacements from an i8* base.
; Same scheme as the <4 x float> group, with s-register pairs: 252 and -256
; are expected as stnp immediates, while 256 and -260 are expected to go
; through a separate add/sub.
;------------------------------------------------------------------------------

define void @test_stnp_v2f32_invalid_offset_256(i8* %p, <2 x float> %v) #0 {
; CHECK-LABEL: test_stnp_v2f32_invalid_offset_256:
; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #256
; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp s0, s[[HI]], [x[[PTR]]]
; CHECK-NEXT: ret
  %byteaddr = getelementptr i8, i8* %p, i32 256
  %vecaddr = bitcast i8* %byteaddr to <2 x float>*
  store <2 x float> %v, <2 x float>* %vecaddr, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v2f32_offset_252(i8* %p, <2 x float> %v) #0 {
; CHECK-LABEL: test_stnp_v2f32_offset_252:
; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp s0, s[[HI]], [x0, #252]
; CHECK-NEXT: ret
  %byteaddr = getelementptr i8, i8* %p, i32 252
  %vecaddr = bitcast i8* %byteaddr to <2 x float>*
  store <2 x float> %v, <2 x float>* %vecaddr, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v2f32_invalid_offset_neg_260(i8* %p, <2 x float> %v) #0 {
; CHECK-LABEL: test_stnp_v2f32_invalid_offset_neg_260:
; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #260
; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp s0, s[[HI]], [x[[PTR]]]
; CHECK-NEXT: ret
  %byteaddr = getelementptr i8, i8* %p, i32 -260
  %vecaddr = bitcast i8* %byteaddr to <2 x float>*
  store <2 x float> %v, <2 x float>* %vecaddr, align 1, !nontemporal !0
  ret void
}

define void @test_stnp_v2f32_offset_neg_256(i8* %p, <2 x float> %v) #0 {
; CHECK-LABEL: test_stnp_v2f32_offset_neg_256:
; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
; CHECK-NEXT: stnp s0, s[[HI]], [x0, #-256]
; CHECK-NEXT: ret
  %byteaddr = getelementptr i8, i8* %p, i32 -256
  %vecaddr = bitcast i8* %byteaddr to <2 x float>*
  store <2 x float> %v, <2 x float>* %vecaddr, align 1, !nontemporal !0
  ret void
}

;------------------------------------------------------------------------------
; Stores into stack slots.
; The slot address is also passed to an external function; the expected
; output addresses the stnp directly off sp and then materializes the slot
; base with "mov x0, sp" for the call.
;------------------------------------------------------------------------------

declare void @dummy(<4 x float>*)

define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 {
; CHECK-LABEL: test_stnp_v4f32_offset_alloca:
; CHECK: stnp d0, d{{.*}}, [sp]
; CHECK-NEXT: mov x0, sp
; CHECK-NEXT: bl _dummy
  %buf = alloca <4 x float>
  store <4 x float> %v, <4 x float>* %buf, align 1, !nontemporal !0
  call void @dummy(<4 x float>* %buf)
  ret void
}

; Two-element stack array; the store targets the second element, so the
; expected stnp carries a #16 displacement from sp.
define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 {
; CHECK-LABEL: test_stnp_v4f32_offset_alloca_2:
; CHECK: stnp d0, d{{.*}}, [sp, #16]
; CHECK-NEXT: mov x0, sp
; CHECK-NEXT: bl _dummy
  %buf = alloca <4 x float>, i32 2
  %second = getelementptr <4 x float>, <4 x float>* %buf, i32 1
  store <4 x float> %v, <4 x float>* %second, align 1, !nontemporal !0
  call void @dummy(<4 x float>* %buf)
  ret void
}

; Operand of every !nontemporal attachment above.
!0 = !{ i32 1 }

attributes #0 = { nounwind }