; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s

; Stores of a single extracted vector element.
;   st1lane_*     : lane 1 stored through the plain pointer; a lane-form st1 is expected.
;   st1lane_ro_*  : lane 1 stored at pointer + register offset; the address is
;                   expected to be formed with an add before the lane store.
;   st1lane0_ro_* : lane 0 stored at pointer + register offset; for 16/32/64-bit
;                   elements a scalar str with a scaled register offset is
;                   expected, while byte elements still use add + st1.b.

define void @st1lane_16b(<16 x i8> %A, i8* %D) {
; CHECK-LABEL: st1lane_16b:
; CHECK: st1.b
  %tmp = extractelement <16 x i8> %A, i32 1
  store i8 %tmp, i8* %D
  ret void
}

define void @st1lane_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_16b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i8, i8* %D, i64 %offset
  %tmp = extractelement <16 x i8> %A, i32 1
  store i8 %tmp, i8* %ptr
  ret void
}

define void @st1lane0_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_16b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[0], [x[[XREG]]]
  %ptr = getelementptr i8, i8* %D, i64 %offset
  %tmp = extractelement <16 x i8> %A, i32 0
  store i8 %tmp, i8* %ptr
  ret void
}

define void @st1lane_8h(<8 x i16> %A, i16* %D) {
; CHECK-LABEL: st1lane_8h:
; CHECK: st1.h
  %tmp = extractelement <8 x i16> %A, i32 1
  store i16 %tmp, i16* %D
  ret void
}

define void @st1lane_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_8h:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.h { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i16, i16* %D, i64 %offset
  %tmp = extractelement <8 x i16> %A, i32 1
  store i16 %tmp, i16* %ptr
  ret void
}

define void @st1lane0_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_8h:
; CHECK: str h0, [x0, x1, lsl #1]
  %ptr = getelementptr i16, i16* %D, i64 %offset
  %tmp = extractelement <8 x i16> %A, i32 0
  store i16 %tmp, i16* %ptr
  ret void
}

define void @st1lane_4s(<4 x i32> %A, i32* %D) {
; CHECK-LABEL: st1lane_4s:
; CHECK: st1.s
  %tmp = extractelement <4 x i32> %A, i32 1
  store i32 %tmp, i32* %D
  ret void
}

define void @st1lane_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4s:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i32, i32* %D, i64 %offset
  %tmp = extractelement <4 x i32> %A, i32 1
  store i32 %tmp, i32* %ptr
  ret void
}

define void @st1lane0_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4s:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr i32, i32* %D, i64 %offset
  %tmp = extractelement <4 x i32> %A, i32 0
  store i32 %tmp, i32* %ptr
  ret void
}

define void @st1lane_4s_float(<4 x float> %A, float* %D) {
; CHECK-LABEL: st1lane_4s_float:
; CHECK: st1.s
  %tmp = extractelement <4 x float> %A, i32 1
  store float %tmp, float* %D
  ret void
}

define void @st1lane_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4s_float:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr float, float* %D, i64 %offset
  %tmp = extractelement <4 x float> %A, i32 1
  store float %tmp, float* %ptr
  ret void
}

define void @st1lane0_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4s_float:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr float, float* %D, i64 %offset
  %tmp = extractelement <4 x float> %A, i32 0
  store float %tmp, float* %ptr
  ret void
}

define void @st1lane_2d(<2 x i64> %A, i64* %D) {
; CHECK-LABEL: st1lane_2d:
; CHECK: st1.d
  %tmp = extractelement <2 x i64> %A, i32 1
  store i64 %tmp, i64* %D
  ret void
}

define void @st1lane_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2d:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.d { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i64, i64* %D, i64 %offset
  %tmp = extractelement <2 x i64> %A, i32 1
  store i64 %tmp, i64* %ptr
  ret void
}

define void @st1lane0_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2d:
; CHECK: str d0, [x0, x1, lsl #3]
  %ptr = getelementptr i64, i64* %D, i64 %offset
  %tmp = extractelement <2 x i64> %A, i32 0
  store i64 %tmp, i64* %ptr
  ret void
}

define void @st1lane_2d_double(<2 x double> %A, double* %D) {
; CHECK-LABEL: st1lane_2d_double:
; CHECK: st1.d
  %tmp = extractelement <2 x double> %A, i32 1
  store double %tmp, double* %D
  ret void
}

define void @st1lane_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2d_double:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.d { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr double, double* %D, i64 %offset
  %tmp = extractelement <2 x double> %A, i32 1
  store double %tmp, double* %ptr
  ret void
}

define void @st1lane0_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2d_double:
; CHECK: str d0, [x0, x1, lsl #3]
  %ptr = getelementptr double, double* %D, i64 %offset
  %tmp = extractelement <2 x double> %A, i32 0
  store double %tmp, double* %ptr
  ret void
}

; The same three variants for 64-bit vector types.

define void @st1lane_8b(<8 x i8> %A, i8* %D) {
; CHECK-LABEL: st1lane_8b:
; CHECK: st1.b
  %tmp = extractelement <8 x i8> %A, i32 1
  store i8 %tmp, i8* %D
  ret void
}

define void @st1lane_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_8b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i8, i8* %D, i64 %offset
  %tmp = extractelement <8 x i8> %A, i32 1
  store i8 %tmp, i8* %ptr
  ret void
}

define void @st1lane0_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_8b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[0], [x[[XREG]]]
  %ptr = getelementptr i8, i8* %D, i64 %offset
  %tmp = extractelement <8 x i8> %A, i32 0
  store i8 %tmp, i8* %ptr
  ret void
}

define void @st1lane_4h(<4 x i16> %A, i16* %D) {
; CHECK-LABEL: st1lane_4h:
; CHECK: st1.h
  %tmp = extractelement <4 x i16> %A, i32 1
  store i16 %tmp, i16* %D
  ret void
}

define void @st1lane_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4h:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.h { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i16, i16* %D, i64 %offset
  %tmp = extractelement <4 x i16> %A, i32 1
  store i16 %tmp, i16* %ptr
  ret void
}

define void @st1lane0_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4h:
; CHECK: str h0, [x0, x1, lsl #1]
  %ptr = getelementptr i16, i16* %D, i64 %offset
  %tmp = extractelement <4 x i16> %A, i32 0
  store i16 %tmp, i16* %ptr
  ret void
}

define void @st1lane_2s(<2 x i32> %A, i32* %D) {
; CHECK-LABEL: st1lane_2s:
; CHECK: st1.s
  %tmp = extractelement <2 x i32> %A, i32 1
  store i32 %tmp, i32* %D
  ret void
}

define void @st1lane_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2s:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i32, i32* %D, i64 %offset
  %tmp = extractelement <2 x i32> %A, i32 1
  store i32 %tmp, i32* %ptr
  ret void
}

define void @st1lane0_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2s:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr i32, i32* %D, i64 %offset
  %tmp = extractelement <2 x i32> %A, i32 0
  store i32 %tmp, i32* %ptr
  ret void
}

define void @st1lane_2s_float(<2 x float> %A, float* %D) {
; CHECK-LABEL: st1lane_2s_float:
; CHECK: st1.s
  %tmp = extractelement <2 x float> %A, i32 1
  store float %tmp, float* %D
  ret void
}

define void @st1lane_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2s_float:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr float, float* %D, i64 %offset
  %tmp = extractelement <2 x float> %A, i32 1
  store float %tmp, float* %ptr
  ret void
}

define void @st1lane0_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2s_float:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr float, float* %D, i64 %offset
  %tmp = extractelement <2 x float> %A, i32 0
  store float %tmp, float* %ptr
  ret void
}

; Lane stores through the st2lane/st3lane/st4lane intrinsics (lane index 1).
; These intrinsics write memory, so their declarations carry only nounwind
; and no readnone/readonly attribute.

define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) {
; CHECK-LABEL: st2lane_16b:
; CHECK: st2.b
  call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D)
  ret void
}

define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, i16* %D) {
; CHECK-LABEL: st2lane_8h:
; CHECK: st2.h
  call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D)
  ret void
}

define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, i32* %D) {
; CHECK-LABEL: st2lane_4s:
; CHECK: st2.s
  call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D)
  ret void
}

define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, i64* %D) {
; CHECK-LABEL: st2lane_2d:
; CHECK: st2.d
  call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D)
  ret void
}

declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind
declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind
declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind
declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind

define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %D) {
; CHECK-LABEL: st3lane_16b:
; CHECK: st3.b
  call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D)
  ret void
}

define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %D) {
; CHECK-LABEL: st3lane_8h:
; CHECK: st3.h
  call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D)
  ret void
}

define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %D) {
; CHECK-LABEL: st3lane_4s:
; CHECK: st3.s
  call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D)
  ret void
}

define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %D) {
; CHECK-LABEL: st3lane_2d:
; CHECK: st3.d
  call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, i64* %D)
  ret void
}

declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind
declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind
declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind
declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind

define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %E) {
; CHECK-LABEL: st4lane_16b:
; CHECK: st4.b
  call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E)
  ret void
}

define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %E) {
; CHECK-LABEL: st4lane_8h:
; CHECK: st4.h
  call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E)
  ret void
}

define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %E) {
; CHECK-LABEL: st4lane_4s:
; CHECK: st4.s
  call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E)
  ret void
}

define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %E) {
; CHECK-LABEL: st4lane_2d:
; CHECK: st4.d
  call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E)
  ret void
}

declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind
declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind
declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind
declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind


; Whole-register structured stores through the st2/st3/st4 intrinsics, one
; group per vector arrangement. As above, the declarations of these storing
; intrinsics carry only nounwind.

define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) nounwind {
; CHECK-LABEL: st2_8b:
; CHECK: st2.8b
  call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P)
  ret void
}

define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind {
; CHECK-LABEL: st3_8b:
; CHECK: st3.8b
  call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P)
  ret void
}

define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind {
; CHECK-LABEL: st4_8b:
; CHECK: st4.8b
  call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind

define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind {
; CHECK-LABEL: st2_16b:
; CHECK: st2.16b
  call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P)
  ret void
}

define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind {
; CHECK-LABEL: st3_16b:
; CHECK: st3.16b
  call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P)
  ret void
}

define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind {
; CHECK-LABEL: st4_16b:
; CHECK: st4.16b
  call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind

define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind {
; CHECK-LABEL: st2_4h:
; CHECK: st2.4h
  call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P)
  ret void
}

define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind {
; CHECK-LABEL: st3_4h:
; CHECK: st3.4h
  call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P)
  ret void
}

define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind {
; CHECK-LABEL: st4_4h:
; CHECK: st4.4h
  call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind

define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind {
; CHECK-LABEL: st2_8h:
; CHECK: st2.8h
  call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P)
  ret void
}

define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind {
; CHECK-LABEL: st3_8h:
; CHECK: st3.8h
  call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P)
  ret void
}

define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind {
; CHECK-LABEL: st4_8h:
; CHECK: st4.8h
  call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind

define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind {
; CHECK-LABEL: st2_2s:
; CHECK: st2.2s
  call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P)
  ret void
}

define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind {
; CHECK-LABEL: st3_2s:
; CHECK: st3.2s
  call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P)
  ret void
}

define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind {
; CHECK-LABEL: st4_2s:
; CHECK: st4.2s
  call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind

define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind {
; CHECK-LABEL: st2_4s:
; CHECK: st2.4s
  call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P)
  ret void
}

define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind {
; CHECK-LABEL: st3_4s:
; CHECK: st3.4s
  call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P)
  ret void
}

define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind {
; CHECK-LABEL: st4_4s:
; CHECK: st4.4s
  call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind

; If there's only one element, st2/3/4 don't make much sense, stick to st1.
; <1 x i64> operands: the expected instruction for the st2/st3/st4 intrinsics
; is st1.1d. The store intrinsics in this part of the file write memory, so
; their declarations carry only nounwind and no readnone/readonly attribute.
define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind {
; CHECK-LABEL: st2_1d:
; CHECK: st1.1d
  call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P)
  ret void
}

define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) nounwind {
; CHECK-LABEL: st3_1d:
; CHECK: st1.1d
  call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P)
  ret void
}

define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) nounwind {
; CHECK-LABEL: st4_1d:
; CHECK: st1.1d
  call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind

define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind {
; CHECK-LABEL: st2_2d:
; CHECK: st2.2d
  call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P)
  ret void
}

define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind {
; CHECK-LABEL: st3_2d:
; CHECK: st3.2d
  call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P)
  ret void
}

define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind {
; CHECK-LABEL: st4_2d:
; CHECK: st4.2d
  call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind

; st1x2: two registers stored with a single st1 through the pointer in x0.
; 64-bit vector types first, then 128-bit vector types.

declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*) nounwind

define void @st1_x2_v8i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) {
; CHECK-LABEL: st1_x2_v8i8:
; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %addr)
  ret void
}

define void @st1_x2_v4i16(<4 x i16> %A, <4 x i16> %B, i16* %addr) {
; CHECK-LABEL: st1_x2_v4i16:
; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %addr)
  ret void
}

define void @st1_x2_v2i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) {
; CHECK-LABEL: st1_x2_v2i32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %addr)
  ret void
}

define void @st1_x2_v2f32(<2 x float> %A, <2 x float> %B, float* %addr) {
; CHECK-LABEL: st1_x2_v2f32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %A, <2 x float> %B, float* %addr)
  ret void
}

define void @st1_x2_v1i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) {
; CHECK-LABEL: st1_x2_v1i64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %addr)
  ret void
}

define void @st1_x2_v1f64(<1 x double> %A, <1 x double> %B, double* %addr) {
; CHECK-LABEL: st1_x2_v1f64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %A, <1 x double> %B, double* %addr)
  ret void
}

declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) nounwind

define void @st1_x2_v16i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) {
; CHECK-LABEL: st1_x2_v16i8:
; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %addr)
  ret void
}

define void @st1_x2_v8i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) {
; CHECK-LABEL: st1_x2_v8i16:
; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %addr)
  ret void
}

define void @st1_x2_v4i32(<4 x i32> %A, <4 x i32> %B, i32* %addr) {
; CHECK-LABEL: st1_x2_v4i32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %addr)
  ret void
}

define void @st1_x2_v4f32(<4 x float> %A, <4 x float> %B, float* %addr) {
; CHECK-LABEL: st1_x2_v4f32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %A, <4 x float> %B, float* %addr)
  ret void
}

define void @st1_x2_v2i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) {
; CHECK-LABEL: st1_x2_v2i64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %addr)
  ret void
}

define void @st1_x2_v2f64(<2 x double> %A, <2 x double> %B, double* %addr) {
; CHECK-LABEL: st1_x2_v2f64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %A, <2 x double> %B, double* %addr)
  ret void
}

; st1x3: three registers stored with a single st1 through the pointer in x0.

declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) nounwind

define void @st1_x3_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr) {
; CHECK-LABEL: st1_x3_v8i8:
; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr)
  ret void
}

define void @st1_x3_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr) {
; CHECK-LABEL: st1_x3_v4i16:
; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr)
  ret void
}

define void @st1_x3_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr) {
; CHECK-LABEL: st1_x3_v2i32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr)
  ret void
}

define void @st1_x3_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr) {
; CHECK-LABEL: st1_x3_v2f32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr)
  ret void
}

define void @st1_x3_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr) {
; CHECK-LABEL: st1_x3_v1i64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr)
  ret void
}

define void @st1_x3_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr) {
; CHECK-LABEL: st1_x3_v1f64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr)
  ret void
}

declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) nounwind

define void @st1_x3_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr) {
; CHECK-LABEL: st1_x3_v16i8:
; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr)
  ret void
}

define void @st1_x3_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr) {
; CHECK-LABEL: st1_x3_v8i16:
; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr)
  ret void
}

define void @st1_x3_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr) {
; CHECK-LABEL: st1_x3_v4i32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr)
  ret void
}

define void @st1_x3_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr) {
; CHECK-LABEL: st1_x3_v4f32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr)
  ret void
}

define void @st1_x3_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr) {
; CHECK-LABEL: st1_x3_v2i64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr)
  ret void
}

define void @st1_x3_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr) {
; CHECK-LABEL: st1_x3_v2f64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr)
  ret void
}


; st1x4: four registers stored with a single st1 through the pointer in x0.

declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) nounwind

define void @st1_x4_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) {
; CHECK-LABEL: st1_x4_v8i8:
; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr)
  ret void
}

define void @st1_x4_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) {
; CHECK-LABEL: st1_x4_v4i16:
; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr)
  ret void
}

define void @st1_x4_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) {
; CHECK-LABEL: st1_x4_v2i32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr)
  ret void
}

define void @st1_x4_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) {
; CHECK-LABEL: st1_x4_v2f32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr)
  ret void
}

define void @st1_x4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) {
; CHECK-LABEL: st1_x4_v1i64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr)
  ret void
}

define void @st1_x4_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) {
; CHECK-LABEL: st1_x4_v1f64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr)
  ret void
}

declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*) nounwind

define void @st1_x4_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) {
; CHECK-LABEL: st1_x4_v16i8:
; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr)
  ret void
}

836define void @st1_x4_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) { 837; CHECK-LABEL: 
st1_x4_v8i16: 838; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 839 call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) 840 ret void 841} 842 843define void @st1_x4_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) { 844; CHECK-LABEL: st1_x4_v4i32: 845; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 846 call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) 847 ret void 848} 849 850define void @st1_x4_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) { 851; CHECK-LABEL: st1_x4_v4f32: 852; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 853 call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) 854 ret void 855} 856 857define void @st1_x4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) { 858; CHECK-LABEL: st1_x4_v2i64: 859; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 860 call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) 861 ret void 862} 863 864define void @st1_x4_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) { 865; CHECK-LABEL: st1_x4_v2f64: 866; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 867 call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) 868 ret void 869} 870