; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

;
; CLASTA (Vectors)
;

define <vscale x 16 x i8> @clasta_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clasta_i8:
; CHECK: clasta z0.b, p0, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @clasta_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clasta_i16:
; CHECK: clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @clasta_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clasta_i32:
; CHECK: clasta z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @clasta_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clasta_i64:
; CHECK: clasta z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @clasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clasta_f16:
; CHECK: clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @clasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clasta_bf16:
; CHECK: clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @clasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clasta_f32:
; CHECK: clasta z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @clasta_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clasta_f64:
; CHECK: clasta z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; CLASTA (Scalar)
;

define i8 @clasta_n_i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clasta_n_i8:
; CHECK: clasta w0, p0, w0, z0.b
; CHECK-NEXT: ret
  %out = call i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b)
  ret i8 %out
}

define i16 @clasta_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clasta_n_i16:
; CHECK: clasta w0, p0, w0, z0.h
; CHECK-NEXT: ret
  %out = call i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b)
  ret i16 %out
}

define i32 @clasta_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clasta_n_i32:
; CHECK: clasta w0, p0, w0, z0.s
; CHECK-NEXT: ret
  %out = call i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b)
  ret i32 %out
}

define i64 @clasta_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clasta_n_i64:
; CHECK: clasta x0, p0, x0, z0.d
; CHECK-NEXT: ret
  %out = call i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b)
  ret i64 %out
}

define half @clasta_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clasta_n_f16:
; CHECK: clasta h0, p0, h0, z1.h
; CHECK-NEXT: ret
  %out = call half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b)
  ret half %out
}

define bfloat @clasta_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clasta_n_bf16:
; CHECK: clasta h0, p0, h0, z1.h
; CHECK-NEXT: ret
  %out = call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b)
  ret bfloat %out
}

define float @clasta_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clasta_n_f32:
; CHECK: clasta s0, p0, s0, z1.s
; CHECK-NEXT: ret
  %out = call float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b)
  ret float %out
}

define double @clasta_n_f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clasta_n_f64:
; CHECK: clasta d0, p0, d0, z1.d
; CHECK-NEXT: ret
  %out = call double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b)
  ret double %out
}

;
; CLASTB (Vectors)
;

define <vscale x 16 x i8> @clastb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clastb_i8:
; CHECK: clastb z0.b, p0, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @clastb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clastb_i16:
; CHECK: clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @clastb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clastb_i32:
; CHECK: clastb z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @clastb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clastb_i64:
; CHECK: clastb z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @clastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clastb_f16:
; CHECK: clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @clastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clastb_bf16:
; CHECK: clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @clastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clastb_f32:
; CHECK: clastb z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @clastb_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clastb_f64:
; CHECK: clastb z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; CLASTB (Scalar)
;

define i8 @clastb_n_i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clastb_n_i8:
; CHECK: clastb w0, p0, w0, z0.b
; CHECK-NEXT: ret
  %out = call i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b)
  ret i8 %out
}

define i16 @clastb_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clastb_n_i16:
; CHECK: clastb w0, p0, w0, z0.h
; CHECK-NEXT: ret
  %out = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b)
  ret i16 %out
}

define i32 @clastb_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clastb_n_i32:
; CHECK: clastb w0, p0, w0, z0.s
; CHECK-NEXT: ret
  %out = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b)
  ret i32 %out
}

define i64 @clastb_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clastb_n_i64:
; CHECK: clastb x0, p0, x0, z0.d
; CHECK-NEXT: ret
  %out = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b)
  ret i64 %out
}

define half @clastb_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clastb_n_f16:
; CHECK: clastb h0, p0, h0, z1.h
; CHECK-NEXT: ret
  %out = call half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b)
  ret half %out
}

define bfloat @clastb_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clastb_n_bf16:
; CHECK: clastb h0, p0, h0, z1.h
; CHECK-NEXT: ret
  %out = call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b)
  ret bfloat %out
}

define float @clastb_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clastb_n_f32:
; CHECK: clastb s0, p0, s0, z1.s
; CHECK-NEXT: ret
  %out = call float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b)
  ret float %out
}

define double @clastb_n_f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clastb_n_f64:
; CHECK: clastb d0, p0, d0, z1.d
; CHECK-NEXT: ret
  %out = call double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b)
  ret double %out
}

;
; DUPQ
;

define <vscale x 16 x i8> @dupq_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: dupq_i8:
; CHECK: mov z0.q, q0
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %a, i64 0)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @dupq_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: dupq_i16:
; CHECK: mov z0.q, z0.q[1]
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %a, i64 1)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @dupq_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: dupq_i32:
; CHECK: mov z0.q, z0.q[2]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %a, i64 2)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @dupq_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: dupq_i64:
; CHECK: mov z0.q, z0.q[3]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 3)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @dupq_f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: dupq_f16:
; CHECK: mov z0.q, q0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 0)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @dupq_bf16(<vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: dupq_bf16:
; CHECK: mov z0.q, q0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 0)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @dupq_f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: dupq_f32:
; CHECK: mov z0.q, z0.q[1]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %a, i64 1)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @dupq_f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: dupq_f64:
; CHECK: mov z0.q, z0.q[2]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %a, i64 2)
  ret <vscale x 2 x double> %out
}

;
; DUPQ_LANE
;

define <vscale x 16 x i8> @dupq_lane_i8(<vscale x 16 x i8> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i8:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK-NEXT: tbl z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %a, i64 %idx)
  ret <vscale x 16 x i8> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x i16> @dupq_lane_i16(<vscale x 8 x i16> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i16:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %a, i64 %idx)
  ret <vscale x 8 x i16> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 4 x i32> @dupq_lane_i32(<vscale x 4 x i32> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i32:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %a, i64 %idx)
  ret <vscale x 4 x i32> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 2 x i64> @dupq_lane_i64(<vscale x 2 x i64> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i64:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 %idx)
  ret <vscale x 2 x i64> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x half> @dupq_lane_f16(<vscale x 8 x half> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f16:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 %idx)
  ret <vscale x 8 x half> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x bfloat> @dupq_lane_bf16(<vscale x 8 x bfloat> %a, i64 %idx) #0 {
; CHECK-LABEL: dupq_lane_bf16:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 %idx)
  ret <vscale x 8 x bfloat> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 4 x float> @dupq_lane_f32(<vscale x 4 x float> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f32:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %a, i64 %idx)
  ret <vscale x 4 x float> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 2 x double> @dupq_lane_f64(<vscale x 2 x double> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f64:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %a, i64 %idx)
  ret <vscale x 2 x double> %out
}

; NOTE: Index out of range (0-3)
define <vscale x 2 x i64> @dupq_i64_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: dupq_i64_range:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[Z3:z[0-9]+]].d, [[Z2]].d, #8
; CHECK: tbl z0.d, { z0.d }, [[Z3]].d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 4)
  ret <vscale x 2 x i64> %out
}

;
; EXT
;

define <vscale x 16 x i8> @ext_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ext_i8:
; CHECK: ext z0.b, z0.b, z1.b, #255
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 255)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @ext_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ext_i16:
; CHECK: ext z0.b, z0.b, z1.b, #0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 0)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ext_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ext_i32:
; CHECK: ext z0.b, z0.b, z1.b, #4
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 1)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ext_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: ext_i64:
; CHECK: ext z0.b, z0.b, z1.b, #16
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 2)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x bfloat> @ext_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: ext_bf16:
; CHECK: ext z0.b, z0.b, z1.b, #6
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i32 3)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @ext_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: ext_f16:
; CHECK: ext z0.b, z0.b, z1.b, #6
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 3)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @ext_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: ext_f32:
; CHECK: ext z0.b, z0.b, z1.b, #16
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 4)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @ext_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: ext_f64:
; CHECK: ext z0.b, z0.b, z1.b, #40
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 5)
  ret <vscale x 2 x double> %out
}

;
; LASTA
;

define i8 @lasta_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lasta_i8
; CHECK: lasta w0, p0, z0.b
; CHECK-NEXT: ret
  %res = call i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @lasta_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lasta_i16
; CHECK: lasta w0, p0, z0.h
; CHECK-NEXT: ret
  %res = call i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @lasta_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lasta_i32
; CHECK: lasta w0, p0, z0.s
; CHECK-NEXT: ret
  %res = call i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @lasta_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lasta_i64
; CHECK: lasta x0, p0, z0.d
; CHECK-NEXT: ret
  %res = call i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
  ret i64 %res
}

define half @lasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
; CHECK-LABEL: lasta_f16
; CHECK: lasta h0, p0, z0.h
; CHECK-NEXT: ret
  %res = call half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a)
  ret half %res
}

define bfloat @lasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: lasta_bf16
; CHECK: lasta h0, p0, z0.h
; CHECK-NEXT: ret
  %res = call bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a)
  ret bfloat %res
}

define float @lasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: lasta_f32
; CHECK: lasta s0, p0, z0.s
; CHECK-NEXT: ret
  %res = call float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a)
  ret float %res
}

define float @lasta_f32_v2(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) {
; CHECK-LABEL: lasta_f32_v2
; CHECK: lasta s0, p0, z0.s
; CHECK-NEXT: ret
  %res = call float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a)
  ret float %res
}

define double @lasta_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: lasta_f64
; CHECK: lasta d0, p0, z0.d
; CHECK-NEXT: ret
  %res = call double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a)
  ret double %res
}

;
; LASTB
;

define i8 @lastb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lastb_i8
; CHECK: lastb w0, p0, z0.b
; CHECK-NEXT: ret
  %res = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @lastb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lastb_i16
; CHECK: lastb w0, p0, z0.h
; CHECK-NEXT: ret
  %res = call i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @lastb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lastb_i32
; CHECK: lastb w0, p0, z0.s
; CHECK-NEXT: ret
  %res = call i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @lastb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lastb_i64
; CHECK: lastb x0, p0, z0.d
; CHECK-NEXT: ret
  %res = call i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
  ret i64 %res
}

define half @lastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
; CHECK-LABEL: lastb_f16
; CHECK: lastb h0, p0, z0.h
; CHECK-NEXT: ret
  %res = call half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a)
  ret half %res
}

define bfloat @lastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: lastb_bf16
; CHECK: lastb h0, p0, z0.h
; CHECK-NEXT: ret
  %res = call bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a)
  ret bfloat %res
}

define float @lastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: lastb_f32
; CHECK: lastb s0, p0, z0.s
; CHECK-NEXT: ret
  %res = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a)
  ret float %res
}

define float @lastb_f32_v2(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) {
; CHECK-LABEL: lastb_f32_v2
; CHECK: lastb s0, p0, z0.s
; CHECK-NEXT: ret
  %res = call float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a)
  ret float %res
}

define double @lastb_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: lastb_f64
; CHECK: lastb d0, p0, z0.d
; CHECK-NEXT: ret
  %res = call double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a)
  ret double %res
}

;
; COMPACT
;

define <vscale x 4 x i32> @compact_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: compact_i32:
; CHECK: compact z0.s, p0, z0.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @compact_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: compact_i64:
; CHECK: compact z0.d, p0, z0.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

define <vscale x 4 x float> @compact_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: compact_f32:
; CHECK: compact z0.s, p0, z0.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @compact_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: compact_f64:
; CHECK: compact z0.d, p0, z0.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a)
  ret <vscale x 2 x double> %out
}

;
; REV
;

define <vscale x 16 x i1> @rev_b8( <vscale x 16 x i1> %a) {
; CHECK-LABEL: rev_b8
; CHECK: rev p0.b, p0.b
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> %a)
  ret <vscale x 16 x i1> %res
}

define <vscale x 8 x i1> @rev_b16(<vscale x 8 x i1> %a) {
; CHECK-LABEL: rev_b16
; CHECK: rev p0.h, p0.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1> %a)
  ret <vscale x 8 x i1> %res
}

define <vscale x 4 x i1> @rev_b32(<vscale x 4 x i1> %a) {
; CHECK-LABEL: rev_b32
; CHECK: rev p0.s, p0.s
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1> %a)
  ret <vscale x 4 x i1> %res
}

define <vscale x 2 x i1> @rev_b64(<vscale x 2 x i1> %a) {
; CHECK-LABEL: rev_b64
; CHECK: rev p0.d, p0.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1> %a)
  ret <vscale x 2 x i1> %res
}

define <vscale x 16 x i8> @rev_i8( <vscale x 16 x i8> %a) {
; CHECK-LABEL: rev_i8
; CHECK: rev z0.b, z0.b
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @rev_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: rev_i16
; CHECK: rev z0.h, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @rev_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: rev_i32
; CHECK: rev z0.s, z0.s
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @rev_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: rev_i64
; CHECK: rev z0.d, z0.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %res
}

define <vscale x 8 x bfloat> @rev_bf16(<vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: rev_bf16
; CHECK: rev z0.h, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat> %a)
  ret <vscale x 8 x bfloat> %res
}

define <vscale x 8 x half> @rev_f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: rev_f16
; CHECK: rev z0.h, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half> %a)
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x float> @rev_f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: rev_f32
; CHECK: rev z0.s, z0.s
; CHECK-NEXT: ret
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float> %a)
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @rev_f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: rev_f64
; CHECK: rev z0.d, z0.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double> %a)
  ret <vscale x 2 x double> %res
}

;
; SPLICE
;

define <vscale x 16 x i8> @splice_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: splice_i8:
; CHECK: splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @splice_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: splice_i16:
; CHECK: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @splice_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: splice_i32:
; CHECK: splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @splice_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: splice_i64:
; CHECK: splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x bfloat> @splice_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: splice_bf16:
; CHECK: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @splice_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: splice_f16:
; CHECK: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @splice_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: splice_f32:
; CHECK: splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @splice_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: splice_f64:
; CHECK: splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; SUNPKHI
;

define <vscale x 8 x i16> @sunpkhi_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sunpkhi_i16
; CHECK: sunpkhi z0.h, z0.b
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @sunpkhi_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sunpkhi_i32
; CHECK: sunpkhi z0.s, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @sunpkhi_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sunpkhi_i64
; CHECK: sunpkhi z0.d, z0.s
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sunpkhi.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; SUNPKLO
;

define <vscale x 8 x i16>
@sunpklo_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sunpklo_i16:
; CHECK: sunpklo z0.h, z0.b
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sunpklo.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @sunpklo_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sunpklo_i32:
; CHECK: sunpklo z0.s, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @sunpklo_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sunpklo_i64:
; CHECK: sunpklo z0.d, z0.s
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sunpklo.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; TBL
;

define <vscale x 16 x i8> @tbl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: tbl_i8:
; CHECK: tbl z0.b, { z0.b }, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @tbl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: tbl_i16:
; CHECK: tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @tbl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: tbl_i32:
; CHECK: tbl z0.s, { z0.s }, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @tbl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: tbl_i64:
;
; CHECK: tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @tbl_f16(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: tbl_f16:
; CHECK: tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @tbl_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: tbl_bf16:
; CHECK: tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                   <vscale x 8 x i16> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @tbl_f32(<vscale x 4 x float> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: tbl_f32:
; CHECK: tbl z0.s, { z0.s }, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @tbl_f64(<vscale x 2 x double> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: tbl_f64:
; CHECK: tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x double> %out
}

;
; UUNPKHI
;

; Note: label carries a trailing ':' for consistency with the rest of the file.
define <vscale x 8 x i16> @uunpkhi_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uunpkhi_i16:
; CHECK: uunpkhi z0.h, z0.b
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @uunpkhi_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uunpkhi_i32:
; CHECK: uunpkhi z0.s, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @uunpkhi_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uunpkhi_i64:
; CHECK: uunpkhi z0.d, z0.s
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uunpkhi.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; UUNPKLO
;

define <vscale x 8 x i16> @uunpklo_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uunpklo_i16:
; CHECK: uunpklo z0.h, z0.b
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @uunpklo_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uunpklo_i32:
; CHECK: uunpklo z0.s, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @uunpklo_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uunpklo_i64:
; CHECK: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uunpklo.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; TRN1
;

define <vscale x 16 x i1> @trn1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn1_b8:
; CHECK: trn1 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.nxv16i1(<vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @trn1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: trn1_b16:
; CHECK: trn1 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1>
@llvm.aarch64.sve.trn1.nxv8i1(<vscale x 8 x i1> %a, 1222 <vscale x 8 x i1> %b) 1223 ret <vscale x 8 x i1> %out 1224} 1225 1226define <vscale x 4 x i1> @trn1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) { 1227; CHECK-LABEL: trn1_b32: 1228; CHECK: trn1 p0.s, p0.s, p1.s 1229; CHECK-NEXT: ret 1230 %out = call <vscale x 4 x i1> @llvm.aarch64.sve.trn1.nxv4i1(<vscale x 4 x i1> %a, 1231 <vscale x 4 x i1> %b) 1232 ret <vscale x 4 x i1> %out 1233} 1234 1235define <vscale x 2 x i1> @trn1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) { 1236; CHECK-LABEL: trn1_b64: 1237; CHECK: trn1 p0.d, p0.d, p1.d 1238; CHECK-NEXT: ret 1239 %out = call <vscale x 2 x i1> @llvm.aarch64.sve.trn1.nxv2i1(<vscale x 2 x i1> %a, 1240 <vscale x 2 x i1> %b) 1241 ret <vscale x 2 x i1> %out 1242} 1243 1244define <vscale x 16 x i8> @trn1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { 1245; CHECK-LABEL: trn1_i8: 1246; CHECK: trn1 z0.b, z0.b, z1.b 1247; CHECK-NEXT: ret 1248 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn1.nxv16i8(<vscale x 16 x i8> %a, 1249 <vscale x 16 x i8> %b) 1250 ret <vscale x 16 x i8> %out 1251} 1252 1253define <vscale x 8 x i16> @trn1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { 1254; CHECK-LABEL: trn1_i16: 1255; CHECK: trn1 z0.h, z0.h, z1.h 1256; CHECK-NEXT: ret 1257 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn1.nxv8i16(<vscale x 8 x i16> %a, 1258 <vscale x 8 x i16> %b) 1259 ret <vscale x 8 x i16> %out 1260} 1261 1262define <vscale x 4 x i32> @trn1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { 1263; CHECK-LABEL: trn1_i32: 1264; CHECK: trn1 z0.s, z0.s, z1.s 1265; CHECK-NEXT: ret 1266 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn1.nxv4i32(<vscale x 4 x i32> %a, 1267 <vscale x 4 x i32> %b) 1268 ret <vscale x 4 x i32> %out 1269} 1270 1271define <vscale x 2 x i64> @trn1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { 1272; CHECK-LABEL: trn1_i64: 1273; CHECK: trn1 z0.d, z0.d, z1.d 1274; CHECK-NEXT: ret 1275 %out = call <vscale x 2 x i64> 
@llvm.aarch64.sve.trn1.nxv2i64(<vscale x 2 x i64> %a, 1276 <vscale x 2 x i64> %b) 1277 ret <vscale x 2 x i64> %out 1278} 1279 1280define <vscale x 4 x half> @trn1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) { 1281; CHECK-LABEL: trn1_f16_v4: 1282; CHECK: trn1 z0.s, z0.s, z1.s 1283; CHECK-NEXT: ret 1284 %out = call <vscale x 4 x half> @llvm.aarch64.sve.trn1.nxv4f16(<vscale x 4 x half> %a, 1285 <vscale x 4 x half> %b) 1286 ret <vscale x 4 x half> %out 1287} 1288 1289define <vscale x 8 x bfloat> @trn1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 { 1290; CHECK-LABEL: trn1_bf16: 1291; CHECK: trn1 z0.h, z0.h, z1.h 1292; CHECK-NEXT: ret 1293 %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1.nxv8bf16(<vscale x 8 x bfloat> %a, 1294 <vscale x 8 x bfloat> %b) 1295 ret <vscale x 8 x bfloat> %out 1296} 1297 1298define <vscale x 8 x half> @trn1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { 1299; CHECK-LABEL: trn1_f16: 1300; CHECK: trn1 z0.h, z0.h, z1.h 1301; CHECK-NEXT: ret 1302 %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn1.nxv8f16(<vscale x 8 x half> %a, 1303 <vscale x 8 x half> %b) 1304 ret <vscale x 8 x half> %out 1305} 1306 1307define <vscale x 4 x float> @trn1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) { 1308; CHECK-LABEL: trn1_f32: 1309; CHECK: trn1 z0.s, z0.s, z1.s 1310; CHECK-NEXT: ret 1311 %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn1.nxv4f32(<vscale x 4 x float> %a, 1312 <vscale x 4 x float> %b) 1313 ret <vscale x 4 x float> %out 1314} 1315 1316define <vscale x 2 x double> @trn1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { 1317; CHECK-LABEL: trn1_f64: 1318; CHECK: trn1 z0.d, z0.d, z1.d 1319; CHECK-NEXT: ret 1320 %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn1.nxv2f64(<vscale x 2 x double> %a, 1321 <vscale x 2 x double> %b) 1322 ret <vscale x 2 x double> %out 1323} 1324 1325; 1326; TRN2 1327; 1328 1329define <vscale x 16 x i1> @trn2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> 
%b) { 1330; CHECK-LABEL: trn2_b8: 1331; CHECK: trn2 p0.b, p0.b, p1.b 1332; CHECK-NEXT: ret 1333 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.nxv16i1(<vscale x 16 x i1> %a, 1334 <vscale x 16 x i1> %b) 1335 ret <vscale x 16 x i1> %out 1336} 1337 1338define <vscale x 8 x i1> @trn2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) { 1339; CHECK-LABEL: trn2_b16: 1340; CHECK: trn2 p0.h, p0.h, p1.h 1341; CHECK-NEXT: ret 1342 %out = call <vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1> %a, 1343 <vscale x 8 x i1> %b) 1344 ret <vscale x 8 x i1> %out 1345} 1346 1347define <vscale x 4 x i1> @trn2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) { 1348; CHECK-LABEL: trn2_b32: 1349; CHECK: trn2 p0.s, p0.s, p1.s 1350; CHECK-NEXT: ret 1351 %out = call <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1> %a, 1352 <vscale x 4 x i1> %b) 1353 ret <vscale x 4 x i1> %out 1354} 1355 1356define <vscale x 2 x i1> @trn2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) { 1357; CHECK-LABEL: trn2_b64: 1358; CHECK: trn2 p0.d, p0.d, p1.d 1359; CHECK-NEXT: ret 1360 %out = call <vscale x 2 x i1> @llvm.aarch64.sve.trn2.nxv2i1(<vscale x 2 x i1> %a, 1361 <vscale x 2 x i1> %b) 1362 ret <vscale x 2 x i1> %out 1363} 1364 1365define <vscale x 16 x i8> @trn2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { 1366; CHECK-LABEL: trn2_i8: 1367; CHECK: trn2 z0.b, z0.b, z1.b 1368; CHECK-NEXT: ret 1369 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn2.nxv16i8(<vscale x 16 x i8> %a, 1370 <vscale x 16 x i8> %b) 1371 ret <vscale x 16 x i8> %out 1372} 1373 1374define <vscale x 8 x i16> @trn2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { 1375; CHECK-LABEL: trn2_i16: 1376; CHECK: trn2 z0.h, z0.h, z1.h 1377; CHECK-NEXT: ret 1378 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn2.nxv8i16(<vscale x 8 x i16> %a, 1379 <vscale x 8 x i16> %b) 1380 ret <vscale x 8 x i16> %out 1381} 1382 1383define <vscale x 4 x i32> @trn2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { 
1384; CHECK-LABEL: trn2_i32: 1385; CHECK: trn2 z0.s, z0.s, z1.s 1386; CHECK-NEXT: ret 1387 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn2.nxv4i32(<vscale x 4 x i32> %a, 1388 <vscale x 4 x i32> %b) 1389 ret <vscale x 4 x i32> %out 1390} 1391 1392define <vscale x 2 x i64> @trn2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { 1393; CHECK-LABEL: trn2_i64: 1394; CHECK: trn2 z0.d, z0.d, z1.d 1395; CHECK-NEXT: ret 1396 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn2.nxv2i64(<vscale x 2 x i64> %a, 1397 <vscale x 2 x i64> %b) 1398 ret <vscale x 2 x i64> %out 1399} 1400 1401define <vscale x 4 x half> @trn2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) { 1402; CHECK-LABEL: trn2_f16_v4: 1403; CHECK: trn2 z0.s, z0.s, z1.s 1404; CHECK-NEXT: ret 1405 %out = call <vscale x 4 x half> @llvm.aarch64.sve.trn2.nxv4f16(<vscale x 4 x half> %a, 1406 <vscale x 4 x half> %b) 1407 ret <vscale x 4 x half> %out 1408} 1409 1410define <vscale x 8 x bfloat> @trn2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 { 1411; CHECK-LABEL: trn2_bf16: 1412; CHECK: trn2 z0.h, z0.h, z1.h 1413; CHECK-NEXT: ret 1414 %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2.nxv8bf16(<vscale x 8 x bfloat> %a, 1415 <vscale x 8 x bfloat> %b) 1416 ret <vscale x 8 x bfloat> %out 1417} 1418 1419define <vscale x 8 x half> @trn2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { 1420; CHECK-LABEL: trn2_f16: 1421; CHECK: trn2 z0.h, z0.h, z1.h 1422; CHECK-NEXT: ret 1423 %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn2.nxv8f16(<vscale x 8 x half> %a, 1424 <vscale x 8 x half> %b) 1425 ret <vscale x 8 x half> %out 1426} 1427 1428define <vscale x 4 x float> @trn2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) { 1429; CHECK-LABEL: trn2_f32: 1430; CHECK: trn2 z0.s, z0.s, z1.s 1431; CHECK-NEXT: ret 1432 %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn2.nxv4f32(<vscale x 4 x float> %a, 1433 <vscale x 4 x float> %b) 1434 ret <vscale x 4 x float> %out 1435} 1436 1437define 
<vscale x 2 x double> @trn2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { 1438; CHECK-LABEL: trn2_f64: 1439; CHECK: trn2 z0.d, z0.d, z1.d 1440; CHECK-NEXT: ret 1441 %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn2.nxv2f64(<vscale x 2 x double> %a, 1442 <vscale x 2 x double> %b) 1443 ret <vscale x 2 x double> %out 1444} 1445 1446; 1447; UZP1 1448; 1449 1450define <vscale x 16 x i1> @uzp1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) { 1451; CHECK-LABEL: uzp1_b8: 1452; CHECK: uzp1 p0.b, p0.b, p1.b 1453; CHECK-NEXT: ret 1454 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.nxv16i1(<vscale x 16 x i1> %a, 1455 <vscale x 16 x i1> %b) 1456 ret <vscale x 16 x i1> %out 1457} 1458 1459define <vscale x 8 x i1> @uzp1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) { 1460; CHECK-LABEL: uzp1_b16: 1461; CHECK: uzp1 p0.h, p0.h, p1.h 1462; CHECK-NEXT: ret 1463 %out = call <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1> %a, 1464 <vscale x 8 x i1> %b) 1465 ret <vscale x 8 x i1> %out 1466} 1467 1468define <vscale x 4 x i1> @uzp1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) { 1469; CHECK-LABEL: uzp1_b32: 1470; CHECK: uzp1 p0.s, p0.s, p1.s 1471; CHECK-NEXT: ret 1472 %out = call <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1> %a, 1473 <vscale x 4 x i1> %b) 1474 ret <vscale x 4 x i1> %out 1475} 1476 1477define <vscale x 2 x i1> @uzp1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) { 1478; CHECK-LABEL: uzp1_b64: 1479; CHECK: uzp1 p0.d, p0.d, p1.d 1480; CHECK-NEXT: ret 1481 %out = call <vscale x 2 x i1> @llvm.aarch64.sve.uzp1.nxv2i1(<vscale x 2 x i1> %a, 1482 <vscale x 2 x i1> %b) 1483 ret <vscale x 2 x i1> %out 1484} 1485 1486define <vscale x 16 x i8> @uzp1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { 1487; CHECK-LABEL: uzp1_i8: 1488; CHECK: uzp1 z0.b, z0.b, z1.b 1489; CHECK-NEXT: ret 1490 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp1.nxv16i8(<vscale x 16 x i8> %a, 1491 <vscale x 16 x i8> %b) 1492 ret <vscale 
x 16 x i8> %out 1493} 1494 1495define <vscale x 8 x i16> @uzp1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { 1496; CHECK-LABEL: uzp1_i16: 1497; CHECK: uzp1 z0.h, z0.h, z1.h 1498; CHECK-NEXT: ret 1499 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16> %a, 1500 <vscale x 8 x i16> %b) 1501 ret <vscale x 8 x i16> %out 1502} 1503 1504define <vscale x 4 x i32> @uzp1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { 1505; CHECK-LABEL: uzp1_i32: 1506; CHECK: uzp1 z0.s, z0.s, z1.s 1507; CHECK-NEXT: ret 1508 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32> %a, 1509 <vscale x 4 x i32> %b) 1510 ret <vscale x 4 x i32> %out 1511} 1512 1513define <vscale x 2 x i64> @uzp1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { 1514; CHECK-LABEL: uzp1_i64: 1515; CHECK: uzp1 z0.d, z0.d, z1.d 1516; CHECK-NEXT: ret 1517 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp1.nxv2i64(<vscale x 2 x i64> %a, 1518 <vscale x 2 x i64> %b) 1519 ret <vscale x 2 x i64> %out 1520} 1521 1522define <vscale x 4 x half> @uzp1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) { 1523; CHECK-LABEL: uzp1_f16_v4: 1524; CHECK: uzp1 z0.s, z0.s, z1.s 1525; CHECK-NEXT: ret 1526 %out = call <vscale x 4 x half> @llvm.aarch64.sve.uzp1.nxv4f16(<vscale x 4 x half> %a, 1527 <vscale x 4 x half> %b) 1528 ret <vscale x 4 x half> %out 1529} 1530 1531define <vscale x 8 x bfloat> @uzp1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 { 1532; CHECK-LABEL: uzp1_bf16: 1533; CHECK: uzp1 z0.h, z0.h, z1.h 1534; CHECK-NEXT: ret 1535 %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1.nxv8bf16(<vscale x 8 x bfloat> %a, 1536 <vscale x 8 x bfloat> %b) 1537 ret <vscale x 8 x bfloat> %out 1538} 1539 1540define <vscale x 8 x half> @uzp1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { 1541; CHECK-LABEL: uzp1_f16: 1542; CHECK: uzp1 z0.h, z0.h, z1.h 1543; CHECK-NEXT: ret 1544 %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp1.nxv8f16(<vscale 
x 8 x half> %a, 1545 <vscale x 8 x half> %b) 1546 ret <vscale x 8 x half> %out 1547} 1548 1549define <vscale x 4 x float> @uzp1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) { 1550; CHECK-LABEL: uzp1_f32: 1551; CHECK: uzp1 z0.s, z0.s, z1.s 1552; CHECK-NEXT: ret 1553 %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp1.nxv4f32(<vscale x 4 x float> %a, 1554 <vscale x 4 x float> %b) 1555 ret <vscale x 4 x float> %out 1556} 1557 1558define <vscale x 2 x double> @uzp1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { 1559; CHECK-LABEL: uzp1_f64: 1560; CHECK: uzp1 z0.d, z0.d, z1.d 1561; CHECK-NEXT: ret 1562 %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp1.nxv2f64(<vscale x 2 x double> %a, 1563 <vscale x 2 x double> %b) 1564 ret <vscale x 2 x double> %out 1565} 1566 1567; 1568; UZP2 1569; 1570 1571define <vscale x 16 x i1> @uzp2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) { 1572; CHECK-LABEL: uzp2_b8: 1573; CHECK: uzp2 p0.b, p0.b, p1.b 1574; CHECK-NEXT: ret 1575 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.nxv16i1(<vscale x 16 x i1> %a, 1576 <vscale x 16 x i1> %b) 1577 ret <vscale x 16 x i1> %out 1578} 1579 1580define <vscale x 8 x i1> @uzp2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) { 1581; CHECK-LABEL: uzp2_b16: 1582; CHECK: uzp2 p0.h, p0.h, p1.h 1583; CHECK-NEXT: ret 1584 %out = call <vscale x 8 x i1> @llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1> %a, 1585 <vscale x 8 x i1> %b) 1586 ret <vscale x 8 x i1> %out 1587} 1588 1589define <vscale x 4 x i1> @uzp2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) { 1590; CHECK-LABEL: uzp2_b32: 1591; CHECK: uzp2 p0.s, p0.s, p1.s 1592; CHECK-NEXT: ret 1593 %out = call <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1> %a, 1594 <vscale x 4 x i1> %b) 1595 ret <vscale x 4 x i1> %out 1596} 1597 1598define <vscale x 2 x i1> @uzp2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) { 1599; CHECK-LABEL: uzp2_b64: 1600; CHECK: uzp2 p0.d, p0.d, p1.d 1601; CHECK-NEXT: ret 1602 %out = 
call <vscale x 2 x i1> @llvm.aarch64.sve.uzp2.nxv2i1(<vscale x 2 x i1> %a, 1603 <vscale x 2 x i1> %b) 1604 ret <vscale x 2 x i1> %out 1605} 1606 1607define <vscale x 16 x i8> @uzp2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { 1608; CHECK-LABEL: uzp2_i8: 1609; CHECK: uzp2 z0.b, z0.b, z1.b 1610; CHECK-NEXT: ret 1611 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8> %a, 1612 <vscale x 16 x i8> %b) 1613 ret <vscale x 16 x i8> %out 1614} 1615 1616define <vscale x 8 x i16> @uzp2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { 1617; CHECK-LABEL: uzp2_i16: 1618; CHECK: uzp2 z0.h, z0.h, z1.h 1619; CHECK-NEXT: ret 1620 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16> %a, 1621 <vscale x 8 x i16> %b) 1622 ret <vscale x 8 x i16> %out 1623} 1624 1625define <vscale x 4 x i32> @uzp2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { 1626; CHECK-LABEL: uzp2_i32: 1627; CHECK: uzp2 z0.s, z0.s, z1.s 1628; CHECK-NEXT: ret 1629 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32> %a, 1630 <vscale x 4 x i32> %b) 1631 ret <vscale x 4 x i32> %out 1632} 1633 1634define <vscale x 2 x i64> @uzp2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { 1635; CHECK-LABEL: uzp2_i64: 1636; CHECK: uzp2 z0.d, z0.d, z1.d 1637; CHECK-NEXT: ret 1638 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64> %a, 1639 <vscale x 2 x i64> %b) 1640 ret <vscale x 2 x i64> %out 1641} 1642 1643define <vscale x 4 x half> @uzp2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) { 1644; CHECK-LABEL: uzp2_f16_v4: 1645; CHECK: uzp2 z0.s, z0.s, z1.s 1646; CHECK-NEXT: ret 1647 %out = call <vscale x 4 x half> @llvm.aarch64.sve.uzp2.nxv4f16(<vscale x 4 x half> %a, 1648 <vscale x 4 x half> %b) 1649 ret <vscale x 4 x half> %out 1650} 1651 1652define <vscale x 8 x bfloat> @uzp2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 { 1653; CHECK-LABEL: uzp2_bf16: 1654; CHECK: uzp2 z0.h, z0.h, 
z1.h 1655; CHECK-NEXT: ret 1656 %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2.nxv8bf16(<vscale x 8 x bfloat> %a, 1657 <vscale x 8 x bfloat> %b) 1658 ret <vscale x 8 x bfloat> %out 1659} 1660 1661define <vscale x 8 x half> @uzp2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { 1662; CHECK-LABEL: uzp2_f16: 1663; CHECK: uzp2 z0.h, z0.h, z1.h 1664; CHECK-NEXT: ret 1665 %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp2.nxv8f16(<vscale x 8 x half> %a, 1666 <vscale x 8 x half> %b) 1667 ret <vscale x 8 x half> %out 1668} 1669 1670define <vscale x 4 x float> @uzp2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) { 1671; CHECK-LABEL: uzp2_f32: 1672; CHECK: uzp2 z0.s, z0.s, z1.s 1673; CHECK-NEXT: ret 1674 %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp2.nxv4f32(<vscale x 4 x float> %a, 1675 <vscale x 4 x float> %b) 1676 ret <vscale x 4 x float> %out 1677} 1678 1679define <vscale x 2 x double> @uzp2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { 1680; CHECK-LABEL: uzp2_f64: 1681; CHECK: uzp2 z0.d, z0.d, z1.d 1682; CHECK-NEXT: ret 1683 %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp2.nxv2f64(<vscale x 2 x double> %a, 1684 <vscale x 2 x double> %b) 1685 ret <vscale x 2 x double> %out 1686} 1687 1688; 1689; ZIP1 1690; 1691 1692define <vscale x 16 x i1> @zip1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) { 1693; CHECK-LABEL: zip1_b8: 1694; CHECK: zip1 p0.b, p0.b, p1.b 1695; CHECK-NEXT: ret 1696 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.nxv16i1(<vscale x 16 x i1> %a, 1697 <vscale x 16 x i1> %b) 1698 ret <vscale x 16 x i1> %out 1699} 1700 1701define <vscale x 8 x i1> @zip1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) { 1702; CHECK-LABEL: zip1_b16: 1703; CHECK: zip1 p0.h, p0.h, p1.h 1704; CHECK-NEXT: ret 1705 %out = call <vscale x 8 x i1> @llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1> %a, 1706 <vscale x 8 x i1> %b) 1707 ret <vscale x 8 x i1> %out 1708} 1709 1710define <vscale x 4 x i1> @zip1_b32(<vscale x 4 x i1> 
%a, <vscale x 4 x i1> %b) { 1711; CHECK-LABEL: zip1_b32: 1712; CHECK: zip1 p0.s, p0.s, p1.s 1713; CHECK-NEXT: ret 1714 %out = call <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 4 x i1> %a, 1715 <vscale x 4 x i1> %b) 1716 ret <vscale x 4 x i1> %out 1717} 1718 1719define <vscale x 2 x i1> @zip1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) { 1720; CHECK-LABEL: zip1_b64: 1721; CHECK: zip1 p0.d, p0.d, p1.d 1722; CHECK-NEXT: ret 1723 %out = call <vscale x 2 x i1> @llvm.aarch64.sve.zip1.nxv2i1(<vscale x 2 x i1> %a, 1724 <vscale x 2 x i1> %b) 1725 ret <vscale x 2 x i1> %out 1726} 1727 1728define <vscale x 16 x i8> @zip1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { 1729; CHECK-LABEL: zip1_i8: 1730; CHECK: zip1 z0.b, z0.b, z1.b 1731; CHECK-NEXT: ret 1732 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8> %a, 1733 <vscale x 16 x i8> %b) 1734 ret <vscale x 16 x i8> %out 1735} 1736 1737define <vscale x 8 x i16> @zip1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { 1738; CHECK-LABEL: zip1_i16: 1739; CHECK: zip1 z0.h, z0.h, z1.h 1740; CHECK-NEXT: ret 1741 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16> %a, 1742 <vscale x 8 x i16> %b) 1743 ret <vscale x 8 x i16> %out 1744} 1745 1746define <vscale x 4 x i32> @zip1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { 1747; CHECK-LABEL: zip1_i32: 1748; CHECK: zip1 z0.s, z0.s, z1.s 1749; CHECK-NEXT: ret 1750 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32> %a, 1751 <vscale x 4 x i32> %b) 1752 ret <vscale x 4 x i32> %out 1753} 1754 1755define <vscale x 2 x i64> @zip1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { 1756; CHECK-LABEL: zip1_i64: 1757; CHECK: zip1 z0.d, z0.d, z1.d 1758; CHECK-NEXT: ret 1759 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64> %a, 1760 <vscale x 2 x i64> %b) 1761 ret <vscale x 2 x i64> %out 1762} 1763 1764define <vscale x 4 x half> @zip1_f16_v4(<vscale x 4 x 
half> %a, <vscale x 4 x half> %b) { 1765; CHECK-LABEL: zip1_f16_v4: 1766; CHECK: zip1 z0.s, z0.s, z1.s 1767; CHECK-NEXT: ret 1768 %out = call <vscale x 4 x half> @llvm.aarch64.sve.zip1.nxv4f16(<vscale x 4 x half> %a, 1769 <vscale x 4 x half> %b) 1770 ret <vscale x 4 x half> %out 1771} 1772 1773define <vscale x 8 x bfloat> @zip1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 { 1774; CHECK-LABEL: zip1_bf16: 1775; CHECK: zip1 z0.h, z0.h, z1.h 1776; CHECK-NEXT: ret 1777 %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1.nxv8bf16(<vscale x 8 x bfloat> %a, 1778 <vscale x 8 x bfloat> %b) 1779 ret <vscale x 8 x bfloat> %out 1780} 1781 1782define <vscale x 8 x half> @zip1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { 1783; CHECK-LABEL: zip1_f16: 1784; CHECK: zip1 z0.h, z0.h, z1.h 1785; CHECK-NEXT: ret 1786 %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip1.nxv8f16(<vscale x 8 x half> %a, 1787 <vscale x 8 x half> %b) 1788 ret <vscale x 8 x half> %out 1789} 1790 1791define <vscale x 4 x float> @zip1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) { 1792; CHECK-LABEL: zip1_f32: 1793; CHECK: zip1 z0.s, z0.s, z1.s 1794; CHECK-NEXT: ret 1795 %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip1.nxv4f32(<vscale x 4 x float> %a, 1796 <vscale x 4 x float> %b) 1797 ret <vscale x 4 x float> %out 1798} 1799 1800define <vscale x 2 x double> @zip1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { 1801; CHECK-LABEL: zip1_f64: 1802; CHECK: zip1 z0.d, z0.d, z1.d 1803; CHECK-NEXT: ret 1804 %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip1.nxv2f64(<vscale x 2 x double> %a, 1805 <vscale x 2 x double> %b) 1806 ret <vscale x 2 x double> %out 1807} 1808 1809; 1810; ZIP2 1811; 1812 1813define <vscale x 16 x i1> @zip2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) { 1814; CHECK-LABEL: zip2_b8: 1815; CHECK: zip2 p0.b, p0.b, p1.b 1816; CHECK-NEXT: ret 1817 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.nxv16i1(<vscale x 16 x i1> %a, 1818 
; (tail of a zip2 predicate test whose "define" begins before this chunk)
<vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @zip2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: zip2_b16:
; CHECK: zip2 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @zip2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: zip2_b32:
; CHECK: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @zip2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: zip2_b64:
; CHECK: zip2 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.zip2.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i8> @zip2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: zip2_i8:
; CHECK: zip2 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @zip2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: zip2_i16:
; CHECK: zip2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @zip2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: zip2_i32:
; CHECK: zip2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @zip2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: zip2_i64:
; CHECK: zip2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

; Unpacked nxv4f16: the CHECK below expects the 32-bit (.s) element form,
; since each half element occupies a 32-bit container here.
define <vscale x 4 x half> @zip2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: zip2_f16_v4:
; CHECK: zip2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.zip2.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

; bfloat test carries #0 ("+bf16"); see the attributes at the end of the file.
define <vscale x 8 x bfloat> @zip2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: zip2_bf16:
; CHECK: zip2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @zip2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: zip2_f16:
; CHECK: zip2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip2.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @zip2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: zip2_f32:
; CHECK: zip2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @zip2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: zip2_f64:
; CHECK: zip2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

; Declarations for every intrinsic exercised by this test file.

declare <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1>, i8, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1>, i8, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)

; COMPACT is only defined for 32- and 64-bit element types.
declare <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8>, i64)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16>, i64)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32>, i64)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half>, i64)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float>, i64)
declare <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double>, i64)

declare <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
declare <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
declare <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)

declare i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
declare float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
declare float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

; Unpack intrinsics widen: the nxv* suffix names the RESULT type, the
; parameter is the narrower source vector.
declare <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sunpkhi.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sunpklo.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sunpklo.nxv2i64(<vscale x 4 x i32>)

; TBL indices are always an integer vector with the same element count.
declare <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i16>)
declare <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>)
declare <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uunpkhi.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uunpklo.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.trn1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.trn1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.trn1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.trn1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.trn1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.trn1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.trn1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x half> @llvm.aarch64.sve.trn1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.trn1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.trn1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.trn1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.trn2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.trn2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.trn2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.trn2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.trn2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x half> @llvm.aarch64.sve.trn2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.trn2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.trn2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.trn2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.uzp1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uzp1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uzp1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x half> @llvm.aarch64.sve.uzp1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.uzp1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.uzp1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.uzp1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.uzp2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x half> @llvm.aarch64.sve.uzp2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.uzp2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.uzp2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.uzp2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.zip1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x half> @llvm.aarch64.sve.zip1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.zip1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.zip1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.zip1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.zip2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x half> @llvm.aarch64.sve.zip2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.zip2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }