; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=0 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=1 < %s | FileCheck %s

; Every function below forms a vector of addresses as %base plus a vector of
; zero-extended 32-bit byte offsets and passes it to llvm.masked.gather.
; The assertions expect exactly one SVE gather load using the unscaled "uxtw"
; vector-offset addressing mode followed by ret, and the same output is
; expected for both settings of -aarch64-enable-mgather-combine.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; unscaled unpacked 32-bit offsets
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Two elements per vscale granule: offsets and results occupy 64-bit lanes
; (z0.d). Integer results narrower than 64 bits are zero-extended here.

define <vscale x 2 x i64> @masked_gather_nxv2i8(i8* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets.zext
  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i16(i8* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets.zext
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i32(i8* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets.zext
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i64(i8* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets.zext
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i64*>
  %vals = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
  ret <vscale x 2 x i64> %vals
}

define <vscale x 2 x half> @masked_gather_nxv2f16(i8* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets.zext
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x half*>
  %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
  ret <vscale x 2 x half> %vals
}

define <vscale x 2 x float> @masked_gather_nxv2f32(i8* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets.zext
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x float*>
  %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
  ret <vscale x 2 x float> %vals
}

define <vscale x 2 x double> @masked_gather_nxv2f64(i8* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets.zext
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x double*>
  %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
  ret <vscale x 2 x double> %vals
}

; The "sgather" variants sign-extend the gathered value instead of
; zero-extending it; the expected instruction is the ld1s* form.

define <vscale x 2 x i64> @masked_sgather_nxv2i8(i8* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets.zext
  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %vals.sext = sext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

define <vscale x 2 x i64> @masked_sgather_nxv2i16(i8* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets.zext
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

define <vscale x 2 x i64> @masked_sgather_nxv2i32(i8* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets.zext
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; unscaled packed 32-bit offsets
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Four elements per vscale granule: offsets and results occupy 32-bit lanes
; (z0.s). The offsets are still zero-extended to 64 bits in the IR before
; being used as the getelementptr index.

define <vscale x 4 x i32> @masked_gather_nxv4i8(i8* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
  %ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets.zext
  %vals = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8(<vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i8> undef)
  %vals.zext = zext <vscale x 4 x i8> %vals to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %vals.zext
}

define <vscale x 4 x i32> @masked_gather_nxv4i16(i8* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets.zext
  %ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x i16*>
  %vals = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16(<vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x i16> undef)
  %vals.zext = zext <vscale x 4 x i16> %vals to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %vals.zext
}

define <vscale x 4 x i32> @masked_gather_nxv4i32(i8* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets.zext
  %ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x i32*>
  %vals = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
  ret <vscale x 4 x i32> %vals
}

define <vscale x 4 x half> @masked_gather_nxv4f16(i8* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets.zext
  %ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x half*>
  %vals = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16(<vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x half> undef)
  ret <vscale x 4 x half> %vals
}

define <vscale x 4 x float> @masked_gather_nxv4f32(i8* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets.zext
  %ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x float*>
  %vals = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> undef)
  ret <vscale x 4 x float> %vals
}

define <vscale x 4 x i32> @masked_sgather_nxv4i8(i8* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
  %ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets.zext
  %vals = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8(<vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i8> undef)
  %vals.sext = sext <vscale x 4 x i8> %vals to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %vals.sext
}

define <vscale x 4 x i32> @masked_sgather_nxv4i16(i8* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets.zext
  %ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x i16*>
  %vals = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16(<vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x i16> undef)
  %vals.sext = sext <vscale x 4 x i16> %vals to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %vals.sext
}

; Intrinsic declarations. Operands are: vector of pointers, alignment,
; per-lane mask, passthru vector.
declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)

declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8(<vscale x 4 x i8*>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
declare <vscale x 4 x i16> @llvm.masked.gather.nxv4i16(<vscale x 4 x i16*>, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 4 x half> @llvm.masked.gather.nxv4f16(<vscale x 4 x half*>, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x float*>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)