; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -codegenprepare < %s | FileCheck %s

; Each function below feeds a vector of pointers to a masked gather or
; scatter intrinsic. The expected output shows how the pointer operand looks
; after the -codegenprepare pass: wherever the addresses are built from a
; uniform (splatted) component, the vector of pointers is re-expressed as a
; getelementptr with a scalar base pointer and a vector index.
; NOTE(review): presumably this form lets the backend select a
; scalar-base + vector-index addressing mode; confirm against the pass.

target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"

; Two-field struct used by the struct-field addressing tests.
%struct.a = type { i32, i32 }
; External global of type %struct.a, used as a constant base pointer.
@c = external dso_local global %struct.a, align 4
; Constant 16-element i32 table, used as a constant array base.
@glob_array = internal unnamed_addr constant [16 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5, i32 8, i32 13, i32 21, i32 34, i32 55, i32 89, i32 144, i32 233, i32 377, i32 610, i32 987], align 16

; Input: %base is splatted into a <4 x i32*> and indexed by a vector index.
; Expected: the insertelement/shufflevector splat is gone and the GEP uses the
; scalar %base directly with the vector index.
define <4 x i32> @splat_base(i32* %base, <4 x i64> %index) {
; CHECK-LABEL: @splat_base(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <4 x i64> [[INDEX:%.*]]
; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT:    ret <4 x i32> [[RES]]
;
  %broadcast.splatinsert = insertelement <4 x i32*> undef, i32* %base, i32 0
  %broadcast.splat = shufflevector <4 x i32*> %broadcast.splatinsert, <4 x i32*> undef, <4 x i32> zeroinitializer
  %gep = getelementptr i32, <4 x i32*> %broadcast.splat, <4 x i64> %index
  %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %res
}

; Input: a single GEP on a scalar struct pointer whose first index is an
; all-zero vector and whose second index selects field 1.
; Expected: a fully scalar GEP to field 1, followed by an i32 GEP with an
; all-zero vector index that produces the vector of pointers.
define <4 x i32> @splat_struct(%struct.a* %base) {
; CHECK-LABEL: @splat_struct(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_A:%.*]], %struct.a* [[BASE:%.*]], i64 0, i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <4 x i64> zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT:    ret <4 x i32> [[RES]]
;
  %gep = getelementptr %struct.a, %struct.a* %base, <4 x i64> zeroinitializer, i32 1
  %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %res
}

; Input: a splatted base pointer indexed by a scalar index, so every lane
; holds the same address.
; Expected: a scalar GEP of %base by %index, followed by an i32 GEP with an
; all-zero vector index.
define <4 x i32> @scalar_index(i32* %base, i64 %index) {
; CHECK-LABEL: @scalar_index(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <4 x i64> zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT:    ret <4 x i32> [[RES]]
;
  %broadcast.splatinsert = insertelement <4 x i32*> undef, i32* %base, i32 0
  %broadcast.splat = shufflevector <4 x i32*> %broadcast.splatinsert, <4 x i32*> undef, <4 x i32> zeroinitializer
  %gep = getelementptr i32, <4 x i32*> %broadcast.splat, i64 %index
  %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %res
}

; Input: a scalar base pointer indexed by a splatted scalar index.
; Expected: the same output shape as @scalar_index, a scalar GEP by %index
; followed by an i32 GEP with an all-zero vector index.
define <4 x i32> @splat_index(i32* %base, i64 %index) {
; CHECK-LABEL: @splat_index(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <4 x i64> zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT:    ret <4 x i32> [[RES]]
;
  %broadcast.splatinsert = insertelement <4 x i64> undef, i64 %index, i32 0
  %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
  %gep = getelementptr i32, i32* %base, <4 x i64> %broadcast.splat
  %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %res
}

; Input: an inbounds GEP into the global array @glob_array with a scalar zero
; first index and a vector second index.
; Expected: an i32 GEP whose base is the constant expression addressing
; element 0 of @glob_array and whose index is the original vector.
define <4 x i32> @test_global_array(<4 x i64> %indxs) {
; CHECK-LABEL: @test_global_array(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @glob_array, i64 0, i64 0), <4 x i64> [[INDXS:%.*]]
; CHECK-NEXT:    [[G:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT:    ret <4 x i32> [[G]]
;
  %p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <4 x i64> %indxs
  %g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %g
}

; Input: the global @c is splatted and indexed to field 1 with all-constant
; indices.
; Expected: no address instructions remain; the gather is the first
; instruction and its pointer operand is a constant vector of four identical
; constant GEP expressions.
define <4 x i32> @global_struct_splat() {
; CHECK-LABEL: @global_struct_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> <i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1)>, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = insertelement <4 x %struct.a*> undef, %struct.a* @c, i32 0
  %2 = shufflevector <4 x %struct.a*> %1, <4 x %struct.a*> undef, <4 x i32> zeroinitializer
  %3 = getelementptr %struct.a, <4 x %struct.a*> %2, <4 x i64> zeroinitializer, i32 1
  %4 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %3, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %4
}

; Input: a splatted pointer is passed straight to the gather, with no GEP,
; using a variable mask and passthru.
; Expected: the splat is replaced by an i32 GEP of %ptr with an all-zero
; vector index; the mask and passthru operands are passed through unchanged.
define <4 x i32> @splat_ptr_gather(i32* %ptr, <4 x i1> %mask, <4 x i32> %passthru) {
; CHECK-LABEL: @splat_ptr_gather(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <4 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]], <4 x i32> [[PASSTHRU:%.*]])
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = insertelement <4 x i32*> undef, i32* %ptr, i32 0
  %2 = shufflevector <4 x i32*> %1, <4 x i32*> undef, <4 x i32> zeroinitializer
  %3 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> %mask, <4 x i32> %passthru)
  ret <4 x i32> %3
}

; Scatter counterpart of @splat_ptr_gather: a splatted pointer is passed
; straight to the scatter.
; Expected: the splat is replaced by an i32 GEP of %ptr with an all-zero
; vector index; the value and mask operands are passed through unchanged.
define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) {
; CHECK-LABEL: @splat_ptr_scatter(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <4 x i64> zeroinitializer
; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL:%.*]], <4 x i32*> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]])
; CHECK-NEXT:    ret void
;
  %1 = insertelement <4 x i32*> undef, i32* %ptr, i32 0
  %2 = shufflevector <4 x i32*> %1, <4 x i32*> undef, <4 x i32> zeroinitializer
  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %2, i32 4, <4 x i1> %mask)
  ret void
}

; Intrinsic declarations; operand order matches the calls above.
; Gather: (pointers, alignment, mask, passthru).
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
; Scatter: (value, pointers, alignment, mask).
declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>)