; Test if several consecutive loads/stores can be clustered (fused) by the
; scheduler. The scheduler will print "Cluster ld/st SU(x) - SU(y)" if SU(x)
; and SU(y) are fused.

; REQUIRES: asserts
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 \
; RUN: -mattr=-paired-vector-memops,-pcrelative-memops -verify-misched \
; RUN: -debug-only=machine-scheduler 2>&1 | FileCheck %s

; Four i64 stores at positive, adjacent offsets; both pre-RA and post-RA
; scheduling regions should form two clusters.
define i64 @store_i64(i64* nocapture %P, i64 %v) {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i64:%bb.0
; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, 24
; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, 16
; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, 8
; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, 32
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i64:%bb.0
; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], 16
; CHECK: SU([[SU1]]): STD renamable $x[[REG]], 8
; CHECK: SU([[SU2]]): STD renamable $x[[REG]], 24
; CHECK: SU([[SU3]]): STD renamable $x[[REG]], 32
  %arrayidx = getelementptr inbounds i64, i64* %P, i64 3
  store i64 %v, i64* %arrayidx
  %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
  store i64 %v, i64* %arrayidx1
  %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1
  store i64 %v, i64* %arrayidx2
  %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4
  store i64 %v, i64* %arrayidx3
  ret i64 %v
}

; Same pattern with i32 stores (STW / STW of the 32-bit subregister).
define i32 @store_i32(i32* nocapture %P, i32 %v) {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i32:%bb.0
; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
; CHECK: SU([[SU2]]): STW %[[REG:[0-9]+]].sub_32:g8rc, 52
; CHECK: SU([[SU3]]): STW %[[REG]].sub_32:g8rc, 48
; CHECK: SU([[SU4]]): STW %[[REG]].sub_32:g8rc, 44
; CHECK: SU([[SU5]]): STW %[[REG]].sub_32:g8rc, 56
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i32:%bb.0
; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
; CHECK: SU([[SU0]]): STW renamable $r[[REG:[0-9]+]], 48
; CHECK: SU([[SU1]]): STW renamable $r[[REG]], 44
; CHECK: SU([[SU2]]): STW renamable $r[[REG]], 52
; CHECK: SU([[SU3]]): STW renamable $r[[REG]], 56
  %arrayidx = getelementptr inbounds i32, i32* %P, i32 13
  store i32 %v, i32* %arrayidx
  %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 12
  store i32 %v, i32* %arrayidx1
  %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 11
  store i32 %v, i32* %arrayidx2
  %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 14
  store i32 %v, i32* %arrayidx3
  ret i32 %v
}

; Clustering with negative offsets, i64 stores.
; NOTE(review): attribute group #0 is referenced here but no
; "attributes #0 = { ... }" definition is visible in this chunk — confirm it
; exists later in the file, or drop the reference.
define void @store_i64_neg(i64* nocapture %P, i64 %v) #0 {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i64_neg:%bb.0
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, -24
; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, -8
; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, -16
; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, -32
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i64_neg:%bb.0
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], -8
; CHECK: SU([[SU1]]): STD renamable $x[[REG]], -16
; CHECK: SU([[SU2]]): STD renamable $x[[REG]], -24
; CHECK: SU([[SU3]]): STD renamable $x[[REG]], -32
  %arrayidx = getelementptr inbounds i64, i64* %P, i64 -3
  store i64 %v, i64* %arrayidx
  %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 -1
  store i64 %v, i64* %arrayidx1
  %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 -2
  store i64 %v, i64* %arrayidx2
  %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 -4
  store i64 %v, i64* %arrayidx3
  ret void
}

; Clustering with negative offsets, i32 stores.
define void @store_i32_neg(i32* nocapture %P, i32 %v) #0 {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i32_neg:%bb.0
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
; CHECK: SU([[SU2]]): STW %[[REG:[0-9]+]].sub_32:g8rc, -12
; CHECK: SU([[SU3]]): STW %[[REG]].sub_32:g8rc, -4
; CHECK: SU([[SU4]]): STW %[[REG]].sub_32:g8rc, -8
; CHECK: SU([[SU5]]): STW %[[REG]].sub_32:g8rc, -16
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i32_neg:%bb.0
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
; CHECK: SU([[SU0]]): STW renamable $r[[REG:[0-9]+]], -4
; CHECK: SU([[SU1]]): STW renamable $r[[REG]], -8
; CHECK: SU([[SU2]]): STW renamable $r[[REG]], -12
; CHECK: SU([[SU3]]): STW renamable $r[[REG]], -16
  %arrayidx = getelementptr inbounds i32, i32* %P, i32 -3
  store i32 %v, i32* %arrayidx
  %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 -1
  store i32 %v, i32* %arrayidx1
  %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 -2
  store i32 %v, i32* %arrayidx2
  %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 -4
  store i32 %v, i32* %arrayidx3
  ret void
}

; Clustering of f64 stores (DFSTOREf64 pre-RA, STFD post-RA).
define void @store_double(double* nocapture %P, double %v) {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_double:%bb.0
; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
; CHECK: SU([[SU2]]): DFSTOREf64 %[[REG:[0-9]+]]:vsfrc, 24
; CHECK: SU([[SU3]]): DFSTOREf64 %[[REG]]:vsfrc, 8
; CHECK: SU([[SU4]]): DFSTOREf64 %[[REG]]:vsfrc, 16
; CHECK: SU([[SU5]]): DFSTOREf64 %[[REG]]:vsfrc, 32
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_double:%bb.0
; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
; CHECK: SU([[SU0]]): STFD renamable $f[[REG:[0-9]+]], 8
; CHECK: SU([[SU1]]): STFD renamable $f[[REG]], 16
; CHECK: SU([[SU2]]): STFD renamable $f[[REG]], 24
; CHECK: SU([[SU3]]): STFD renamable $f[[REG]], 32
  %arrayidx = getelementptr inbounds double, double* %P, i64 3
  store double %v, double* %arrayidx
  %arrayidx1 = getelementptr inbounds double, double* %P, i64 1
  store double %v, double* %arrayidx1
  %arrayidx2 = getelementptr inbounds double, double* %P, i64 2
  store double %v, double* %arrayidx2
  %arrayidx3 = getelementptr inbounds double, double* %P, i64 4
  store double %v, double* %arrayidx3
  ret void
}

; f32 stores are expected NOT to cluster (CHECK-NOT in both regions).
; NOTE(review): the SU/REG variables below ([[SU0]]..[[SU5]], [[REG]]) have no
; local "[[SUn:[0-9]+]]" definition in this function; FileCheck variables
; persist across CHECK-LABEL, so these match the values captured in
; store_double above — confirm that coupling is intentional.
define void @store_float(float* nocapture %P, float %v) {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_float:%bb.0
; CHECK-NOT: Cluster ld/st
; CHECK-NOT: Cluster ld/st
; CHECK: SU([[SU2]]): DFSTOREf32 %[[REG:[0-9]+]]:vssrc, 12
; CHECK: SU([[SU3]]): DFSTOREf32 %[[REG]]:vssrc, 4
; CHECK: SU([[SU4]]): DFSTOREf32 %[[REG]]:vssrc, 8
; CHECK: SU([[SU5]]): DFSTOREf32 %[[REG]]:vssrc, 16
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_float:%bb.0
; CHECK-NOT: Cluster ld/st
; CHECK-NOT: Cluster ld/st
; CHECK: SU([[SU0]]): STFS renamable $f[[REG:[0-9]+]], 12
; CHECK: SU([[SU1]]): STFS renamable $f[[REG]], 4
; CHECK: SU([[SU2]]): STFS renamable $f[[REG]], 8
; CHECK: SU([[SU3]]): STFS renamable $f[[REG]], 16
  %arrayidx = getelementptr inbounds float, float* %P, i64 3
  store float %v, float* %arrayidx
  %arrayidx1 = getelementptr inbounds float, float* %P, i64 1
  store float %v, float* %arrayidx1
  %arrayidx2 = getelementptr inbounds float, float* %P, i64 2
  store float %v, float* %arrayidx2
  %arrayidx3 = getelementptr inbounds float, float* %P, i64 4
  store float %v, float* %arrayidx3
  ret void
}

; Cannot fuse the store/load if there is volatile in between.
define i64 @store_volatile(i64* nocapture %P, i64 %v) {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_volatile:%bb.0
; CHECK-NOT: Cluster ld/st
; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, 24
; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, 16
; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, 8
; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, 32
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_volatile:%bb.0
; CHECK-NOT: Cluster ld/st
; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], 24
; CHECK: SU([[SU1]]): STD renamable $x[[REG]], 16
; CHECK: SU([[SU2]]): STD renamable $x[[REG]], 8
; CHECK: SU([[SU3]]): STD renamable $x[[REG]], 32
  %arrayidx = getelementptr inbounds i64, i64* %P, i64 3
  store volatile i64 %v, i64* %arrayidx
  %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
  store volatile i64 %v, i64* %arrayidx1
  %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1
  store volatile i64 %v, i64* %arrayidx2
  %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4
  store volatile i64 %v, i64* %arrayidx3
  ret i64 %v
}

@p = common local_unnamed_addr global [100 x i32] zeroinitializer, align 4

; A mixed STW8/STW pair to a global should still cluster.
define void @store_i32_stw_stw8(i32 signext %m, i32 signext %n) {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i32_stw_stw8:%bb.0
; CHECK: Cluster ld/st SU([[SU5:[0-9]+]]) - SU([[SU8:[0-9]+]])
; CHECK: SU([[SU5]]): STW8 %{{[0-9]+}}:g8rc, 24
; CHECK: SU([[SU8]]): STW %{{[0-9]+}}:gprc, 20
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i32_stw_stw8:%bb.0
; CHECK: Cluster ld/st SU([[SU5:[0-9]+]]) - SU([[SU6:[0-9]+]])
; CHECK: SU([[SU5]]): STW8 renamable $x{{[0-9]+}}, 24
; CHECK: SU([[SU6]]): STW renamable $r{{[0-9]+}}, 20
  store i32 9, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 6), align 4
  store i32 %n, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 7), align 4
  %add = add nsw i32 %n, %m
  store i32 %add, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 5), align 4
  ret void
}

; Two STW8 stores at adjacent offsets should cluster.
define void @store_i32_stw8(i32 signext %m, i32 signext %n) {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i32_stw8:%bb.0
; CHECK: Cluster ld/st SU([[SU4:[0-9]+]]) - SU([[SU5:[0-9]+]])
; CHECK: SU([[SU4]]): STW8 %{{[0-9]+}}:g8rc, 24
; CHECK: SU([[SU5]]): STW8 %{{[0-9]+}}:g8rc, 28
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_i32_stw8:%bb.0
; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
; CHECK: SU([[SU3]]): STW8 renamable $x{{[0-9]+}}, 24
; CHECK: SU([[SU4]]): STW8 renamable $x{{[0-9]+}}, 28
  store i32 9, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 6), align 4
  store i32 %n, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 7), align 4
  ret void
}

declare void @bar(i64*)

; Stores relative to a frame index (stack slot) should also cluster.
define void @store_frame_index(i32 %a, i32 %b) {
entry:
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: store_frame_index:%bb.0
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
; CHECK: SU([[SU2]]): STD %{{[0-9]+}}:g8rc, 0, %stack.0.buf
; CHECK: SU([[SU3]]): STD %{{[0-9]+}}:g8rc, 8, %stack.0.buf
  %buf = alloca [8 x i64], align 8
  %0 = bitcast [8 x i64]* %buf to i8*
  %conv = zext i32 %a to i64
  %arrayidx = getelementptr inbounds [8 x i64], [8 x i64]* %buf, i64 0, i64 0
  store i64 %conv, i64* %arrayidx, align 8
  %conv1 = zext i32 %b to i64
  %arrayidx2 = getelementptr inbounds [8 x i64], [8 x i64]* %buf, i64 0, i64 1
  store i64 %conv1, i64* %arrayidx2, align 8
  call void @bar(i64* nonnull %arrayidx)
  ret void
}