; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; If we have some pattern that leaves only some low bits set, and then performs
; left-shift of those bits, we can combine those two shifts into a shift+mask.

; There are many variants to this pattern:
;   d)  (trunc ((x & ((-1 << maskNbits) >> maskNbits)))) << shiftNbits
; simplify to:
;   ((trunc(x)) << shiftNbits) & (-1 >> ((-(maskNbits+shiftNbits))+32))
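
; For example (illustrative only; this mirrors what the autogenerated CHECK
; lines of @t0_basic below expect), the chain
;   %t4 = and i64 %t2, %x
;   %t5 = trunc i64 %t4 to i32
;   %t6 = shl i32 %t5, %t3
; should be rewritten to truncate %x directly and re-apply the mask on the
; narrow type:
;   %tmp1 = trunc i64 %x to i32
;   %tmp2 = shl i32 %tmp1, %t3
;   %t6   = and i32 %tmp2, 2147483647
; The rewrite is only expected when the intermediate values feeding the final
; shift have no extra uses; the @n*_extrause* tests at the end cover that.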

; Simple tests.

declare void @use32(i32)
declare void @use64(i64)

define i32 @t0_basic(i64 %x, i32 %nbits) {
; CHECK-LABEL: @t0_basic(
; CHECK-NEXT:    [[T0:%.*]] = zext i32 [[NBITS:%.*]] to i64
; CHECK-NEXT:    [[T1:%.*]] = shl i64 -1, [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = lshr i64 [[T1]], [[T0]]
; CHECK-NEXT:    [[T3:%.*]] = add i32 [[NBITS]], -33
; CHECK-NEXT:    call void @use64(i64 [[T0]])
; CHECK-NEXT:    call void @use64(i64 [[T1]])
; CHECK-NEXT:    call void @use64(i64 [[T2]])
; CHECK-NEXT:    call void @use32(i32 [[T3]])
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], [[T3]]
; CHECK-NEXT:    [[T6:%.*]] = and i32 [[TMP2]], 2147483647
; CHECK-NEXT:    ret i32 [[T6]]
;
  %t0 = zext i32 %nbits to i64
  %t1 = shl i64 -1, %t0
  %t2 = lshr i64 %t1, %t0
  %t3 = add i32 %nbits, -33

  call void @use64(i64 %t0)
  call void @use64(i64 %t1)
  call void @use64(i64 %t2)
  call void @use32(i32 %t3)

  %t4 = and i64 %t2, %x
  %t5 = trunc i64 %t4 to i32
  %t6 = shl i32 %t5, %t3 ; shift is smaller than mask
  ret i32 %t6
}

; Vectors

declare void @use8xi32(<8 x i32>)
declare void @use8xi64(<8 x i64>)

define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
; CHECK-LABEL: @t1_vec_splat(
; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
; CHECK-NEXT:    [[T1:%.*]] = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = lshr <8 x i64> [[T1]], [[T0]]
; CHECK-NEXT:    [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33>
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
; CHECK-NEXT:    [[T6:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
; CHECK-NEXT:    ret <8 x i32> [[T6]]
;
  %t0 = zext <8 x i32> %nbits to <8 x i64>
  %t1 = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, %t0
  %t2 = lshr <8 x i64> %t1, %t0
  %t3 = add <8 x i32> %nbits, <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33>

  call void @use8xi64(<8 x i64> %t0)
  call void @use8xi64(<8 x i64> %t1)
  call void @use8xi64(<8 x i64> %t2)
  call void @use8xi32(<8 x i32> %t3)

  %t4 = and <8 x i64> %t2, %x
  %t5 = trunc <8 x i64> %t4 to <8 x i32>
  %t6 = shl <8 x i32> %t5, %t3 ; shift is smaller than mask
  ret <8 x i32> %t6
}

define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) {
; CHECK-LABEL: @t2_vec_splat_undef(
; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
; CHECK-NEXT:    [[T1:%.*]] = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = lshr <8 x i64> [[T1]], [[T0]]
; CHECK-NEXT:    [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 undef, i32 -33>
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
; CHECK-NEXT:    [[T6:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 poison, i32 2147483647>
; CHECK-NEXT:    ret <8 x i32> [[T6]]
;
  %t0 = zext <8 x i32> %nbits to <8 x i64>
  %t1 = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, %t0
  %t2 = lshr <8 x i64> %t1, %t0
  %t3 = add <8 x i32> %nbits, <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 undef, i32 -33>

  call void @use8xi64(<8 x i64> %t0)
  call void @use8xi64(<8 x i64> %t1)
  call void @use8xi64(<8 x i64> %t2)
  call void @use8xi32(<8 x i32> %t3)

  %t4 = and <8 x i64> %t2, %x
  %t5 = trunc <8 x i64> %t4 to <8 x i32>
  %t6 = shl <8 x i32> %t5, %t3 ; shift is smaller than mask
  ret <8 x i32> %t6
}

define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) {
; CHECK-LABEL: @t3_vec_nonsplat(
; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
; CHECK-NEXT:    [[T1:%.*]] = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = lshr <8 x i64> [[T1]], [[T0]]
; CHECK-NEXT:    [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -64, i32 -63, i32 -33, i32 -32, i32 63, i32 64, i32 undef, i32 65>
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
; CHECK-NEXT:    [[T6:%.*]] = and <8 x i32> [[TMP2]], <i32 poison, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 poison, i32 poison>
; CHECK-NEXT:    ret <8 x i32> [[T6]]
;
  %t0 = zext <8 x i32> %nbits to <8 x i64>
  %t1 = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, %t0
  %t2 = lshr <8 x i64> %t1, %t0
  %t3 = add <8 x i32> %nbits, <i32 -64, i32 -63, i32 -33, i32 -32, i32 63, i32 64, i32 undef, i32 65>

  call void @use8xi64(<8 x i64> %t0)
  call void @use8xi64(<8 x i64> %t1)
  call void @use8xi64(<8 x i64> %t2)
  call void @use8xi32(<8 x i32> %t3)

  %t4 = and <8 x i64> %t2, %x
  %t5 = trunc <8 x i64> %t4 to <8 x i32>
  %t6 = shl <8 x i32> %t5, %t3 ; shift is smaller than mask
  ret <8 x i32> %t6
}

; Extra uses.
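; In these @n*_extrause* tests an intermediate value has an additional user,
; so the shift+mask rewrite is not applied and the original sequence is
; expected to survive unchanged (compare the CHECK lines).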

define i32 @n4_extrause0(i64 %x, i32 %nbits) {
; CHECK-LABEL: @n4_extrause0(
; CHECK-NEXT:    [[T0:%.*]] = zext i32 [[NBITS:%.*]] to i64
; CHECK-NEXT:    [[T1:%.*]] = shl i64 -1, [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = lshr i64 [[T1]], [[T0]]
; CHECK-NEXT:    [[T3:%.*]] = add i32 [[NBITS]], -33
; CHECK-NEXT:    call void @use64(i64 [[T0]])
; CHECK-NEXT:    call void @use64(i64 [[T1]])
; CHECK-NEXT:    call void @use64(i64 [[T2]])
; CHECK-NEXT:    call void @use32(i32 [[T3]])
; CHECK-NEXT:    [[T4:%.*]] = and i64 [[T2]], [[X:%.*]]
; CHECK-NEXT:    call void @use64(i64 [[T4]])
; CHECK-NEXT:    [[T5:%.*]] = trunc i64 [[T4]] to i32
; CHECK-NEXT:    [[T6:%.*]] = shl i32 [[T5]], [[T3]]
; CHECK-NEXT:    ret i32 [[T6]]
;
  %t0 = zext i32 %nbits to i64
  %t1 = shl i64 -1, %t0
  %t2 = lshr i64 %t1, %t0
  %t3 = add i32 %nbits, -33

  call void @use64(i64 %t0)
  call void @use64(i64 %t1)
  call void @use64(i64 %t2)
  call void @use32(i32 %t3)

  %t4 = and i64 %t2, %x
  call void @use64(i64 %t4)
  %t5 = trunc i64 %t4 to i32
  %t6 = shl i32 %t5, %t3 ; shift is smaller than mask
  ret i32 %t6
}

define i32 @n5_extrause1(i64 %x, i32 %nbits) {
; CHECK-LABEL: @n5_extrause1(
; CHECK-NEXT:    [[T0:%.*]] = zext i32 [[NBITS:%.*]] to i64
; CHECK-NEXT:    [[T1:%.*]] = shl i64 -1, [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = lshr i64 [[T1]], [[T0]]
; CHECK-NEXT:    [[T3:%.*]] = add i32 [[NBITS]], -33
; CHECK-NEXT:    call void @use64(i64 [[T0]])
; CHECK-NEXT:    call void @use64(i64 [[T1]])
; CHECK-NEXT:    call void @use64(i64 [[T2]])
; CHECK-NEXT:    call void @use32(i32 [[T3]])
; CHECK-NEXT:    [[T4:%.*]] = and i64 [[T2]], [[X:%.*]]
; CHECK-NEXT:    [[T5:%.*]] = trunc i64 [[T4]] to i32
; CHECK-NEXT:    call void @use32(i32 [[T5]])
; CHECK-NEXT:    [[T6:%.*]] = shl i32 [[T5]], [[T3]]
; CHECK-NEXT:    ret i32 [[T6]]
;
  %t0 = zext i32 %nbits to i64
  %t1 = shl i64 -1, %t0
  %t2 = lshr i64 %t1, %t0
  %t3 = add i32 %nbits, -33

  call void @use64(i64 %t0)
  call void @use64(i64 %t1)
  call void @use64(i64 %t2)
  call void @use32(i32 %t3)

  %t4 = and i64 %t2, %x
  %t5 = trunc i64 %t4 to i32
  call void @use32(i32 %t5)
  %t6 = shl i32 %t5, %t3 ; shift is smaller than mask
  ret i32 %t6
}

define i32 @n6_extrause2(i64 %x, i32 %nbits) {
; CHECK-LABEL: @n6_extrause2(
; CHECK-NEXT:    [[T0:%.*]] = zext i32 [[NBITS:%.*]] to i64
; CHECK-NEXT:    [[T1:%.*]] = shl i64 -1, [[T0]]
; CHECK-NEXT:    [[T2:%.*]] = lshr i64 [[T1]], [[T0]]
; CHECK-NEXT:    [[T3:%.*]] = add i32 [[NBITS]], -33
; CHECK-NEXT:    call void @use64(i64 [[T0]])
; CHECK-NEXT:    call void @use64(i64 [[T1]])
; CHECK-NEXT:    call void @use64(i64 [[T2]])
; CHECK-NEXT:    call void @use32(i32 [[T3]])
; CHECK-NEXT:    [[T4:%.*]] = and i64 [[T2]], [[X:%.*]]
; CHECK-NEXT:    call void @use64(i64 [[T4]])
; CHECK-NEXT:    [[T5:%.*]] = trunc i64 [[T4]] to i32
; CHECK-NEXT:    call void @use32(i32 [[T5]])
; CHECK-NEXT:    [[T6:%.*]] = shl i32 [[T5]], [[T3]]
; CHECK-NEXT:    ret i32 [[T6]]
;
  %t0 = zext i32 %nbits to i64
  %t1 = shl i64 -1, %t0
  %t2 = lshr i64 %t1, %t0
  %t3 = add i32 %nbits, -33

  call void @use64(i64 %t0)
  call void @use64(i64 %t1)
  call void @use64(i64 %t2)
  call void @use32(i32 %t3)

  %t4 = and i64 %t2, %x
  call void @use64(i64 %t4)
  %t5 = trunc i64 %t4 to i32
  call void @use32(i32 %t5)
  %t6 = shl i32 %t5, %t3 ; shift is smaller than mask
  ret i32 %t6
}