; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; Try to eliminate binops and shuffles when the shuffle is a select in disguise:
; PR37806 - https://bugs.llvm.org/show_bug.cgi?id=37806

; Mask lanes 0 and 2 take the add result; lanes 1 and 3 take %v unchanged.
; The expected output is a single add whose constant is 0 in the lanes that
; pass %v through.
define <4 x i32> @add(<4 x i32> %v) {
; CHECK-LABEL: @add(
; CHECK-NEXT: [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 13, i32 0>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = add <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

; Propagate flags when possible.

; Same shuffle as @add, but the input add carries nuw and nsw; the expected
; output keeps both flags.
define <4 x i32> @add_nuw_nsw(<4 x i32> %v) {
; CHECK-LABEL: @add_nuw_nsw(
; CHECK-NEXT: [[S:%.*]] = add nuw nsw <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 13, i32 0>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = add nuw nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

; Mask lane 2 is undef; the expected constant has undef in that lane.
define <4 x i32> @add_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @add_undef_mask_elt(
; CHECK-NEXT: [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 undef, i32 0>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = add <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
  ret <4 x i32> %s
}

; Poison flags must be dropped or undef must be replaced with safe constant.
; Mask lane 1 is undef and the input add carries nuw/nsw. The expected output
; has undef in lane 1 of the constant and no flags on the add.
define <4 x i32> @add_nuw_nsw_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @add_nuw_nsw_undef_mask_elt(
; CHECK-NEXT: [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 undef, i32 13, i32 0>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = add nuw nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
  ret <4 x i32> %s
}

; Constant operand 0 (LHS) could work for some non-commutative binops?

; The constant is the LHS of the sub and only lane 3 of %b is selected.
; The expected output keeps the shuffle; the three unused constant lanes
; become undef.
define <4 x i32> @sub(<4 x i32> %v) {
; CHECK-LABEL: @sub(
; CHECK-NEXT: [[B:%.*]] = sub <4 x i32> <i32 undef, i32 undef, i32 undef, i32 14>, [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = sub <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

; If any element of the shuffle mask operand is undef, that element of the result is undef.
; The shuffle is eliminated in this transform, but we can replace a constant element with undef.
; Preserve flags when possible. It's not safe to propagate poison-generating flags with undef constants.
; Mask lane 0 is undef, lanes 1 and 3 take the mul result, lane 2 takes %v.
; The expected output has undef in lane 0, 1 in the pass-through lane, and
; drops the nsw/nuw flags of the input mul.
define <4 x i32> @mul(<4 x i32> %v) {
; CHECK-LABEL: @mul(
; CHECK-NEXT: [[S:%.*]] = mul <4 x i32> [[V:%.*]], <i32 undef, i32 12, i32 1, i32 14>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = mul nsw nuw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

; Lanes 1 and 2 take the shl result; lanes 0 and 3 pass %v through, so the
; expected shift amount in those lanes is 0.
define <4 x i32> @shl(<4 x i32> %v) {
; CHECK-LABEL: @shl(
; CHECK-NEXT: [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

; Same shuffle as @shl with nsw on the input; the expected output keeps nsw.
define <4 x i32> @shl_nsw(<4 x i32> %v) {
; CHECK-LABEL: @shl_nsw(
; CHECK-NEXT: [[S:%.*]] = shl nsw <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = shl nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

; Mask lane 0 is undef; the expected shift amount in that lane is 0 rather
; than undef.
define <4 x i32> @shl_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @shl_undef_mask_elt(
; CHECK-NEXT: [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

; Mask lanes 0 and 3 are undef, lane 1 takes %v and lane 2 takes the shl.
; The expected output keeps nuw and uses 0 in every lane except lane 2.
define <4 x i32> @shl_nuw_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @shl_nuw_undef_mask_elt(
; CHECK-NEXT: [[S:%.*]] = shl nuw <4 x i32> [[V:%.*]], <i32 0, i32 0, i32 13, i32 0>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = shl nuw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %s
}

; NOTE: despite the "_op0" suffix, the constant here is the second operand
; (the shift amount). Lane 2 passes %v through, so its expected shift amount
; is 0.
define <4 x i32> @lshr_constant_op0(<4 x i32> %v) {
; CHECK-LABEL: @lshr_constant_op0(
; CHECK-NEXT: [[S:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 0, i32 14>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = lshr <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

; Same as @lshr_constant_op0 with the exact flag; the expected output keeps
; exact.
define <4 x i32> @lshr_exact_constant_op0(<4 x i32> %v) {
; CHECK-LABEL: @lshr_exact_constant_op0(
; CHECK-NEXT: [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 0, i32 14>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = lshr exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

; lshr with an undef mask lane (lane 0) and no exact flag. This test
; previously used shl, which made it a duplicate of @shl_undef_mask_elt.
; NOTE(review): the expected lines below were updated by hand to mirror the
; shl case (0 in the undef and pass-through lanes); confirm by regenerating
; with utils/update_test_checks.py.
define <4 x i32> @lshr_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @lshr_undef_mask_elt(
; CHECK-NEXT: [[S:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = lshr <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

; Mask lanes 0 and 3 are undef, lane 1 takes %v and lane 2 takes the lshr.
; The expected output keeps exact and uses 0 in every lane except lane 2.
define <4 x i32> @lshr_exact_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @lshr_exact_undef_mask_elt(
; CHECK-NEXT: [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 0, i32 0, i32 13, i32 0>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = lshr exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %s
}

; NOTE: despite the "_op1" suffix, the constant here is the first operand
; (the value being shifted). The binop and shuffle both remain in the
; expected output; only the shuffle operands are commuted.
define <4 x i32> @lshr_constant_op1(<4 x i32> %v) {
; CHECK-LABEL: @lshr_constant_op1(
; CHECK-NEXT: [[B:%.*]] = lshr exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = lshr exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

; Try weird types.

; 3-element vector: lane 0 passes %v through (shift amount 0), lanes 1 and 2
; take the ashr result.
define <3 x i32> @ashr(<3 x i32> %v) {
; CHECK-LABEL: @ashr(
; CHECK-NEXT: [[S:%.*]] = ashr <3 x i32> [[V:%.*]], <i32 0, i32 12, i32 13>
; CHECK-NEXT: ret <3 x i32> [[S]]
;
  %b = ashr <3 x i32> %v, <i32 11, i32 12, i32 13>
  %s = shufflevector <3 x i32> %b, <3 x i32> %v, <3 x i32> <i32 3, i32 1, i32 2>
  ret <3 x i32> %s
}

; i42 elements: lane 0 passes %v through (mask constant -1), lane 1 takes the
; and result, lane 2 is undef in the mask and in the expected constant.
define <3 x i42> @and(<3 x i42> %v) {
; CHECK-LABEL: @and(
; CHECK-NEXT: [[S:%.*]] = and <3 x i42> [[V:%.*]], <i42 -1, i42 12, i42 undef>
; CHECK-NEXT: ret <3 x i42> [[S]]
;
  %b = and <3 x i42> %v, <i42 11, i42 12, i42 13>
  %s = shufflevector <3 x i42> %v, <3 x i42> %b, <3 x i32> <i32 0, i32 4, i32 undef>
  ret <3 x i42> %s
}

; It doesn't matter if the intermediate op has extra uses.

declare void @use_v4i32(<4 x i32>)

; %b is also passed to @use_v4i32, so the original or stays; the shuffle is
; still replaced by a second or with 0 in the pass-through lanes 0 and 1.
define <4 x i32> @or(<4 x i32> %v) {
; CHECK-LABEL: @or(
; CHECK-NEXT: [[B:%.*]] = or <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
; CHECK-NEXT: [[S:%.*]] = or <4 x i32> [[V]], <i32 0, i32 0, i32 13, i32 14>
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[B]])
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = or <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  call void @use_v4i32(<4 x i32> %b)
  ret <4 x i32> %s
}

; Only lane 1 takes the xor result; the other lanes pass %v through with 0.
define <4 x i32> @xor(<4 x i32> %v) {
; CHECK-LABEL: @xor(
; CHECK-NEXT: [[S:%.*]] = xor <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 0, i32 0>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = xor <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x i32> %s
}

; The constant is the dividend (operand 0); the expected output is the same
; as the input.
define <4 x i32> @udiv(<4 x i32> %v) {
; CHECK-LABEL: @udiv(
; CHECK-NEXT: [[B:%.*]] = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

; Same as @udiv with the exact flag; the expected output is the same as the
; input.
define <4 x i32> @udiv_exact(<4 x i32> %v) {
; CHECK-LABEL: @udiv_exact(
; CHECK-NEXT: [[B:%.*]] = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

; Constant dividend with an undef mask lane (lane 1); the expected output is
; the same as the input.
define <4 x i32> @udiv_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @udiv_undef_mask_elt(
; CHECK-NEXT: [[B:%.*]] = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
  ret <4 x i32> %s
}

; Same as @udiv_undef_mask_elt with the exact flag; the expected output is
; the same as the input.
define <4 x i32> @udiv_exact_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @udiv_exact_undef_mask_elt(
; CHECK-NEXT: [[B:%.*]] = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
  ret <4 x i32> %s
}

; The constant is the divisor. Lanes 0 and 2 take the sdiv result; lanes 1
; and 3 pass %v through, so the expected divisor there is 1.
define <4 x i32> @sdiv(<4 x i32> %v) {
; CHECK-LABEL: @sdiv(
; CHECK-NEXT: [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 11, i32 1, i32 13, i32 1>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = sdiv <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x i32> %s
}

; Same as @sdiv with the exact flag; the expected output keeps exact.
define <4 x i32> @sdiv_exact(<4 x i32> %v) {
; CHECK-LABEL: @sdiv_exact(
; CHECK-NEXT: [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 11, i32 1, i32 13, i32 1>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = sdiv exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x i32> %s
}

; Div/rem need special handling if the shuffle has undef elements.

; Mask lanes 0 and 3 are undef; the expected divisor in those lanes is 1
; rather than undef.
define <4 x i32> @sdiv_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @sdiv_undef_mask_elt(
; CHECK-NEXT: [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 13, i32 1>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = sdiv <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
  ret <4 x i32> %s
}

; Same as @sdiv_undef_mask_elt with the exact flag; the expected output keeps
; exact.
define <4 x i32> @sdiv_exact_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @sdiv_exact_undef_mask_elt(
; CHECK-NEXT: [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 13, i32 1>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = sdiv exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
  ret <4 x i32> %s
}

; The constant is operand 0 of the urem; the expected output is the same as
; the input.
define <4 x i32> @urem(<4 x i32> %v) {
; CHECK-LABEL: @urem(
; CHECK-NEXT: [[B:%.*]] = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %s
}

; Same as @urem with mask lane 3 undef; the expected output is the same as
; the input.
define <4 x i32> @urem_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @urem_undef_mask_elt(
; CHECK-NEXT: [[B:%.*]] = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
  ret <4 x i32> %s
}

; The constant is operand 0 of the srem and only lane 2 of %b is selected;
; the expected output is the same as the input.
define <4 x i32> @srem(<4 x i32> %v) {
; CHECK-LABEL: @srem(
; CHECK-NEXT: [[B:%.*]] = srem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = srem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %s
}

; Try FP ops/types.
; Lanes 0 and 1 take the fadd result; lanes 2 and 3 pass %v through, so the
; expected constant there is -0.0.
define <4 x float> @fadd(<4 x float> %v) {
; CHECK-LABEL: @fadd(
; CHECK-NEXT: [[S:%.*]] = fadd <4 x float> [[V:%.*]], <float 4.100000e+01, float 4.200000e+01, float -0.000000e+00, float -0.000000e+00>
; CHECK-NEXT: ret <4 x float> [[S]]
;
  %b = fadd <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
  %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x float> %s
}

; The constant is the LHS of the fsub and only lanes 2 and 3 of %b are
; selected. The expected output keeps the shuffle; the unused constant lanes
; become undef.
define <4 x double> @fsub(<4 x double> %v) {
; CHECK-LABEL: @fsub(
; CHECK-NEXT: [[B:%.*]] = fsub <4 x double> <double undef, double undef, double 4.300000e+01, double 4.400000e+01>, [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> [[S]]
;
  %b = fsub <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
  %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %s
}

; Propagate any FMF.
; Only lane 0 takes the fmul result; the other lanes pass %v through with
; 1.0. The expected output keeps the nnan and ninf flags.
define <4 x float> @fmul(<4 x float> %v) {
; CHECK-LABEL: @fmul(
; CHECK-NEXT: [[S:%.*]] = fmul nnan ninf <4 x float> [[V:%.*]], <float 4.100000e+01, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT: ret <4 x float> [[S]]
;
  %b = fmul nnan ninf <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
  %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %s
}

; The constant is the dividend (operand 0). The expected output keeps the
; shuffle; the two unused constant lanes become undef.
define <4 x double> @fdiv_constant_op0(<4 x double> %v) {
; CHECK-LABEL: @fdiv_constant_op0(
; CHECK-NEXT: [[B:%.*]] = fdiv fast <4 x double> <double undef, double undef, double 4.300000e+01, double 4.400000e+01>, [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> [[S]]
;
  %b = fdiv fast <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
  %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %s
}

; The constant is the divisor (operand 1). Mask lane 0 is undef, lane 1
; passes %v through (divisor 1.0), lanes 2 and 3 take the fdiv result.
; The expected output keeps reassoc.
define <4 x double> @fdiv_constant_op1(<4 x double> %v) {
; CHECK-LABEL: @fdiv_constant_op1(
; CHECK-NEXT: [[S:%.*]] = fdiv reassoc <4 x double> [[V:%.*]], <double undef, double 1.000000e+00, double 4.300000e+01, double 4.400000e+01>
; CHECK-NEXT: ret <4 x double> [[S]]
;
  %b = fdiv reassoc <4 x double> %v, <double 41.0, double 42.0, double 43.0, double 44.0>
  %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %s
}

; The constant is operand 0 of the frem. The expected output keeps the
; shuffle; the two unused constant lanes become undef.
define <4 x double> @frem(<4 x double> %v) {
; CHECK-LABEL: @frem(
; CHECK-NEXT: [[B:%.*]] = frem <4 x double> <double 4.100000e+01, double 4.200000e+01, double undef, double undef>, [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> [[S]]
;
  %b = frem <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
  %s = shufflevector <4 x double> %b, <4 x double> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %s
}

; Tests where both operands of the shuffle are binops with the same opcode.

; Lanes 0 and 2 take %t1 and lanes 1 and 3 take %t2; the expected output is
; one add whose constant is assembled lane by lane from the two constants.
define <4 x i32> @add_add(<4 x i32> %v0) {
; CHECK-LABEL: @add_add(
; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 3, i32 8>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = add <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Both input adds carry nsw; the expected output keeps nsw.
define <4 x i32> @add_add_nsw(<4 x i32> %v0) {
; CHECK-LABEL: @add_add_nsw(
; CHECK-NEXT: [[T3:%.*]] = add nsw <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 3, i32 8>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = add nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = add nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Mask lane 2 is undef; the expected constant has undef in that lane.
define <4 x i32> @add_add_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @add_add_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 undef, i32 8>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = add <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
  ret <4 x i32> %t3
}

; Poison flags must be dropped or undef must be replaced with safe constant.
; Both input adds carry nsw and mask lane 2 is undef. The expected output has
; undef in lane 2 of the constant and no nsw flag.
define <4 x i32> @add_add_nsw_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @add_add_nsw_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 undef, i32 8>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = add nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = add nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
  ret <4 x i32> %t3
}

; Constant operand 0 (LHS) also works.

; Both subs have a constant LHS and the same variable RHS; lanes 0-2 take %t1
; and lane 3 takes %t2, giving one sub with the merged constant.
define <4 x i32> @sub_sub(<4 x i32> %v0) {
; CHECK-LABEL: @sub_sub(
; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Both input subs carry nuw; the expected output keeps nuw.
define <4 x i32> @sub_sub_nuw(<4 x i32> %v0) {
; CHECK-LABEL: @sub_sub_nuw(
; CHECK-NEXT: [[T3:%.*]] = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub nuw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Mask lane 0 is undef; the expected constant has undef in that lane.
define <4 x i32> @sub_sub_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @sub_sub_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Poison flags must be dropped or undef must be replaced with safe constant.

; Both input subs carry nuw and mask lane 0 is undef. The expected output has
; undef in lane 0 of the constant and no nuw flag.
define <4 x i32> @sub_sub_nuw_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @sub_sub_nuw_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub nuw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; If any element of the shuffle mask operand is undef, that element of the result is undef.
; The shuffle is eliminated in this transform, but we can replace a constant element with undef.

; Mask lane 0 is undef; the expected mul constant has undef in that lane.
define <4 x i32> @mul_mul(<4 x i32> %v0) {
; CHECK-LABEL: @mul_mul(
; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 undef, i32 6, i32 3, i32 8>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Preserve flags when possible.
; Lanes 0 and 1 take %t2 and lanes 2 and 3 take %t1; the expected output is
; one shl with the merged shift amounts.
define <4 x i32> @shl_shl(<4 x i32> %v0) {
; CHECK-LABEL: @shl_shl(
; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 3, i32 4>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Both input shifts carry nuw; the expected output keeps nuw.
define <4 x i32> @shl_shl_nuw(<4 x i32> %v0) {
; CHECK-LABEL: @shl_shl_nuw(
; CHECK-NEXT: [[T3:%.*]] = shl nuw <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 3, i32 4>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Shift by undef is poison. Undef must be replaced by safe constant.

; Mask lanes 0 and 3 are undef; the expected shift amount in those lanes is 0.
define <4 x i32> @shl_shl_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @shl_shl_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 3, i32 0>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %t3
}

; Shift by undef is poison. Undef must be replaced by safe constant.
; Both input shifts carry nuw and mask lanes 0 and 3 are undef. The expected
; output keeps nuw and uses 0 as the shift amount in the undef lanes.
define <4 x i32> @shl_shl_nuw_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @shl_shl_nuw_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = shl nuw <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 3, i32 0>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %t3
}

; Can't propagate the flag here.

; Only %t1 carries exact, so the merged lshr in the expected output has no
; exact flag. Both shifts have the constant as operand 0.
define <4 x i32> @lshr_lshr(<4 x i32> %v0) {
; CHECK-LABEL: @lshr_lshr(
; CHECK-NEXT: [[T3:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Try weird types.

; 3-element vector: lane 0 takes %t2 and lanes 1 and 2 take %t1.
define <3 x i32> @ashr_ashr(<3 x i32> %v0) {
; CHECK-LABEL: @ashr_ashr(
; CHECK-NEXT: [[T3:%.*]] = ashr <3 x i32> [[V0:%.*]], <i32 4, i32 2, i32 3>
; CHECK-NEXT: ret <3 x i32> [[T3]]
;
  %t1 = ashr <3 x i32> %v0, <i32 1, i32 2, i32 3>
  %t2 = ashr <3 x i32> %v0, <i32 4, i32 5, i32 6>
  %t3 = shufflevector <3 x i32> %t1, <3 x i32> %t2, <3 x i32> <i32 3, i32 1, i32 2>
  ret <3 x i32> %t3
}

; i42 elements: lane 0 takes %t1, lane 1 takes %t2, and mask lane 2 is undef,
; which becomes undef in the expected constant.
define <3 x i42> @and_and(<3 x i42> %v0) {
; CHECK-LABEL: @and_and(
; CHECK-NEXT: [[T3:%.*]] = and <3 x i42> [[V0:%.*]], <i42 1, i42 5, i42 undef>
; CHECK-NEXT: ret <3 x i42> [[T3]]
;
  %t1 = and <3 x i42> %v0, <i42 1, i42 2, i42 3>
  %t2 = and <3 x i42> %v0, <i42 4, i42 5, i42 6>
  %t3 = shufflevector <3 x i42> %t1, <3 x i42> %t2, <3 x i32> <i32 0, i32 4, i32 undef>
  ret <3 x i42> %t3
}

; It doesn't matter if the intermediate ops have extra uses.
; %t1 has an extra use (the call), so it stays; the shuffle is still replaced
; by a single or with the merged constant.
define <4 x i32> @or_or(<4 x i32> %v0) {
; CHECK-LABEL: @or_or(
; CHECK-NEXT: [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
; CHECK-NEXT: [[T3:%.*]] = or <4 x i32> [[V0]], <i32 5, i32 6, i32 3, i32 4>
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = or <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  call void @use_v4i32(<4 x i32> %t1)
  ret <4 x i32> %t3
}

; Here %t2 is the operand with the extra use; it stays and the shuffle is
; replaced by a single xor with the merged constant.
define <4 x i32> @xor_xor(<4 x i32> %v0) {
; CHECK-LABEL: @xor_xor(
; CHECK-NEXT: [[T2:%.*]] = xor <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 7, i32 8>
; CHECK-NEXT: [[T3:%.*]] = xor <4 x i32> [[V0]], <i32 1, i32 6, i32 3, i32 4>
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T2]])
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = xor <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = xor <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  call void @use_v4i32(<4 x i32> %t2)
  ret <4 x i32> %t3
}

; Both udivs have extra uses and a constant dividend; both stay, and the
; shuffle is replaced by a third udiv with the merged constant.
define <4 x i32> @udiv_udiv(<4 x i32> %v0) {
; CHECK-LABEL: @udiv_udiv(
; CHECK-NEXT: [[T1:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V0]]
; CHECK-NEXT: [[T3:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0]]
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T2]])
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  call void @use_v4i32(<4 x i32> %t1)
  call void @use_v4i32(<4 x i32> %t2)
  ret <4 x i32> %t3
}

; Div/rem need special handling if the shuffle has undef elements.

; No undef mask lanes: lanes 0 and 1 take %t1, lanes 2 and 3 take %t2.
define <4 x i32> @sdiv_sdiv(<4 x i32> %v0) {
; CHECK-LABEL: @sdiv_sdiv(
; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 8>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %t3
}

; Both input sdivs carry exact; the expected output keeps exact.
define <4 x i32> @sdiv_sdiv_exact(<4 x i32> %v0) {
; CHECK-LABEL: @sdiv_sdiv_exact(
; CHECK-NEXT: [[T3:%.*]] = sdiv exact <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 8>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv exact <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
  ret <4 x i32> %t3
}

; Mask lanes 0 and 3 are undef; the expected divisor in those lanes is 1
; rather than undef.
define <4 x i32> @sdiv_sdiv_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @sdiv_sdiv_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 1>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
  ret <4 x i32> %t3
}

; Same as @sdiv_sdiv_undef_mask_elt with exact on both inputs; the expected
; output keeps exact.
define <4 x i32> @sdiv_sdiv_exact_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @sdiv_sdiv_exact_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = sdiv exact <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 1>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv exact <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
  ret <4 x i32> %t3
}

; Both urems have the constant as operand 0; the expected output is one urem
; with the merged constant.
define <4 x i32> @urem_urem(<4 x i32> %v0) {
; CHECK-LABEL: @urem_urem(
; CHECK-NEXT: [[T3:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 8>, [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %t3
}

; This is folded by using a safe constant.

; Mask lane 3 is undef; the expected constant uses 0 in that lane.
define <4 x i32> @urem_urem_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @urem_urem_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 0>, [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
  ret <4 x i32> %t3
}

; Both srems have the constant as operand 0; only lane 2 takes %t2.
define <4 x i32> @srem_srem(<4 x i32> %v0) {
; CHECK-LABEL: @srem_srem(
; CHECK-NEXT: [[T3:%.*]] = srem <4 x i32> <i32 1, i32 2, i32 7, i32 4>, [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %t3
}

; This is folded by using a safe constant.
; Mask lane 1 is undef; the expected constant uses 0 in that lane.
define <4 x i32> @srem_srem_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @srem_srem_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = srem <4 x i32> <i32 1, i32 0, i32 7, i32 4>, [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
  ret <4 x i32> %t3
}

; Try FP ops/types.

; Lanes 0 and 1 take %t1 and lanes 2 and 3 take %t2; the expected output is
; one fadd with the merged constant.
define <4 x float> @fadd_fadd(<4 x float> %v0) {
; CHECK-LABEL: @fadd_fadd(
; CHECK-NEXT: [[T3:%.*]] = fadd <4 x float> [[V0:%.*]], <float 1.000000e+00, float 2.000000e+00, float 7.000000e+00, float 8.000000e+00>
; CHECK-NEXT: ret <4 x float> [[T3]]
;
  %t1 = fadd <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
  %t2 = fadd <4 x float> %v0, <float 5.0, float 6.0, float 7.0, float 8.0>
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x float> %t3
}

; Both fsubs have the constant as operand 0. Mask lane 0 is undef, which
; becomes undef in the expected constant.
define <4 x double> @fsub_fsub(<4 x double> %v0) {
; CHECK-LABEL: @fsub_fsub(
; CHECK-NEXT: [[T3:%.*]] = fsub <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[V0:%.*]]
; CHECK-NEXT: ret <4 x double> [[T3]]
;
  %t1 = fsub <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = fsub <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v0
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}

; Intersect any FMF.
define <4 x float> @fmul_fmul(<4 x float> %v0) {
; CHECK-LABEL: @fmul_fmul(
; CHECK-NEXT:    [[T3:%.*]] = fmul nnan ninf <4 x float> [[V0:%.*]], <float 1.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
; CHECK-NEXT:    ret <4 x float> [[T3]]
;
  %t1 = fmul nnan ninf <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
  %t2 = fmul nnan ninf <4 x float> %v0, <float 5.0, float 6.0, float 7.0, float 8.0>
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %t3
}

; One binop is 'fast' and the other is 'nnan arcp': the expected result keeps
; only 'nnan arcp'.

define <4 x double> @fdiv_fdiv(<4 x double> %v0) {
; CHECK-LABEL: @fdiv_fdiv(
; CHECK-NEXT:    [[T3:%.*]] = fdiv nnan arcp <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x double> [[T3]]
;
  %t1 = fdiv fast <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = fdiv nnan arcp <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v0
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}

; The variable operand must be either the first operand or second operand in both binops.
define <4 x double> @frem_frem(<4 x double> %v0) {
; CHECK-LABEL: @frem_frem(
; CHECK-NEXT:    [[T1:%.*]] = frem <4 x double> <double 1.000000e+00, double 2.000000e+00, double undef, double undef>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = frem <4 x double> [[V0]], <double undef, double undef, double 7.000000e+00, double 8.000000e+00>
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x double> [[T3]]
;
  %t1 = frem <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = frem <4 x double> %v0, <double 5.0, double 6.0, double 7.0, double 8.0>
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}

; Two different variable inputs: the expected result shuffles the variables
; first and then applies one binop with the lane-wise selected constant.

define <4 x i32> @add_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @add_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = add <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Constant operand 0 (LHS) also works.
define <4 x i32> @sub_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Both binops carry 'nsw' and the mask has no undef elements: the flag is kept.

define <4 x i32> @sub_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars_nsw(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub nsw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; The undef mask element (lane 0) is carried into both the new shuffle mask and
; the merged constant.

define <4 x i32> @sub_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Poison flags must be dropped or undef must be replaced with safe constant.
define <4 x i32> @sub_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars_nsw_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub nsw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; If any element of the shuffle mask operand is undef, that element of the result is undef.
; The shuffle is eliminated in this transform, but we can replace a constant element with undef.

define <4 x i32> @mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Both binops carry 'nuw' and the mask has no undef elements: the flag is kept.

define <4 x i32> @mul_2_vars_nuw(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars_nuw(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = mul nuw <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; The undef mask element (lane 1) is carried into both the new shuffle mask and
; the merged constant.

define <4 x i32> @mul_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 undef, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Poison flags must be dropped or undef must be replaced with safe constant.

define <4 x i32> @mul_2_vars_nuw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars_nuw_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 undef, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Preserve flags when possible.
define <4 x i32> @shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT:    [[T3:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Both shifts carry 'nsw' and the mask has no undef elements: the flag is kept.

define <4 x i32> @shl_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars_nsw(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT:    [[T3:%.*]] = shl nsw <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Shift by undef is poison. Undef is replaced by safe constant.

define <4 x i32> @shl_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
; CHECK-NEXT:    [[T3:%.*]] = shl <4 x i32> [[TMP1]], <i32 0, i32 6, i32 3, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %t3
}

; Shift by undef is poison. Undef is replaced by safe constant.
define <4 x i32> @shl_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars_nsw_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
; CHECK-NEXT:    [[T3:%.*]] = shl nsw <4 x i32> [[TMP1]], <i32 0, i32 6, i32 3, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %t3
}

; Can't propagate the flag here.

define <4 x i32> @lshr_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    [[T3:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Both shifts are 'exact', so the merged shift keeps the flag.

define <4 x i32> @lshr_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars_exact(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    [[T3:%.*]] = lshr exact <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; TODO: This would require a new shuffle mask (replace undef with op0 or op1 lane). Otherwise, we have shift-by-undef.

define <4 x i32> @lshr_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[T1:%.*]] = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; TODO: This would require a new shuffle mask (replace undef with op0 or op1 lane). Otherwise, we have shift-by-undef.

define <4 x i32> @lshr_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars_exact_undef_mask_elt(
; CHECK-NEXT:    [[T1:%.*]] = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Try weird types.
define <3 x i32> @ashr_2_vars(<3 x i32> %v0, <3 x i32> %v1) {
; CHECK-LABEL: @ashr_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[V1:%.*]], <3 x i32> [[V0:%.*]], <3 x i32> <i32 0, i32 4, i32 5>
; CHECK-NEXT:    [[T3:%.*]] = ashr <3 x i32> [[TMP1]], <i32 4, i32 2, i32 3>
; CHECK-NEXT:    ret <3 x i32> [[T3]]
;
  %t1 = ashr <3 x i32> %v0, <i32 1, i32 2, i32 3>
  %t2 = ashr <3 x i32> %v1, <i32 4, i32 5, i32 6>
  %t3 = shufflevector <3 x i32> %t1, <3 x i32> %t2, <3 x i32> <i32 3, i32 1, i32 2>
  ret <3 x i32> %t3
}

; Non-power-of-2 element count and an odd element width (i42), with an undef
; mask element in the last lane.

define <3 x i42> @and_2_vars(<3 x i42> %v0, <3 x i42> %v1) {
; CHECK-LABEL: @and_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i42> [[V0:%.*]], <3 x i42> [[V1:%.*]], <3 x i32> <i32 0, i32 4, i32 undef>
; CHECK-NEXT:    [[T3:%.*]] = and <3 x i42> [[TMP1]], <i42 1, i42 5, i42 undef>
; CHECK-NEXT:    ret <3 x i42> [[T3]]
;
  %t1 = and <3 x i42> %v0, <i42 1, i42 2, i42 3>
  %t2 = and <3 x i42> %v1, <i42 4, i42 5, i42 6>
  %t3 = shufflevector <3 x i42> %t1, <3 x i42> %t2, <3 x i32> <i32 0, i32 4, i32 undef>
  ret <3 x i42> %t3
}

; It doesn't matter if only one intermediate op has extra uses.
define <4 x i32> @or_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @or_2_vars(
; CHECK-NEXT:    [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = or <4 x i32> [[TMP1]], <i32 5, i32 6, i32 3, i32 4>
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = or <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  call void @use_v4i32(<4 x i32> %t1)
  ret <4 x i32> %t3
}

; But we don't transform if both intermediate values have extra uses.

define <4 x i32> @xor_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @xor_2_vars(
; CHECK-NEXT:    [[T1:%.*]] = xor <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
; CHECK-NEXT:    [[T2:%.*]] = xor <4 x i32> [[V1:%.*]], <i32 5, i32 6, i32 7, i32 8>
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T2]])
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = xor <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = xor <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  call void @use_v4i32(<4 x i32> %t1)
  call void @use_v4i32(<4 x i32> %t2)
  ret <4 x i32> %t3
}

; Div/rem need special handling if the shuffle has undef elements.
define <4 x i32> @udiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT:    [[T3:%.*]] = udiv <4 x i32> <i32 5, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Both divisions are 'exact', so the merged division keeps the flag.

define <4 x i32> @udiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars_exact(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT:    [[T3:%.*]] = udiv exact <4 x i32> <i32 5, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; TODO: This could be transformed using a safe constant.
define <4 x i32> @udiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[T1:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; TODO: This could be transformed using a safe constant.

define <4 x i32> @udiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars_exact_undef_mask_elt(
; CHECK-NEXT:    [[T1:%.*]] = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; If the shuffle has no undefs, it's safe to shuffle the variables first.
define <4 x i32> @sdiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    [[T3:%.*]] = sdiv <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %t3
}

; Both divisions are 'exact', so the merged division keeps the flag.

define <4 x i32> @sdiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars_exact(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    [[T3:%.*]] = sdiv exact <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv exact <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %t3
}

; Div by undef is UB. Undef is replaced by safe constant.

define <4 x i32> @sdiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
; CHECK-NEXT:    [[T3:%.*]] = sdiv <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 1>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
  ret <4 x i32> %t3
}

; Div by undef is UB. Undef is replaced by safe constant.

define <4 x i32> @sdiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars_exact_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
; CHECK-NEXT:    [[T3:%.*]] = sdiv exact <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 1>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv exact <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
  ret <4 x i32> %t3
}

; If the shuffle has no undefs, it's safe to shuffle the variables first.

define <4 x i32> @urem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @urem_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %t3
}

; The variables are the divisors and the mask has an undef element: the
; expected output keeps both srem instructions and the original shuffle.

define <4 x i32> @srem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @srem_2_vars(
; CHECK-NEXT:    [[T1:%.*]] = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
  ret <4 x i32> %t3
}

; Try FP ops/types.

define <4 x float> @fadd_2_vars(<4 x float> %v0, <4 x float> %v1) {
; CHECK-LABEL: @fadd_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V0:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float 7.000000e+00, float 8.000000e+00>
; CHECK-NEXT:    ret <4 x float> [[T3]]
;
  %t1 = fadd <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
  %t2 = fadd <4 x float> %v1, <float 5.0, float 6.0, float 7.0, float 8.0>
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x float> %t3
}

; The undef mask element (lane 0) is carried into both the new shuffle mask and
; the merged FP constant.

define <4 x double> @fsub_2_vars(<4 x double> %v0, <4 x double> %v1) {
; CHECK-LABEL: @fsub_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = fsub <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[TMP1]]
; CHECK-NEXT:    ret <4 x double> [[T3]]
;
  %t1 = fsub <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = fsub <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v1
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}

; Intersect any FMF.
define <4 x float> @fmul_2_vars(<4 x float> %v0, <4 x float> %v1) {
; CHECK-LABEL: @fmul_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V0:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = fmul reassoc nsz <4 x float> [[TMP1]], <float 1.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
; CHECK-NEXT:    ret <4 x float> [[T3]]
;
  %t1 = fmul reassoc nsz <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
  %t2 = fmul reassoc nsz <4 x float> %v1, <float 5.0, float 6.0, float 7.0, float 8.0>
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %t3
}

; 'nnan ninf' on one binop and 'nnan arcp' on the other: only 'nnan' is common,
; so only 'nnan' appears on the merged frem.

define <4 x double> @frem_2_vars(<4 x double> %v0, <4 x double> %v1) {
; CHECK-LABEL: @frem_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = frem nnan <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[TMP1]]
; CHECK-NEXT:    ret <4 x double> [[T3]]
;
  %t1 = frem nnan ninf <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = frem nnan arcp <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v1
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}

; The variable operand must be either the first operand or second operand in both binops.
define <4 x double> @fdiv_2_vars(<4 x double> %v0, <4 x double> %v1) {
; CHECK-LABEL: @fdiv_2_vars(
; CHECK-NEXT:    [[T1:%.*]] = fdiv <4 x double> <double 1.000000e+00, double 2.000000e+00, double undef, double undef>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = fdiv <4 x double> [[V1:%.*]], <double undef, double undef, double 7.000000e+00, double 8.000000e+00>
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x double> [[T3]]
;
  %t1 = fdiv <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = fdiv <4 x double> %v1, <double 5.0, double 6.0, double 7.0, double 8.0>
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}

; Shift-left with constant shift amount can be converted to mul to enable the fold.

define <4 x i32> @mul_shl(<4 x i32> %v0) {
; CHECK-LABEL: @mul_shl(
; CHECK-NEXT:    [[T3:%.*]] = mul nuw <4 x i32> [[V0:%.*]], <i32 32, i32 64, i32 3, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Try with shift as operand 0 of the shuffle; 'nsw' is dropped for safety, but that could be improved.
define <4 x i32> @shl_mul(<4 x i32> %v0) {
; CHECK-LABEL: @shl_mul(
; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 5, i32 undef, i32 8, i32 16>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Demanded elements + simplification can remove the mul alone, but that's not the best case.

define <4 x i32> @mul_is_nop_shl(<4 x i32> %v0) {
; CHECK-LABEL: @mul_is_nop_shl(
; CHECK-NEXT:    [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 7, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i32> %t3
}

; Negative test: shift amount (operand 1) must be constant.

define <4 x i32> @shl_mul_not_constant_shift_amount(<4 x i32> %v0) {
; CHECK-LABEL: @shl_mul_not_constant_shift_amount(
; CHECK-NEXT:    [[T1:%.*]] = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = mul <4 x i32> [[V0]], <i32 5, i32 6, i32 undef, i32 undef>
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T2]], <4 x i32> [[T1]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = mul <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Try with 2 variable inputs.
define <4 x i32> @mul_shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_shl_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = mul nuw <4 x i32> [[TMP1]], <i32 32, i32 64, i32 3, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Shift as operand 0 of the shuffle, two variables, and an undef mask element:
; the expected mul has no 'nsw' and an undef constant lane.

define <4 x i32> @shl_mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_mul_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 undef, i32 6, i32 7>
; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 5, i32 undef, i32 8, i32 16>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Or with constant can be converted to add to enable the fold.
; The 'shl' is here to allow analysis to determine that the 'or' can be transformed to 'add'.
; TODO: The 'or' constant is limited to a splat.
define <4 x i32> @add_or(<4 x i32> %v) {
; CHECK-LABEL: @add_or(
; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]], <i32 5, i32 5, i32 5, i32 5>
; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[V0]], <i32 31, i32 31, i32 65536, i32 65537>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %v0 = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>                   ; clear the bottom bits
  %t1 = add <4 x i32> %v0, <i32 65534, i32 65535, i32 65536, i32 65537>  ; this can't be converted to 'or'
  %t2 = or <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31>               ; set the bottom bits
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Try with 'or' as operand 0 of the shuffle.

define <4 x i8> @or_add(<4 x i8> %v) {
; CHECK-LABEL: @or_add(
; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 3, i8 3, i8 3, i8 3>
; CHECK-NEXT:    [[T3:%.*]] = add nuw nsw <4 x i8> [[V0]], <i8 1, i8 2, i8 -64, i8 -64>
; CHECK-NEXT:    ret <4 x i8> [[T3]]
;
  %v0 = lshr <4 x i8> %v, <i8 3, i8 3, i8 3, i8 3>          ; clear the top bits
  %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192>   ; set some top bits
  %t2 = add nsw nuw <4 x i8> %v0, <i8 1, i8 2, i8 3, i8 4>  ; this can't be converted to 'or'
  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i8> %t3
}

; Negative test: not all 'or' insts can be converted to 'add'.
; @or_add_not_enough_masking: 'lshr' by 1 leaves bit 6 of %v0 possibly set, and it
;   overlaps the 'or' constant 192 (bits 7 and 6). The expected output keeps the
;   'or', the 'add' (without its flags) and the shuffle; only the unselected
;   constant lanes become undef and the shuffle operands are swapped.
; @add_or_2_vars, @or_add_2_vars: the 'add' reads a second argument %v1 instead of
;   %v0, so the expected output shuffles %v0 with %v1 and feeds one 'add' whose
;   constant mixes the 'or' and 'add' constants lane by lane.
; @PR41419: no binop at all; operand 1 of the shuffle is undef, and the expected
;   output only rewrites the mask lanes that selected from it (4, 5, 7) to undef.
1411 1412define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) { 1413; CHECK-LABEL: @or_add_not_enough_masking( 1414; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 1, i8 1, i8 1, i8 1> 1415; CHECK-NEXT: [[T1:%.*]] = or <4 x i8> [[V0]], <i8 undef, i8 undef, i8 -64, i8 -64> 1416; CHECK-NEXT: [[T2:%.*]] = add <4 x i8> [[V0]], <i8 1, i8 2, i8 undef, i8 undef> 1417; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i8> [[T2]], <4 x i8> [[T1]], <4 x i32> <i32 0, i32 1, i32 6, i32 7> 1418; CHECK-NEXT: ret <4 x i8> [[T3]] 1419; 1420 %v0 = lshr <4 x i8> %v, <i8 1, i8 1, i8 1, i8 1> ; clear not enough top bits 1421 %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192> ; set some top bits 1422 %t2 = add nsw nuw <4 x i8> %v0, <i8 1, i8 2, i8 3, i8 4> ; this can't be converted to 'or' 1423 %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3> 1424 ret <4 x i8> %t3 1425} 1426 1427; Try with 2 variable inputs. 1428 1429define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) { 1430; CHECK-LABEL: @add_or_2_vars( 1431; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], <i32 5, i32 5, i32 5, i32 5> 1432; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7> 1433; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[TMP1]], <i32 31, i32 31, i32 65536, i32 65537> 1434; CHECK-NEXT: ret <4 x i32> [[T3]] 1435; 1436 %v0 = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5> ; clear the bottom bits 1437 %t1 = add <4 x i32> %v1, <i32 65534, i32 65535, i32 65536, i32 65537> ; this can't be converted to 'or' 1438 %t2 = or <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31> ; set the bottom bits 1439 %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3> 1440 ret <4 x i32> %t3 1441} 1442 1443define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) { 1444; CHECK-LABEL: @or_add_2_vars( 1445; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 3, i8 3, i8 3, i8 3> 1446; 
CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[V1:%.*]], <4 x i8> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 7> 1447; CHECK-NEXT: [[T3:%.*]] = add nuw nsw <4 x i8> [[TMP1]], <i8 1, i8 2, i8 -64, i8 -64> 1448; CHECK-NEXT: ret <4 x i8> [[T3]] 1449; 1450 %v0 = lshr <4 x i8> %v, <i8 3, i8 3, i8 3, i8 3> ; clear the top bits 1451 %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192> ; set some top bits 1452 %t2 = add nsw nuw <4 x i8> %v1, <i8 1, i8 2, i8 3, i8 4> ; this can't be converted to 'or' 1453 %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3> 1454 ret <4 x i8> %t3 1455} 1456 1457; The undef operand is used to simplify the shuffle mask, but don't assert that too soon. 1458 1459define <4 x i32> @PR41419(<4 x i32> %v) { 1460; CHECK-LABEL: @PR41419( 1461; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 2, i32 undef> 1462; CHECK-NEXT: ret <4 x i32> [[S]] 1463; 1464 %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 2, i32 7> 1465 ret <4 x i32> %s 1466} 1467 1468