; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

;
; ASHR - Immediate
;

define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrai_w_0
; CHECK-NEXT: ret <8 x i16> %v
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrai_w_15
; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrai_w_64
; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrai_d_0
; CHECK-NEXT: ret <4 x i32> %v
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrai_d_15
; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrai_d_64
; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}

define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrai_w_0
; CHECK-NEXT: ret <16 x i16> %v
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrai_w_15
; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrai_w_64
; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrai_d_0
; CHECK-NEXT: ret <8 x i32> %v
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrai_d_15
; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrai_d_64
; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}

;
; LSHR - Immediate
;
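; As the checks below show, an immediate logical shift by a count that is
; greater than or equal to the element bit width folds to zero, unlike the
; arithmetic shifts above, which clamp the count to bitwidth - 1.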

define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrli_w_0
; CHECK-NEXT: ret <8 x i16> %v
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrli_w_15
; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrli_w_64
; CHECK-NEXT: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrli_d_0
; CHECK-NEXT: ret <4 x i32> %v
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrli_d_15
; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrli_d_64
; CHECK-NEXT: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrli_q_0
; CHECK-NEXT: ret <2 x i64> %v
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrli_q_15
; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrli_q_64
; CHECK-NEXT: ret <2 x i64> zeroinitializer
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrli_w_0
; CHECK-NEXT: ret <16 x i16> %v
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrli_w_15
; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrli_w_64
; CHECK-NEXT: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrli_d_0
; CHECK-NEXT: ret <8 x i32> %v
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrli_d_15
; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrli_d_64
; CHECK-NEXT: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrli_q_0
; CHECK-NEXT: ret <4 x i64> %v
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrli_q_15
; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT: ret <4 x i64> %1
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrli_q_64
; CHECK-NEXT: ret <4 x i64> zeroinitializer
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}

;
; SHL - Immediate
;

define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_pslli_w_0
; CHECK-NEXT: ret <8 x i16> %v
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_pslli_w_15
; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_pslli_w_64
; CHECK-NEXT: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_pslli_d_0
; CHECK-NEXT: ret <4 x i32> %v
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_pslli_d_15
; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_pslli_d_64
; CHECK-NEXT: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_pslli_q_0
; CHECK-NEXT: ret <2 x i64> %v
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_pslli_q_15
; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_pslli_q_64
; CHECK-NEXT: ret <2 x i64> zeroinitializer
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_pslli_w_0
; CHECK-NEXT: ret <16 x i16> %v
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_pslli_w_15
; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_pslli_w_64
; CHECK-NEXT: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_pslli_d_0
; CHECK-NEXT: ret <8 x i32> %v
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_pslli_d_15
; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_pslli_d_64
; CHECK-NEXT: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_pslli_q_0
; CHECK-NEXT: ret <4 x i64> %v
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_pslli_q_15
; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT: ret <4 x i64> %1
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_pslli_q_64
; CHECK-NEXT: ret <4 x i64> zeroinitializer
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}

;
; ASHR - Constant Vector
;
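; For the non-immediate forms below, the shift count is taken from the low
; 64 bits of the count vector, so only those low elements matter; the 9999
; values in the upper elements are ignored. A splat count such as all-15
; therefore encodes a count of at least the element width, which is why the
; *_15_splat cases clamp to bitwidth - 1 here (and fold to zero for the
; logical shifts further down).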

define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_0
; CHECK-NEXT: ret <8 x i16> %v
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_15
; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_15_splat
; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_64
; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_0
; CHECK-NEXT: ret <4 x i32> %v
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_15
; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_15_splat
; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_64
; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_0
; CHECK-NEXT: ret <16 x i16> %v
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_15
; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_15_splat
; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_64
; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_0
; CHECK-NEXT: ret <8 x i32> %v
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_15
; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_15_splat
; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_64
; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

;
; LSHR - Constant Vector
;

define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_0
; CHECK-NEXT: ret <8 x i16> %v
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_15
; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_15_splat
; CHECK-NEXT: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_64
; CHECK-NEXT: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_0
; CHECK-NEXT: ret <4 x i32> %v
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_15
; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_15_splat
; CHECK-NEXT: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_64
; CHECK-NEXT: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrl_q_0
; CHECK-NEXT: ret <2 x i64> %v
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrl_q_15
; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrl_q_64
; CHECK-NEXT: ret <2 x i64> zeroinitializer
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_0
; CHECK-NEXT: ret <16 x i16> %v
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_15
; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_15_splat
; CHECK-NEXT: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_64
; CHECK-NEXT: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_0
; CHECK-NEXT: ret <8 x i32> %v
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_15
; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_15_splat
; CHECK-NEXT: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_64
; CHECK-NEXT: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrl_q_0
; CHECK-NEXT: ret <4 x i64> %v
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrl_q_15
; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT: ret <4 x i64> %1
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrl_q_64
; CHECK-NEXT: ret <4 x i64> zeroinitializer
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <4 x i64> %1
}

;
; SHL - Constant Vector
;

define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_0
; CHECK-NEXT: ret <8 x i16> %v
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_15
; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_15_splat
; CHECK-NEXT: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_64
; CHECK-NEXT: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_0
; CHECK-NEXT: ret <4 x i32> %v
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_15
; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_15_splat
; CHECK-NEXT: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_64
; CHECK-NEXT: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psll_q_0
; CHECK-NEXT: ret <2 x i64> %v
  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psll_q_15
; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psll_q_64
; CHECK-NEXT: ret <2 x i64> zeroinitializer
  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psll_w_0
; CHECK-NEXT: ret <16 x i16> %v
  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psll_w_15
; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psll_w_15_splat
; CHECK-NEXT: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psll_w_64
; CHECK-NEXT: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psll_d_0
; CHECK-NEXT: ret <8 x i32> %v
  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psll_d_15
; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psll_d_15_splat
; CHECK-NEXT: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psll_d_64
; CHECK-NEXT: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psll_q_0
; CHECK-NEXT: ret <4 x i64> %v
  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psll_q_15
; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT: ret <4 x i64> %1
  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psll_q_64
; CHECK-NEXT: ret <4 x i64> zeroinitializer
  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <4 x i64> %1
}

;
; Vector Demanded Bits
;
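; These cases check that only the low 64 bits of the count operand are
; demanded: shuffles that merely duplicate the low half of the count vector
; are expected to be dropped, leaving the intrinsic to take %a (or its
; bitcast) directly.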

define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @sse2_psra_w_var
; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %a)
; CHECK-NEXT: ret <8 x i16> %1
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
  ret <8 x i16> %2
}

define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) {
; CHECK-LABEL: @sse2_psra_w_var_bc
; CHECK-NEXT: %1 = bitcast <2 x i64> %a to <8 x i16>
; CHECK-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
; CHECK-NEXT: ret <8 x i16> %2
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = bitcast <2 x i64> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2)
  ret <8 x i16> %3
}

define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @sse2_psra_d_var
; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %a)
; CHECK-NEXT: ret <4 x i32> %1
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) {
; CHECK-LABEL: @sse2_psra_d_var_bc
; CHECK-NEXT: %1 = bitcast <8 x i16> %a to <4 x i32>
; CHECK-NEXT: %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
; CHECK-NEXT: ret <4 x i32> %2
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = bitcast <8 x i16> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2)
  ret <4 x i32> %3
}

define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx2_psra_w_var
; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %a)
; CHECK-NEXT: ret <16 x i16> %1
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1)
  ret <16 x i16> %2
}

define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx2_psra_d_var
; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %a)
; CHECK-NEXT: ret <8 x i32> %1
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
  ret <8 x i32> %2
}

define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @sse2_psrl_w_var
; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %a)
; CHECK-NEXT: ret <8 x i16> %1
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1)
  ret <8 x i16> %2
}

define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @sse2_psrl_d_var
; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %a)
; CHECK-NEXT: ret <4 x i32> %1
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @sse2_psrl_q_var
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %a)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1)
  ret <2 x i64> %2
}

define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx2_psrl_w_var
; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %a)
; CHECK-NEXT: ret <16 x i16> %1
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
  ret <16 x i16> %2
}

define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) {
; CHECK-LABEL: @avx2_psrl_w_var_bc
; CHECK-NEXT: %1 = bitcast <16 x i8> %a to <8 x i16>
; CHECK-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
; CHECK-NEXT: ret <16 x i16> %2
  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = bitcast <16 x i8> %1 to <8 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2)
  ret <16 x i16> %3
}

define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx2_psrl_d_var
; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %a)
; CHECK-NEXT: ret <8 x i32> %1
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
  ret <8 x i32> %2
}

define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx2_psrl_d_var_bc
; CHECK-NEXT: %1 = bitcast <2 x i64> %a to <4 x i32>
; CHECK-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
; CHECK-NEXT: ret <8 x i32> %2
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2)
  ret <8 x i32> %3
}

define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx2_psrl_q_var
; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %a)
; CHECK-NEXT: ret <4 x i64> %1
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
  ret <4 x i64> %2
}

define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @sse2_psll_w_var
; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %a)
; CHECK-NEXT: ret <8 x i16> %1
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1)
  ret <8 x i16> %2
}

define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @sse2_psll_d_var
; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %a)
; CHECK-NEXT: ret <4 x i32> %1
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @sse2_psll_q_var
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %a)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
  ret <2 x i64> %2
}

define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx2_psll_w_var
; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %a)
; CHECK-NEXT: ret <16 x i16> %1
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
  ret <16 x i16> %2
}

define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx2_psll_d_var
; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %a)
; CHECK-NEXT: ret <8 x i32> %1
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1)
  ret <8 x i32> %2
}

define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx2_psll_q_var
; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %a)
; CHECK-NEXT: ret <4 x i64> %1
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1)
  ret <4 x i64> %2
}

;
; Constant Folding
;
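; With constant (or zero-count) shift amounts throughout, each chain of shift
; intrinsics below is expected to fold to a constant or back to the original
; argument.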

define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
; CHECK-LABEL: @test_sse2_psra_w_0
; CHECK-NEXT: ret <8 x i16> %A
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
  %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
  ret <8 x i16> %3
}

define <8 x i16> @test_sse2_psra_w_8() {
; CHECK-LABEL: @test_sse2_psra_w_8
; CHECK-NEXT: ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
  %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16>
  %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3)
  %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
  ret <8 x i16> %4
}

define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
; CHECK-LABEL: @test_sse2_psra_d_0
; CHECK-NEXT: ret <4 x i32> %A
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
  %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
  %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %2, i32 0)
  ret <4 x i32> %3
}

define <4 x i32> @sse2_psra_d_8() {
; CHECK-LABEL: @sse2_psra_d_8
; CHECK-NEXT: ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
  %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
  %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
  %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
  ret <4 x i32> %4
}

define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
; CHECK-LABEL: @test_avx2_psra_w_0
; CHECK-NEXT: ret <16 x i16> %A
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
  ret <16 x i16> %3
}

define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
; CHECK-LABEL: @test_avx2_psra_w_8
; CHECK-NEXT: ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
  %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
  %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2)
  ret <16 x i16> %4
}

define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
; CHECK-LABEL: @test_avx2_psra_d_0
; CHECK-NEXT: ret <8 x i32> %A
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
  %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
  ret <8 x i32> %3
}

define <8 x i32> @test_avx2_psra_d_8() {
; CHECK-LABEL: @test_avx2_psra_d_8
; CHECK-NEXT: ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
  %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3)
  %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
  %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2)
  ret <8 x i32> %4
}

define <2 x i64> @test_sse2_1() {
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
; CHECK: test_sse2_1
; CHECK: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
}

define <4 x i64> @test_avx2_1() {
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
; CHECK: test_avx2_1
; CHECK: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
}

define <2 x i64> @test_sse2_0() {
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
; CHECK: test_sse2_0
; CHECK: ret <2 x i64> zeroinitializer
}

define <4 x i64> @test_avx2_0() {
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
; CHECK: test_avx2_0
; CHECK: ret <4 x i64> zeroinitializer
}

define <2 x i64> @test_sse2_psrl_1() {
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
; CHECK: test_sse2_psrl_1
; CHECK: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
}

define <4 x i64> @test_avx2_psrl_1() {
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
; CHECK: test_avx2_psrl_1
; CHECK: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
}

define <2 x i64> @test_sse2_psrl_0() {
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
; CHECK: test_sse2_psrl_0
; CHECK: ret <2 x i64> zeroinitializer
}

define <4 x i64> @test_avx2_psrl_0() {
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
; CHECK: test_avx2_psrl_0
; CHECK: ret <4 x i64> zeroinitializer
}

declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1

declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1

declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1

attributes #1 = { nounwind readnone }