; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64

; Verify AVX2 codegen for vector shifts: per-element variable shifts
; (vpsllv*/vpsrlv*/vpsrav*), uniform immediate shifts (vpsll*/vpsrl*/vpsra*),
; shift-amount operands folded from memory, and v32i8/v8i16 shifts that must
; be emulated because AVX2 has no byte-element (or variable word) shifts.

define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
; X32-LABEL: variable_shl0:
; X32:       # %bb.0:
; X32-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl0:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %k = shl <4 x i32> %x, %y
  ret <4 x i32> %k
}

define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
; X32-LABEL: variable_shl1:
; X32:       # %bb.0:
; X32-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl1:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %k = shl <8 x i32> %x, %y
  ret <8 x i32> %k
}

define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
; X32-LABEL: variable_shl2:
; X32:       # %bb.0:
; X32-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl2:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %k = shl <2 x i64> %x, %y
  ret <2 x i64> %k
}

define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
; X32-LABEL: variable_shl3:
; X32:       # %bb.0:
; X32-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl3:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %k = shl <4 x i64> %x, %y
  ret <4 x i64> %k
}

define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
; X32-LABEL: variable_srl0:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl0:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %k = lshr <4 x i32> %x, %y
  ret <4 x i32> %k
}

define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
; X32-LABEL: variable_srl1:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl1:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %k = lshr <8 x i32> %x, %y
  ret <8 x i32> %k
}

define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
; X32-LABEL: variable_srl2:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl2:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %k = lshr <2 x i64> %x, %y
  ret <2 x i64> %k
}

define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
; X32-LABEL: variable_srl3:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl3:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %k = lshr <4 x i64> %x, %y
  ret <4 x i64> %k
}

define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
; X32-LABEL: variable_sra0:
; X32:       # %bb.0:
; X32-NEXT:    vpsravd %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_sra0:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %k = ashr <4 x i32> %x, %y
  ret <4 x i32> %k
}

define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
; X32-LABEL: variable_sra1:
; X32:       # %bb.0:
; X32-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_sra1:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %k = ashr <8 x i32> %x, %y
  ret <8 x i32> %k
}

;;; Shift left

define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
; X32-LABEL: vshift00:
; X32:       # %bb.0:
; X32-NEXT:    vpslld $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift00:
; X64:       # %bb.0:
; X64-NEXT:    vpslld $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
; X32-LABEL: vshift01:
; X32:       # %bb.0:
; X32-NEXT:    vpsllw $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift01:
; X64:       # %bb.0:
; X64-NEXT:    vpsllw $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
; X32-LABEL: vshift02:
; X32:       # %bb.0:
; X32-NEXT:    vpsllq $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift02:
; X64:       # %bb.0:
; X64-NEXT:    vpsllq $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Logical Shift right

define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
; X32-LABEL: vshift03:
; X32:       # %bb.0:
; X32-NEXT:    vpsrld $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift03:
; X64:       # %bb.0:
; X64-NEXT:    vpsrld $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
; X32-LABEL: vshift04:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlw $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift04:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlw $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
; X32-LABEL: vshift05:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlq $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift05:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Arithmetic Shift right

define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
; X32-LABEL: vshift06:
; X32:       # %bb.0:
; X32-NEXT:    vpsrad $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift06:
; X64:       # %bb.0:
; X64-NEXT:    vpsrad $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
; X32-LABEL: vshift07:
; X32:       # %bb.0:
; X32-NEXT:    vpsraw $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift07:
; X64:       # %bb.0:
; X64-NEXT:    vpsraw $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

; Shift-amount operand loaded from memory: the load should fold into the
; variable-shift instruction's memory operand.

define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
; X32-LABEL: variable_sra0_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsravd (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_sra0_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = ashr <4 x i32> %x, %y1
  ret <4 x i32> %k
}

define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
; X32-LABEL: variable_sra1_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsravd (%eax), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_sra1_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = ashr <8 x i32> %x, %y1
  ret <8 x i32> %k
}

define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
; X32-LABEL: variable_shl0_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsllvd (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl0_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvd (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = shl <4 x i32> %x, %y1
  ret <4 x i32> %k
}

define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
; X32-LABEL: variable_shl1_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsllvd (%eax), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl1_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvd (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = shl <8 x i32> %x, %y1
  ret <8 x i32> %k
}

define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
; X32-LABEL: variable_shl2_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsllvq (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl2_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvq (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <2 x i64>, <2 x i64>* %y
  %k = shl <2 x i64> %x, %y1
  ret <2 x i64> %k
}

define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
; X32-LABEL: variable_shl3_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsllvq (%eax), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl3_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvq (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <4 x i64>, <4 x i64>* %y
  %k = shl <4 x i64> %x, %y1
  ret <4 x i64> %k
}

define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
; X32-LABEL: variable_srl0_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsrlvd (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl0_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = lshr <4 x i32> %x, %y1
  ret <4 x i32> %k
}

define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
; X32-LABEL: variable_srl1_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsrlvd (%eax), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl1_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = lshr <8 x i32> %x, %y1
  ret <8 x i32> %k
}

define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
; X32-LABEL: variable_srl2_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsrlvq (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl2_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvq (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <2 x i64>, <2 x i64>* %y
  %k = lshr <2 x i64> %x, %y1
  ret <2 x i64> %k
}

define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
; X32-LABEL: variable_srl3_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsrlvq (%eax), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl3_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvq (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <4 x i64>, <4 x i64>* %y
  %k = lshr <4 x i64> %x, %y1
  ret <4 x i64> %k
}

; v32i8 shifts: AVX2 has no byte-element shift, so these lower to a word
; shift plus a mask (and, for arithmetic shifts, a sign-fixup sequence).

define <32 x i8> @shl9(<32 x i8> %A) nounwind {
; X32-LABEL: shl9:
; X32:       # %bb.0:
; X32-NEXT:    vpsllw $3, %ymm0, %ymm0
; X32-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: shl9:
; X64:       # %bb.0:
; X64-NEXT:    vpsllw $3, %ymm0, %ymm0
; X64-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %B = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
}

define <32 x i8> @shr9(<32 x i8> %A) nounwind {
; X32-LABEL: shr9:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlw $3, %ymm0, %ymm0
; X32-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: shr9:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlw $3, %ymm0, %ymm0
; X64-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %B = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
}

define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
; X32-LABEL: sra_v32i8_7:
; X32:       # %bb.0:
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: sra_v32i8_7:
; X64:       # %bb.0:
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %B
}

define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
; X32-LABEL: sra_v32i8:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlw $3, %ymm0, %ymm0
; X32-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X32-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; X32-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: sra_v32i8:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlw $3, %ymm0, %ymm0
; X64-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X64-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; X64-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %B = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
}

; trunc+sext pairs should lower as shift-left + arithmetic-shift-right.

define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
; X32-LABEL: sext_v16i16:
; X32:       # %bb.0:
; X32-NEXT:    vpsllw $8, %ymm0, %ymm0
; X32-NEXT:    vpsraw $8, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: sext_v16i16:
; X64:       # %bb.0:
; X64-NEXT:    vpsllw $8, %ymm0, %ymm0
; X64-NEXT:    vpsraw $8, %ymm0, %ymm0
; X64-NEXT:    retq
  %b = trunc <16 x i16> %a to <16 x i8>
  %c = sext <16 x i8> %b to <16 x i16>
  ret <16 x i16> %c
}

define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
; X32-LABEL: sext_v8i32:
; X32:       # %bb.0:
; X32-NEXT:    vpslld $16, %ymm0, %ymm0
; X32-NEXT:    vpsrad $16, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: sext_v8i32:
; X64:       # %bb.0:
; X64-NEXT:    vpslld $16, %ymm0, %ymm0
; X64-NEXT:    vpsrad $16, %ymm0, %ymm0
; X64-NEXT:    retq
  %b = trunc <8 x i32> %a to <8 x i16>
  %c = sext <8 x i16> %b to <8 x i32>
  ret <8 x i32> %c
}

; v8i16 variable shifts: no AVX2 word-element variable shift exists, so the
; operands are widened to v8i32, shifted with vps*lvd/vpsravd, and narrowed
; back to v8i16.

define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
; X32-LABEL: variable_shl16:
; X32:       # %bb.0:
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X32-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; X32-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl16:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; X64-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = shl <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}

define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
; X32-LABEL: variable_ashr16:
; X32:       # %bb.0:
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X32-NEXT:    vpmovsxwd %xmm0, %ymm0
; X32-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; X32-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X32-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: variable_ashr16:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X64-NEXT:    vpmovsxwd %xmm0, %ymm0
; X64-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = ashr <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}

define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8 x i16> %rhs) {
; X32-LABEL: variable_lshr16:
; X32:       # %bb.0:
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X32-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; X32-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X32-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: variable_lshr16:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = lshr <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}