; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefix=AVX2

define <8 x i32> @zext_and_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_and_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_and_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = and <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @zext_or_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_or_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_or_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = or <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @zext_xor_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_xor_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_xor_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = xor <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

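; and/or/xor of <8 x i16> operands sign-extended to <8 x i32>.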
define <8 x i32> @sext_and_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_and_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_and_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = and <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @sext_or_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_or_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_or_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = or <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @sext_xor_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_xor_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_xor_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = xor <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

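; and/or/xor of <8 x i8> operands zero-extended to <8 x i16>.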
define <8 x i16> @zext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_and_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_and_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = and <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @zext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_or_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_or_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = or <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @zext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_xor_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_xor_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = xor <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

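; and/or/xor of <8 x i8> operands sign-extended to <8 x i16>.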
define <8 x i16> @sext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_and_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-NEXT:    psraw $8, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_and_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = and <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

define <8 x i16> @sext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_or_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-NEXT:    psraw $8, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_or_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = or <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

define <8 x i16> @sext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_xor_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-NEXT:    psraw $8, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_xor_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = xor <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

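; and/or/xor of boolean <8 x i1> operands zero-extended to <8 x i32>.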
define <8 x i32> @bool_zext_and(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_and:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = and <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_zext_or(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_or:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_or:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = or <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_zext_xor(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_xor:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_xor:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = xor <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

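; and/or/xor of boolean <8 x i1> operands sign-extended to <8 x i32>.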
define <8 x i32> @bool_sext_and(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_and:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pslld $31, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pslld $31, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pslld $31, %xmm3
; SSE2-NEXT:    psrad $31, %xmm3
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpslld $31, %ymm1, %ymm1
; AVX2-NEXT:    vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = and <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_or(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_or:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pslld $31, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pslld $31, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pslld $31, %xmm3
; SSE2-NEXT:    psrad $31, %xmm3
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_or:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpslld $31, %ymm1, %ymm1
; AVX2-NEXT:    vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = or <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_xor(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_xor:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pslld $31, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pslld $31, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    pslld $31, %xmm3
; SSE2-NEXT:    psrad $31, %xmm3
; SSE2-NEXT:    pxor %xmm3, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_xor:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpslld $31, %ymm1, %ymm1
; AVX2-NEXT:    vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = xor <8 x i32> %xs, %ys
  ret <8 x i32> %r
}