; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -o - -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
; RUN: llc < %s -o - -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE41

; Codegen tests for unsigned vector integer compares (icmp ult/ule/ugt/uge)
; on x86-64. Every function is compiled twice, once with -mattr=+sse2 and once
; with -mattr=+sse4.2; expectations that are identical for both configurations
; use the shared prefix, the others are listed per configuration.

; For a setult against a constant, turn it into a setule and lower via psubusw.

; The compare against splat 26 sits inside a loop. The expected output
; materializes the adjusted constant (splat 25) in the loop preheader, before
; the loop label, so the loop body itself contains no constant load.
define void @loop_no_const_reload(<2 x i64>* %in, <2 x i64>* %out, i32 %n) {
; SSE2-LABEL: loop_no_const_reload:
; SSE2: ## %bb.0: ## %entry
; SSE2-NEXT: testl %edx, %edx
; SSE2-NEXT: je LBB0_3
; SSE2-NEXT: ## %bb.1: ## %for.body.preheader
; SSE2-NEXT: xorl %eax, %eax
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25,25,25,25,25]
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: .p2align 4, 0x90
; SSE2-NEXT: LBB0_2: ## %for.body
; SSE2-NEXT: ## =>This Inner Loop Header: Depth=1
; SSE2-NEXT: movdqa (%rdi,%rax), %xmm2
; SSE2-NEXT: psubusw %xmm0, %xmm2
; SSE2-NEXT: pcmpeqw %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, (%rsi,%rax)
; SSE2-NEXT: addq $16, %rax
; SSE2-NEXT: decl %edx
; SSE2-NEXT: jne LBB0_2
; SSE2-NEXT: LBB0_3: ## %for.end
; SSE2-NEXT: retq
;
; SSE41-LABEL: loop_no_const_reload:
; SSE41: ## %bb.0: ## %entry
; SSE41-NEXT: testl %edx, %edx
; SSE41-NEXT: je LBB0_3
; SSE41-NEXT: ## %bb.1: ## %for.body.preheader
; SSE41-NEXT: xorl %eax, %eax
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25,25,25,25,25]
; SSE41-NEXT: .p2align 4, 0x90
; SSE41-NEXT: LBB0_2: ## %for.body
; SSE41-NEXT: ## =>This Inner Loop Header: Depth=1
; SSE41-NEXT: movdqa (%rdi,%rax), %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: pminuw %xmm0, %xmm2
; SSE41-NEXT: pcmpeqw %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, (%rsi,%rax)
; SSE41-NEXT: addq $16, %rax
; SSE41-NEXT: decl %edx
; SSE41-NEXT: jne LBB0_2
; SSE41-NEXT: LBB0_3: ## %for.end
; SSE41-NEXT: retq
entry:
  ; Skip the loop entirely when the trip count %n is zero.
  %cmp9 = icmp eq i32 %n, 0
  br i1 %cmp9, label %for.end, label %for.body

for.body: ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %in, i64 %indvars.iv
  %arrayidx1.val = load <2 x i64>, <2 x i64>* %arrayidx1, align 16
  ; View the loaded 128-bit value as eight i16 lanes for the compare.
  %0 = bitcast <2 x i64> %arrayidx1.val to <8 x i16>
  %cmp.i.i = icmp ult <8 x i16> %0, <i16 26, i16 26, i16 26, i16 26, i16 26, i16 26, i16 26, i16 26>
  ; Widen each i1 result into a full i16 lane (all-ones for true, zero for false).
  %sext.i.i = sext <8 x i1> %cmp.i.i to <8 x i16>
  %1 = bitcast <8 x i16> %sext.i.i to <2 x i64>
  %arrayidx5 = getelementptr inbounds <2 x i64>, <2 x i64>* %out, i64 %indvars.iv
  store <2 x i64> %1, <2 x i64>* %arrayidx5, align 16
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body, %entry
  ret void
}

; Be careful if decrementing the constant would underflow.
;
; Same loop as above, but lane 0 of the constant is 0, so subtracting 1 from
; every lane would wrap in that lane. The expected output does not use a
; decremented constant: with +sse2 it flips the sign bits (pxor with splat
; 32768) and uses pcmpgtw; with +sse4.2 it uses pmaxuw/pcmpeqw on the original
; constant [0,26,...] and then inverts the result with an all-ones pxor.

define void @loop_const_folding_underflow(<2 x i64>* %in, <2 x i64>* %out, i32 %n) {
; SSE2-LABEL: loop_const_folding_underflow:
; SSE2: ## %bb.0: ## %entry
; SSE2-NEXT: testl %edx, %edx
; SSE2-NEXT: je LBB1_3
; SSE2-NEXT: ## %bb.1: ## %for.body.preheader
; SSE2-NEXT: xorl %eax, %eax
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [32768,32794,32794,32794,32794,32794,32794,32794]
; SSE2-NEXT: .p2align 4, 0x90
; SSE2-NEXT: LBB1_2: ## %for.body
; SSE2-NEXT: ## =>This Inner Loop Header: Depth=1
; SSE2-NEXT: movdqa (%rdi,%rax), %xmm2
; SSE2-NEXT: pxor %xmm0, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: pcmpgtw %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm3, (%rsi,%rax)
; SSE2-NEXT: addq $16, %rax
; SSE2-NEXT: decl %edx
; SSE2-NEXT: jne LBB1_2
; SSE2-NEXT: LBB1_3: ## %for.end
; SSE2-NEXT: retq
;
; SSE41-LABEL: loop_const_folding_underflow:
; SSE41: ## %bb.0: ## %entry
; SSE41-NEXT: testl %edx, %edx
; SSE41-NEXT: je LBB1_3
; SSE41-NEXT: ## %bb.1: ## %for.body.preheader
; SSE41-NEXT: xorl %eax, %eax
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,26,26,26,26,26,26,26]
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: .p2align 4, 0x90
; SSE41-NEXT: LBB1_2: ## %for.body
; SSE41-NEXT: ## =>This Inner Loop Header: Depth=1
; SSE41-NEXT: movdqa (%rdi,%rax), %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm3
; SSE41-NEXT: pmaxuw %xmm0, %xmm3
; SSE41-NEXT: pcmpeqw %xmm2, %xmm3
; SSE41-NEXT: pxor %xmm1, %xmm3
; SSE41-NEXT: movdqa %xmm3, (%rsi,%rax)
; SSE41-NEXT: addq $16, %rax
; SSE41-NEXT: decl %edx
; SSE41-NEXT: jne LBB1_2
; SSE41-NEXT: LBB1_3: ## %for.end
; SSE41-NEXT: retq
entry:
  %cmp9 = icmp eq i32 %n, 0
  br i1 %cmp9, label %for.end, label %for.body

for.body: ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %in, i64 %indvars.iv
  %arrayidx1.val = load <2 x i64>, <2 x i64>* %arrayidx1, align 16
  %0 = bitcast <2 x i64> %arrayidx1.val to <8 x i16>
  ; Lane 0 compares against 0; the remaining lanes compare against 26.
  %cmp.i.i = icmp ult <8 x i16> %0, <i16 0, i16 26, i16 26, i16 26, i16 26, i16 26, i16 26, i16 26>
  %sext.i.i = sext <8 x i1> %cmp.i.i to <8 x i16>
  %1 = bitcast <8 x i16> %sext.i.i to <2 x i64>
  %arrayidx5 = getelementptr inbounds <2 x i64>, <2 x i64>* %out, i64 %indvars.iv
  store <2 x i64> %1, <2 x i64>* %arrayidx5, align 16
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body, %entry
  ret void
}

; Test for PSUBUSB
;
; icmp ult on <16 x i8> against splat 11, sign-extended back to <16 x i8>.
; NOTE(review): the expectations below show pminub + pcmpeqb against splat 10
; rather than psubusb, so the heading above may predate the current lowering.
; Confirm before relying on it.

define <16 x i8> @test_ult_byte(<16 x i8> %a) {
; CHECK-LABEL: test_ult_byte:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10]
; CHECK-NEXT: pminub %xmm0, %xmm1
; CHECK-NEXT: pcmpeqb %xmm1, %xmm0
; CHECK-NEXT: retq
entry:
  %icmp = icmp ult <16 x i8> %a, <i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11>
  %sext = sext <16 x i1> %icmp to <16 x i8>
  ret <16 x i8> %sext
}

; Only do this when we can turn the comparison into a setule. I.e. not for
; register operands.
;
; Both compare operands are function arguments here. The expected output is a
; sign-flipped pcmpgtw with +sse2, and pmaxuw/pcmpeqw followed by an all-ones
; pxor (inversion) with +sse4.2.

define <8 x i16> @test_ult_register(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: test_ult_register:
; SSE2: ## %bb.0: ## %entry
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm2
; SSE2-NEXT: pcmpgtw %xmm0, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_ult_register:
; SSE41: ## %bb.0: ## %entry
; SSE41-NEXT: pmaxuw %xmm0, %xmm1
; SSE41-NEXT: pcmpeqw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: retq
entry:
  %icmp = icmp ult <8 x i16> %a, %b
  %sext = sext <8 x i1> %icmp to <8 x i16>
  ret <8 x i16> %sext
}

; icmp ugt against a splat constant, one test per element width
; (i8, i16, i32, i64). The i1 vector result is returned directly.

define <16 x i1> @ugt_v16i8_splat(<16 x i8> %x) {
; CHECK-LABEL: ugt_v16i8_splat:
; CHECK: ## %bb.0:
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43]
; CHECK-NEXT: pmaxub %xmm0, %xmm1
; CHECK-NEXT: pcmpeqb %xmm1, %xmm0
; CHECK-NEXT: retq
  %cmp = icmp ugt <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  ret <16 x i1> %cmp
}

define <8 x i1> @ugt_v8i16_splat(<8 x i16> %x) {
; SSE2-LABEL: ugt_v8i16_splat:
; SSE2: ## %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [243,243,243,243,243,243,243,243]
; SSE2-NEXT: psubusw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ugt_v8i16_splat:
; SSE41: ## %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [243,243,243,243,243,243,243,243]
; SSE41-NEXT: pmaxuw %xmm0, %xmm1
; SSE41-NEXT: pcmpeqw %xmm1, %xmm0
; SSE41-NEXT: retq
  %cmp = icmp ugt <8 x i16> %x, <i16 242, i16 242, i16 242, i16 242, i16 242, i16 242, i16 242, i16 242>
  ret <8 x i1> %cmp
}

define <4 x i1> @ugt_v4i32_splat(<4 x i32> %x) {
; SSE2-LABEL: ugt_v4i32_splat:
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ugt_v4i32_splat:
; SSE41: ## %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4294967255,4294967255,4294967255,4294967255]
; SSE41-NEXT: pmaxud %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; SSE41-NEXT: retq
  %cmp = icmp ugt <4 x i32> %x, <i32 -42, i32 -42, i32 -42, i32 -42>
  ret <4 x i1> %cmp
}

define <2 x i1> @ugt_v2i64_splat(<2 x i64> %x) {
; SSE2-LABEL: ugt_v2i64_splat:
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259898,9223372039002259898]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ugt_v2i64_splat:
; SSE41: ## %bb.0:
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: pcmpgtq {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
  %cmp = icmp ugt <2 x i64> %x, <i64 442, i64 442>
  ret <2 x i1> %cmp
}

; icmp uge against a splat constant, one test per element width
; (i8, i16, i32, i64).

define <16 x i1> @uge_v16i8_splat(<16 x i8> %x) {
; CHECK-LABEL: uge_v16i8_splat:
; CHECK: ## %bb.0:
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; CHECK-NEXT: pmaxub %xmm0, %xmm1
; CHECK-NEXT: pcmpeqb %xmm1, %xmm0
; CHECK-NEXT: retq
  %cmp = icmp uge <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  ret <16 x i1> %cmp
}

define <8 x i1> @uge_v8i16_splat(<8 x i16> %x) {
; SSE2-LABEL: uge_v8i16_splat:
; SSE2: ## %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [242,242,242,242,242,242,242,242]
; SSE2-NEXT: psubusw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: uge_v8i16_splat:
; SSE41: ## %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [242,242,242,242,242,242,242,242]
; SSE41-NEXT: pmaxuw %xmm0, %xmm1
; SSE41-NEXT: pcmpeqw %xmm1, %xmm0
; SSE41-NEXT: retq
  %cmp = icmp uge <8 x i16> %x, <i16 242, i16 242, i16 242, i16 242, i16 242, i16 242, i16 242, i16 242>
  ret <8 x i1> %cmp
}

define <4 x i1> @uge_v4i32_splat(<4 x i32> %x) {
; SSE2-LABEL: uge_v4i32_splat:
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483606,2147483606,2147483606,2147483606]
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: uge_v4i32_splat:
; SSE41: ## %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4294967254,4294967254,4294967254,4294967254]
; SSE41-NEXT: pmaxud %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; SSE41-NEXT: retq
  %cmp = icmp uge <4 x i32> %x, <i32 -42, i32 -42, i32 -42, i32 -42>
  ret <4 x i1> %cmp
}

define <2 x i1> @uge_v2i64_splat(<2 x i64> %x) {
; SSE2-LABEL: uge_v2i64_splat:
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259898,9223372039002259898]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-NEXT: por %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: uge_v2i64_splat:
; SSE41: ## %bb.0:
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [9223372036854776250,9223372036854776250]
; SSE41-NEXT: pcmpgtq %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: retq
  %cmp = icmp uge <2 x i64> %x, <i64 442, i64 442>
  ret <2 x i1> %cmp
}

; icmp ult against a splat constant, one test per element width
; (i8, i16, i32, i64).

define <16 x i1> @ult_v16i8_splat(<16 x i8> %x) {
; CHECK-LABEL: ult_v16i8_splat:
; CHECK: ## %bb.0:
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41]
; CHECK-NEXT: pminub %xmm0, %xmm1
; CHECK-NEXT: pcmpeqb %xmm1, %xmm0
; CHECK-NEXT: retq
  %cmp = icmp ult <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  ret <16 x i1> %cmp
}

define <8 x i1> @ult_v8i16_splat(<8 x i16> %x) {
; SSE2-LABEL: ult_v8i16_splat:
; SSE2: ## %bb.0:
; SSE2-NEXT: psubusw {{.*}}(%rip), %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ult_v8i16_splat:
; SSE41: ## %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [241,241,241,241,241,241,241,241]
; SSE41-NEXT: pminuw %xmm0, %xmm1
; SSE41-NEXT: pcmpeqw %xmm1, %xmm0
; SSE41-NEXT: retq
  %cmp = icmp ult <8 x i16> %x, <i16 242, i16 242, i16 242, i16 242, i16 242, i16 242, i16 242, i16 242>
  ret <8 x i1> %cmp
}

define <4 x i1> @ult_v4i32_splat(<4 x i32> %x) {
; SSE2-LABEL: ult_v4i32_splat:
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483606,2147483606,2147483606,2147483606]
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ult_v4i32_splat:
; SSE41: ## %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4294967253,4294967253,4294967253,4294967253]
; SSE41-NEXT: pminud %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; SSE41-NEXT: retq
  %cmp = icmp ult <4 x i32> %x, <i32 -42, i32 -42, i32 -42, i32 -42>
  ret <4 x i1> %cmp
}

define <2 x i1> @ult_v2i64_splat(<2 x i64> %x) {
; SSE2-LABEL: ult_v2i64_splat:
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259898,9223372039002259898]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ult_v2i64_splat:
; SSE41: ## %bb.0:
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [9223372036854776250,9223372036854776250]
; SSE41-NEXT: pcmpgtq %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
  %cmp = icmp ult <2 x i64> %x, <i64 442, i64 442>
  ret <2 x i1> %cmp
}

; icmp ule against a splat constant, one test per element width
; (i8, i16, i32, i64).

define <16 x i1> @ule_v16i8_splat(<16 x i8> %x) {
; CHECK-LABEL: ule_v16i8_splat:
; CHECK: ## %bb.0:
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; CHECK-NEXT: pminub %xmm0, %xmm1
; CHECK-NEXT: pcmpeqb %xmm1, %xmm0
; CHECK-NEXT: retq
  %cmp = icmp ule <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  ret <16 x i1> %cmp
}

define <8 x i1> @ule_v8i16_splat(<8 x i16> %x) {
; SSE2-LABEL: ule_v8i16_splat:
; SSE2: ## %bb.0:
; SSE2-NEXT: psubusw {{.*}}(%rip), %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ule_v8i16_splat:
; SSE41: ## %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [242,242,242,242,242,242,242,242]
; SSE41-NEXT: pminuw %xmm0, %xmm1
; SSE41-NEXT: pcmpeqw %xmm1, %xmm0
; SSE41-NEXT: retq
  %cmp = icmp ule <8 x i16> %x, <i16 242, i16 242, i16 242, i16 242, i16 242, i16 242, i16 242, i16 242>
  ret <8 x i1> %cmp
}

define <4 x i1> @ule_v4i32_splat(<4 x i32> %x) {
; SSE2-LABEL: ule_v4i32_splat:
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ule_v4i32_splat:
; SSE41: ## %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4294967254,4294967254,4294967254,4294967254]
; SSE41-NEXT: pminud %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; SSE41-NEXT: retq
  %cmp = icmp ule <4 x i32> %x, <i32 -42, i32 -42, i32 -42, i32 -42>
  ret <4 x i1> %cmp
}

define <2 x i1> @ule_v2i64_splat(<2 x i64> %x) {
; SSE2-LABEL: ule_v2i64_splat:
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259898,9223372039002259898]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-NEXT: por %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ule_v2i64_splat:
; SSE41: ## %bb.0:
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: pcmpgtq {{.*}}(%rip), %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: retq
  %cmp = icmp ule <2 x i64> %x, <i64 442, i64 442>
  ret <2 x i1> %cmp
}

; This should be simplified before we reach lowering, but
; make sure that we are not getting it wrong by underflowing.
;
; "x ult 0" can never be true; the expected output is an all-zero register.

define <4 x i1> @ult_v4i32_splat_0_simplify(<4 x i32> %x) {
; CHECK-LABEL: ult_v4i32_splat_0_simplify:
; CHECK: ## %bb.0:
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = icmp ult <4 x i32> %x, <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i1> %cmp
}

; This should be simplified before we reach lowering, but
; make sure that we are not getting it wrong by overflowing.
;
; "x ugt -1" (the maximum unsigned value) can never be true; the expected
; output is an all-zero register.

define <4 x i1> @ugt_v4i32_splat_maxval_simplify(<4 x i32> %x) {
; CHECK-LABEL: ugt_v4i32_splat_maxval_simplify:
; CHECK: ## %bb.0:
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = icmp ugt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i1> %cmp
}

; icmp ugt on <4 x i32> with a different constant in every lane
; (-43, -42, -41, -40) instead of a splat.

define <4 x i1> @ugt_v4i32_nonsplat(<4 x i32> %x) {
; SSE2-LABEL: ugt_v4i32_nonsplat:
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ugt_v4i32_nonsplat:
; SSE41: ## %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4294967254,4294967255,4294967256,4294967257]
; SSE41-NEXT: pmaxud %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; SSE41-NEXT: retq
  %cmp = icmp ugt <4 x i32> %x, <i32 -43, i32 -42, i32 -41, i32 -40>
  ret <4 x i1> %cmp
}

; icmp ugt with the splat constant as the first operand (4 ugt %x).

define <4 x i1> @ugt_v4i32_splat_commute(<4 x i32> %x) {
; SSE2-LABEL: ugt_v4i32_splat_commute:
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652,2147483652,2147483652]
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ugt_v4i32_splat_commute:
; SSE41: ## %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [3,3,3,3]
; SSE41-NEXT: pminud %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; SSE41-NEXT: retq
  %cmp = icmp ugt <4 x i32> <i32 4, i32 4, i32 4, i32 4>, %x
  ret <4 x i1> %cmp
}

; icmp ugt against splat 42 whose result drives a vector select between %y
; and %x.
; NOTE(review): presumably a regression test for the bug report named in the
; function name; the name is the only evidence visible here.

define <8 x i16> @PR39859(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: PR39859:
; SSE2: ## %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [43,43,43,43,43,43,43,43]
; SSE2-NEXT: psubusw %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pcmpeqw %xmm3, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: PR39859:
; SSE41: ## %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [43,43,43,43,43,43,43,43]
; SSE41-NEXT: pmaxuw %xmm2, %xmm0
; SSE41-NEXT: pcmpeqw %xmm2, %xmm0
; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
  %cmp = icmp ugt <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %sel = select <8 x i1> %cmp, <8 x i16> %y, <8 x i16> %x
  ret <8 x i16> %sel
}
