; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2,-slow-unaligned-mem-16 | FileCheck %s --check-prefix=SSE2FAST
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; https://llvm.org/bugs/show_bug.cgi?id=27100

; 16-byte memset of a nonzero constant (0x2A): plain SSE uses two 8-byte
; stores of a splatted immediate; SSE2 with fast unaligned 16-byte access
; and AVX each use a single 16-byte vector store.
define void @memset_16_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_16_nonzero_bytes:
; SSE:       # BB#0:
; SSE-NEXT:    movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT:    movq %rax, 8(%rdi)
; SSE-NEXT:    movq %rax, (%rdi)
; SSE-NEXT:    retq
;
; SSE2FAST-LABEL: memset_16_nonzero_bytes:
; SSE2FAST:       # BB#0:
; SSE2FAST-NEXT:    movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT:    movups %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX-LABEL: memset_16_nonzero_bytes:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT:    vmovups %xmm0, (%rdi)
; AVX-NEXT:    retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 16, i64 -1)
  ret void
}

; 32-byte memset: AVX widens to a single 32-byte ymm store (followed by
; vzeroupper); the SSE variants repeat their 8-/16-byte store patterns.
define void @memset_32_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_32_nonzero_bytes:
; SSE:       # BB#0:
; SSE-NEXT:    movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT:    movq %rax, 24(%rdi)
; SSE-NEXT:    movq %rax, 16(%rdi)
; SSE-NEXT:    movq %rax, 8(%rdi)
; SSE-NEXT:    movq %rax, (%rdi)
; SSE-NEXT:    retq
;
; SSE2FAST-LABEL: memset_32_nonzero_bytes:
; SSE2FAST:       # BB#0:
; SSE2FAST-NEXT:    movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT:    movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX-LABEL: memset_32_nonzero_bytes:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT:    vmovups %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 32, i64 -1)
  ret void
}

; 64-byte memset: still fully unrolled stores on every target.
define void @memset_64_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_64_nonzero_bytes:
; SSE:       # BB#0:
; SSE-NEXT:    movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT:    movq %rax, 56(%rdi)
; SSE-NEXT:    movq %rax, 48(%rdi)
; SSE-NEXT:    movq %rax, 40(%rdi)
; SSE-NEXT:    movq %rax, 32(%rdi)
; SSE-NEXT:    movq %rax, 24(%rdi)
; SSE-NEXT:    movq %rax, 16(%rdi)
; SSE-NEXT:    movq %rax, 8(%rdi)
; SSE-NEXT:    movq %rax, (%rdi)
; SSE-NEXT:    retq
;
; SSE2FAST-LABEL: memset_64_nonzero_bytes:
; SSE2FAST:       # BB#0:
; SSE2FAST-NEXT:    movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT:    movups %xmm0, 48(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 32(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX-LABEL: memset_64_nonzero_bytes:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT:    vmovups %ymm0, 32(%rdi)
; AVX-NEXT:    vmovups %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 64, i64 -1)
  ret void
}

; 128-byte memset: largest size that is still inlined on all targets here.
define void @memset_128_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_128_nonzero_bytes:
; SSE:       # BB#0:
; SSE-NEXT:    movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT:    movq %rax, 120(%rdi)
; SSE-NEXT:    movq %rax, 112(%rdi)
; SSE-NEXT:    movq %rax, 104(%rdi)
; SSE-NEXT:    movq %rax, 96(%rdi)
; SSE-NEXT:    movq %rax, 88(%rdi)
; SSE-NEXT:    movq %rax, 80(%rdi)
; SSE-NEXT:    movq %rax, 72(%rdi)
; SSE-NEXT:    movq %rax, 64(%rdi)
; SSE-NEXT:    movq %rax, 56(%rdi)
; SSE-NEXT:    movq %rax, 48(%rdi)
; SSE-NEXT:    movq %rax, 40(%rdi)
; SSE-NEXT:    movq %rax, 32(%rdi)
; SSE-NEXT:    movq %rax, 24(%rdi)
; SSE-NEXT:    movq %rax, 16(%rdi)
; SSE-NEXT:    movq %rax, 8(%rdi)
; SSE-NEXT:    movq %rax, (%rdi)
; SSE-NEXT:    retq
;
; SSE2FAST-LABEL: memset_128_nonzero_bytes:
; SSE2FAST:       # BB#0:
; SSE2FAST-NEXT:    movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT:    movups %xmm0, 112(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 96(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 80(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 64(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 48(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 32(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX-LABEL: memset_128_nonzero_bytes:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT:    vmovups %ymm0, 96(%rdi)
; AVX-NEXT:    vmovups %ymm0, 64(%rdi)
; AVX-NEXT:    vmovups %ymm0, 32(%rdi)
; AVX-NEXT:    vmovups %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 128, i64 -1)
  ret void
}

; 256-byte memset: plain SSE gives up and calls libc memset; the fast
; unaligned and AVX targets still emit fully unrolled vector stores.
define void @memset_256_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_256_nonzero_bytes:
; SSE:       # BB#0:
; SSE-NEXT:    pushq %rax
; SSE-NEXT:    .Ltmp0:
; SSE-NEXT:    .cfi_def_cfa_offset 16
; SSE-NEXT:    movl $42, %esi
; SSE-NEXT:    movl $256, %edx # imm = 0x100
; SSE-NEXT:    callq memset
; SSE-NEXT:    popq %rax
; SSE-NEXT:    retq
;
; SSE2FAST-LABEL: memset_256_nonzero_bytes:
; SSE2FAST:       # BB#0:
; SSE2FAST-NEXT:    movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT:    movups %xmm0, 240(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 224(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 208(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 192(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 176(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 160(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 144(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 128(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 112(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 96(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 80(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 64(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 48(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 32(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX-LABEL: memset_256_nonzero_bytes:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT:    vmovups %ymm0, 224(%rdi)
; AVX-NEXT:    vmovups %ymm0, 192(%rdi)
; AVX-NEXT:    vmovups %ymm0, 160(%rdi)
; AVX-NEXT:    vmovups %ymm0, 128(%rdi)
; AVX-NEXT:    vmovups %ymm0, 96(%rdi)
; AVX-NEXT:    vmovups %ymm0, 64(%rdi)
; AVX-NEXT:    vmovups %ymm0, 32(%rdi)
; AVX-NEXT:    vmovups %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 256, i64 -1)
  ret void
}

declare i8* @__memset_chk(i8*, i32, i64, i64)

; Repeat with a non-constant value for the stores.

; 16-byte memset of a variable byte: scalar SSE splats via multiply with
; 0x0101010101010101; SSE2 splats with shuffles; AVX1 uses vpshufb against
; zero; AVX2 uses vpbroadcastb.
define void @memset_16_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_16_nonconst_bytes:
; SSE:       # BB#0:
; SSE-NEXT:    movzbl %sil, %eax
; SSE-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT:    imulq %rax, %rcx
; SSE-NEXT:    movq %rcx, 8(%rdi)
; SSE-NEXT:    movq %rcx, (%rdi)
; SSE-NEXT:    retq
;
; SSE2FAST-LABEL: memset_16_nonconst_bytes:
; SSE2FAST:       # BB#0:
; SSE2FAST-NEXT:    movd %esi, %xmm0
; SSE2FAST-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2FAST-NEXT:    movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX1-LABEL: memset_16_nonconst_bytes:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %esi, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqu %xmm0, (%rdi)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: memset_16_nonconst_bytes:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %esi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT:    vmovdqu %xmm0, (%rdi)
; AVX2-NEXT:    retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 16, i32 1, i1 false)
  ret void
}

; 32-byte variable memset: AVX1 builds a ymm splat with vinsertf128; AVX2
; broadcasts straight to ymm.
define void @memset_32_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_32_nonconst_bytes:
; SSE:       # BB#0:
; SSE-NEXT:    movzbl %sil, %eax
; SSE-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT:    imulq %rax, %rcx
; SSE-NEXT:    movq %rcx, 24(%rdi)
; SSE-NEXT:    movq %rcx, 16(%rdi)
; SSE-NEXT:    movq %rcx, 8(%rdi)
; SSE-NEXT:    movq %rcx, (%rdi)
; SSE-NEXT:    retq
;
; SSE2FAST-LABEL: memset_32_nonconst_bytes:
; SSE2FAST:       # BB#0:
; SSE2FAST-NEXT:    movd %esi, %xmm0
; SSE2FAST-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2FAST-NEXT:    movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX1-LABEL: memset_32_nonconst_bytes:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %esi, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovups %ymm0, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: memset_32_nonconst_bytes:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %esi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 32, i32 1, i1 false)
  ret void
}

; 64-byte variable memset: same splat sequences, unrolled stores.
define void @memset_64_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_64_nonconst_bytes:
; SSE:       # BB#0:
; SSE-NEXT:    movzbl %sil, %eax
; SSE-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT:    imulq %rax, %rcx
; SSE-NEXT:    movq %rcx, 56(%rdi)
; SSE-NEXT:    movq %rcx, 48(%rdi)
; SSE-NEXT:    movq %rcx, 40(%rdi)
; SSE-NEXT:    movq %rcx, 32(%rdi)
; SSE-NEXT:    movq %rcx, 24(%rdi)
; SSE-NEXT:    movq %rcx, 16(%rdi)
; SSE-NEXT:    movq %rcx, 8(%rdi)
; SSE-NEXT:    movq %rcx, (%rdi)
; SSE-NEXT:    retq
;
; SSE2FAST-LABEL: memset_64_nonconst_bytes:
; SSE2FAST:       # BB#0:
; SSE2FAST-NEXT:    movd %esi, %xmm0
; SSE2FAST-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2FAST-NEXT:    movdqu %xmm0, 48(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 32(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX1-LABEL: memset_64_nonconst_bytes:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %esi, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovups %ymm0, 32(%rdi)
; AVX1-NEXT:    vmovups %ymm0, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: memset_64_nonconst_bytes:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %esi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 64, i32 1, i1 false)
  ret void
}

; 128-byte variable memset: still fully inlined on every target.
define void @memset_128_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_128_nonconst_bytes:
; SSE:       # BB#0:
; SSE-NEXT:    movzbl %sil, %eax
; SSE-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT:    imulq %rax, %rcx
; SSE-NEXT:    movq %rcx, 120(%rdi)
; SSE-NEXT:    movq %rcx, 112(%rdi)
; SSE-NEXT:    movq %rcx, 104(%rdi)
; SSE-NEXT:    movq %rcx, 96(%rdi)
; SSE-NEXT:    movq %rcx, 88(%rdi)
; SSE-NEXT:    movq %rcx, 80(%rdi)
; SSE-NEXT:    movq %rcx, 72(%rdi)
; SSE-NEXT:    movq %rcx, 64(%rdi)
; SSE-NEXT:    movq %rcx, 56(%rdi)
; SSE-NEXT:    movq %rcx, 48(%rdi)
; SSE-NEXT:    movq %rcx, 40(%rdi)
; SSE-NEXT:    movq %rcx, 32(%rdi)
; SSE-NEXT:    movq %rcx, 24(%rdi)
; SSE-NEXT:    movq %rcx, 16(%rdi)
; SSE-NEXT:    movq %rcx, 8(%rdi)
; SSE-NEXT:    movq %rcx, (%rdi)
; SSE-NEXT:    retq
;
; SSE2FAST-LABEL: memset_128_nonconst_bytes:
; SSE2FAST:       # BB#0:
; SSE2FAST-NEXT:    movd %esi, %xmm0
; SSE2FAST-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2FAST-NEXT:    movdqu %xmm0, 112(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 96(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 80(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 64(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 48(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 32(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX1-LABEL: memset_128_nonconst_bytes:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %esi, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovups %ymm0, 96(%rdi)
; AVX1-NEXT:    vmovups %ymm0, 64(%rdi)
; AVX1-NEXT:    vmovups %ymm0, 32(%rdi)
; AVX1-NEXT:    vmovups %ymm0, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: memset_128_nonconst_bytes:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %esi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, 64(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 128, i32 1, i1 false)
  ret void
}

; 256-byte variable memset: plain SSE tail-calls libc memset; the other
; targets still emit fully unrolled vector stores.
define void @memset_256_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_256_nonconst_bytes:
; SSE:       # BB#0:
; SSE-NEXT:    movl $256, %edx # imm = 0x100
; SSE-NEXT:    jmp memset # TAILCALL
;
; SSE2FAST-LABEL: memset_256_nonconst_bytes:
; SSE2FAST:       # BB#0:
; SSE2FAST-NEXT:    movd %esi, %xmm0
; SSE2FAST-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2FAST-NEXT:    movdqu %xmm0, 240(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 224(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 208(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 192(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 176(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 160(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 144(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 128(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 112(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 96(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 80(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 64(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 48(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 32(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX1-LABEL: memset_256_nonconst_bytes:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %esi, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovups %ymm0, 224(%rdi)
; AVX1-NEXT:    vmovups %ymm0, 192(%rdi)
; AVX1-NEXT:    vmovups %ymm0, 160(%rdi)
; AVX1-NEXT:    vmovups %ymm0, 128(%rdi)
; AVX1-NEXT:    vmovups %ymm0, 96(%rdi)
; AVX1-NEXT:    vmovups %ymm0, 64(%rdi)
; AVX1-NEXT:    vmovups %ymm0, 32(%rdi)
; AVX1-NEXT:    vmovups %ymm0, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: memset_256_nonconst_bytes:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %esi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, 224(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, 192(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, 160(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, 128(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, 64(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 256, i32 1, i1 false)
  ret void
}

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1
