; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_mem_shuffle
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64

; Splat byte element 5 of a <32 x i8> input across all 32 lanes.
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcA:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <32 x i8> %shuffle
}

; Splat i16 element 5 of a <16 x i16> input across all 16 lanes.
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcB:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i16> %shuffle
}

; Build a <4 x i64> by inserting the scalar argument %q into all four lanes.
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; X86-LABEL: funcC:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: funcC:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovq %rdi, %xmm0
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}

; Build a <4 x double> by inserting the scalar argument %q into all four lanes.
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; X86-LABEL: funcD:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: funcD:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}

; Test that this turns into a broadcast:
;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
;
; The i32 loaded from element [1][1] of the stack array is inserted into lanes
; 6 and 7 only; the remaining lanes are undef.
define <8 x float> @funcE() nounwind {
; X86-LABEL: funcE:
; X86:       # %bb.0: # %allocas
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    testb %al, %al
; X86-NEXT:    # implicit-def: $ymm0
; X86-NEXT:    jne .LBB4_2
; X86-NEXT:  # %bb.1: # %load.i1247
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-32, %esp
; X86-NEXT:    subl $1312, %esp # imm = 0x520
; X86-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:  .LBB4_2: # %__load_and_broadcast_32.exit1249
; X86-NEXT:    retl
;
; X64-LABEL: funcE:
; X64:       # %bb.0: # %allocas
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    testb %al, %al
; X64-NEXT:    # implicit-def: $ymm0
; X64-NEXT:    jne .LBB4_2
; X64-NEXT:  # %bb.1: # %load.i1247
; X64-NEXT:    pushq %rbp
; X64-NEXT:    movq %rsp, %rbp
; X64-NEXT:    andq $-32, %rsp
; X64-NEXT:    subq $1312, %rsp # imm = 0x520
; X64-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %ymm0
; X64-NEXT:    movq %rbp, %rsp
; X64-NEXT:    popq %rbp
; X64-NEXT:  .LBB4_2: # %__load_and_broadcast_32.exit1249
; X64-NEXT:    retq
allocas:
  %udx495 = alloca [18 x [18 x float]], align 32
  br label %for_test505.preheader

for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
  br i1 undef, label %for_exit499, label %for_test505.preheader

for_exit499:                                      ; preds = %for_test505.preheader
  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

load.i1247:                                       ; preds = %for_exit499
  %ptr1227 = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
  %ptr.i1237 = bitcast float* %ptr1227 to i32*
  %val.i1238 = load i32, i32* %ptr.i1237, align 4
  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
  br label %__load_and_broadcast_32.exit1249

__load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
  ret <8 x float> %load_broadcast12281250
}

; Insert the scalar argument %val into lanes 6 and 7 of an otherwise-undef
; <8 x i32> and bitcast the result to <8 x float>. The assertions expect the
; value to be broadcast to the whole ymm register.
define <8 x float> @funcF(i32 %val) nounwind {
; X86-LABEL: funcF:
; X86:       # %bb.0:
; X86-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: funcF:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT:    retq
  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
  ret <8 x float> %tmp
}

; Splat float element 0 of a <8 x float> input across all 8 lanes.
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcG:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Splat float element 5 (which lives in the upper 128-bit half of the input)
; across all 8 lanes.
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcH:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %shuffle
}

; Load a <2 x double> and splat element 1. The assertions expect the load to be
; folded into a vmovddup from byte offset 8.
define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
; X86-LABEL: splat_load_2f64_11:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovddup 8(%eax), %xmm0 # xmm0 = mem[0,0]
; X86-NEXT:    retl
;
; X64-LABEL: splat_load_2f64_11:
; X64:       # %bb.0:
; X64-NEXT:    vmovddup 8(%rdi), %xmm0 # xmm0 = mem[0,0]
; X64-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %ptr
  %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %x1
}

; Load a <4 x double> and splat element 2. The assertions expect the load to be
; folded into a vbroadcastsd from byte offset 16.
define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
; X86-LABEL: splat_load_4f64_2222:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vbroadcastsd 16(%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: splat_load_4f64_2222:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT:    retq
  %x = load <4 x double>, <4 x double>* %ptr
  %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %x1
}

; Load a <4 x float> and splat element 0. The assertions expect the load to be
; folded into a vbroadcastss from byte offset 0.
define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
; X86-LABEL: splat_load_4f32_0000:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vbroadcastss (%eax), %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: splat_load_4f32_0000:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %ptr
  %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x float> %x1
}

; Load a <8 x float> and splat element 7. The assertions expect the load to be
; folded into a vbroadcastss from byte offset 28.
define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
; X86-LABEL: splat_load_8f32_77777777:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vbroadcastss 28(%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: splat_load_8f32_77777777:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastss 28(%rdi), %ymm0
; X64-NEXT:    retq
  %x = load <8 x float>, <8 x float>* %ptr
  %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x float> %x1
}