; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64

define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: BB16:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpbroadcastb (%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: BB16:
; X64: ## BB#0: ## %entry
; X64-NEXT: vpbroadcastb (%rdi), %xmm0
; X64-NEXT: retq
entry:
  %q = load i8, i8* %ptr, align 4
  %q0 = insertelement <16 x i8> undef, i8 %q, i32 0
  %q1 = insertelement <16 x i8> %q0, i8 %q, i32 1
  %q2 = insertelement <16 x i8> %q1, i8 %q, i32 2
  %q3 = insertelement <16 x i8> %q2, i8 %q, i32 3
  %q4 = insertelement <16 x i8> %q3, i8 %q, i32 4
  %q5 = insertelement <16 x i8> %q4, i8 %q, i32 5
  %q6 = insertelement <16 x i8> %q5, i8 %q, i32 6
  %q7 = insertelement <16 x i8> %q6, i8 %q, i32 7
  %q8 = insertelement <16 x i8> %q7, i8 %q, i32 8
  %q9 = insertelement <16 x i8> %q8, i8 %q, i32 9
  %qa = insertelement <16 x i8> %q9, i8 %q, i32 10
  %qb = insertelement <16 x i8> %qa, i8 %q, i32 11
  %qc = insertelement <16 x i8> %qb, i8 %q, i32 12
  %qd = insertelement <16 x i8> %qc, i8 %q, i32 13
  %qe = insertelement <16 x i8> %qd, i8 %q, i32 14
  %qf = insertelement <16 x i8> %qe, i8 %q, i32 15
  ret <16 x i8> %qf
}

define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: BB32:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpbroadcastb (%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: BB32:
; X64: ## BB#0: ## %entry
; X64-NEXT: vpbroadcastb (%rdi), %ymm0
; X64-NEXT: retq
entry:
  %q = load i8, i8* %ptr, align 4
  %q0 = insertelement <32 x i8> undef, i8 %q, i32 0
  %q1 = insertelement <32 x i8> %q0, i8 %q, i32 1
  %q2 = insertelement <32 x i8> %q1, i8 %q, i32 2
  %q3 = insertelement <32 x i8> %q2, i8 %q, i32 3
  %q4 = insertelement <32 x i8> %q3, i8 %q, i32 4
  %q5 = insertelement <32 x i8> %q4, i8 %q, i32 5
  %q6 = insertelement <32 x i8> %q5, i8 %q, i32 6
  %q7 = insertelement <32 x i8> %q6, i8 %q, i32 7
  %q8 = insertelement <32 x i8> %q7, i8 %q, i32 8
  %q9 = insertelement <32 x i8> %q8, i8 %q, i32 9
  %qa = insertelement <32 x i8> %q9, i8 %q, i32 10
  %qb = insertelement <32 x i8> %qa, i8 %q, i32 11
  %qc = insertelement <32 x i8> %qb, i8 %q, i32 12
  %qd = insertelement <32 x i8> %qc, i8 %q, i32 13
  %qe = insertelement <32 x i8> %qd, i8 %q, i32 14
  %qf = insertelement <32 x i8> %qe, i8 %q, i32 15

  %q20 = insertelement <32 x i8> %qf, i8 %q, i32 16
  %q21 = insertelement <32 x i8> %q20, i8 %q, i32 17
  %q22 = insertelement <32 x i8> %q21, i8 %q, i32 18
  %q23 = insertelement <32 x i8> %q22, i8 %q, i32 19
  %q24 = insertelement <32 x i8> %q23, i8 %q, i32 20
  %q25 = insertelement <32 x i8> %q24, i8 %q, i32 21
  %q26 = insertelement <32 x i8> %q25, i8 %q, i32 22
  %q27 = insertelement <32 x i8> %q26, i8 %q, i32 23
  %q28 = insertelement <32 x i8> %q27, i8 %q, i32 24
  %q29 = insertelement <32 x i8> %q28, i8 %q, i32 25
  %q2a = insertelement <32 x i8> %q29, i8 %q, i32 26
  %q2b = insertelement <32 x i8> %q2a, i8 %q, i32 27
  %q2c = insertelement <32 x i8> %q2b, i8 %q, i32 28
  %q2d = insertelement <32 x i8> %q2c, i8 %q, i32 29
  %q2e = insertelement <32 x i8> %q2d, i8 %q, i32 30
  %q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31
  ret <32 x i8> %q2f
}

define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: W16:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpbroadcastw (%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: W16:
; X64: ## BB#0: ## %entry
; X64-NEXT: vpbroadcastw (%rdi), %xmm0
; X64-NEXT: retq
entry:
  %q = load i16, i16* %ptr, align 4
  %q0 = insertelement <8 x i16> undef, i16 %q, i32 0
  %q1 = insertelement <8 x i16> %q0, i16 %q, i32 1
  %q2 = insertelement <8 x i16> %q1, i16 %q, i32 2
  %q3 = insertelement <8 x i16> %q2, i16 %q, i32 3
  %q4 = insertelement <8 x i16> %q3, i16 %q, i32 4
  %q5 = insertelement <8 x i16> %q4, i16 %q, i32 5
  %q6 = insertelement <8 x i16> %q5, i16 %q, i32 6
  %q7 = insertelement <8 x i16> %q6, i16 %q, i32 7
  ret <8 x i16> %q7
}

define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: WW16:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpbroadcastw (%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: WW16:
; X64: ## BB#0: ## %entry
; X64-NEXT: vpbroadcastw (%rdi), %ymm0
; X64-NEXT: retq
entry:
  %q = load i16, i16* %ptr, align 4
  %q0 = insertelement <16 x i16> undef, i16 %q, i32 0
  %q1 = insertelement <16 x i16> %q0, i16 %q, i32 1
  %q2 = insertelement <16 x i16> %q1, i16 %q, i32 2
  %q3 = insertelement <16 x i16> %q2, i16 %q, i32 3
  %q4 = insertelement <16 x i16> %q3, i16 %q, i32 4
  %q5 = insertelement <16 x i16> %q4, i16 %q, i32 5
  %q6 = insertelement <16 x i16> %q5, i16 %q, i32 6
  %q7 = insertelement <16 x i16> %q6, i16 %q, i32 7
  %q8 = insertelement <16 x i16> %q7, i16 %q, i32 8
  %q9 = insertelement <16 x i16> %q8, i16 %q, i32 9
  %qa = insertelement <16 x i16> %q9, i16 %q, i32 10
  %qb = insertelement <16 x i16> %qa, i16 %q, i32 11
  %qc = insertelement <16 x i16> %qb, i16 %q, i32 12
  %qd = insertelement <16 x i16> %qc, i16 %q, i32 13
  %qe = insertelement <16 x i16> %qd, i16 %q, i32 14
  %qf = insertelement <16 x i16> %qe, i16 %q, i32 15
  ret <16 x i16> %qf
}

define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: D32:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vbroadcastss (%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: D32:
; X64: ## BB#0: ## %entry
; X64-NEXT: vbroadcastss (%rdi), %xmm0
; X64-NEXT: retq
entry:
  %q = load i32, i32* %ptr, align 4
  %q0 = insertelement <4 x i32> undef, i32 %q, i32 0
  %q1 = insertelement <4 x i32> %q0, i32 %q, i32 1
  %q2 = insertelement <4 x i32> %q1, i32 %q, i32 2
  %q3 = insertelement <4 x i32> %q2, i32 %q, i32 3
  ret <4 x i32> %q3
}

define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: DD32:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vbroadcastss (%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: DD32:
; X64: ## BB#0: ## %entry
; X64-NEXT: vbroadcastss (%rdi), %ymm0
; X64-NEXT: retq
entry:
  %q = load i32, i32* %ptr, align 4
  %q0 = insertelement <8 x i32> undef, i32 %q, i32 0
  %q1 = insertelement <8 x i32> %q0, i32 %q, i32 1
  %q2 = insertelement <8 x i32> %q1, i32 %q, i32 2
  %q3 = insertelement <8 x i32> %q2, i32 %q, i32 3
  %q4 = insertelement <8 x i32> %q3, i32 %q, i32 4
  %q5 = insertelement <8 x i32> %q4, i32 %q, i32 5
  %q6 = insertelement <8 x i32> %q5, i32 %q, i32 6
  %q7 = insertelement <8 x i32> %q6, i32 %q, i32 7
  ret <8 x i32> %q7
}

define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: Q64:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: movl 4(%eax), %eax
; X32-NEXT: vmovd %ecx, %xmm0
; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: Q64:
; X64: ## BB#0: ## %entry
; X64-NEXT: vpbroadcastq (%rdi), %xmm0
; X64-NEXT: retq
entry:
  %q = load i64, i64* %ptr, align 4
  %q0 = insertelement <2 x i64> undef, i64 %q, i32 0
  %q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
  ret <2 x i64> %q1
}

define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: QQ64:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: movl 4(%eax), %eax
; X32-NEXT: vmovd %ecx, %xmm0
; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: QQ64:
; X64: ## BB#0: ## %entry
; X64-NEXT: vbroadcastsd (%rdi), %ymm0
; X64-NEXT: retq
entry:
  %q = load i64, i64* %ptr, align 4
  %q0 = insertelement <4 x i64> undef, i64 %q, i32 0
  %q1 = insertelement <4 x i64> %q0, i64 %q, i32 1
  %q2 = insertelement <4 x i64> %q1, i64 %q, i32 2
  %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3
  ret <4 x i64> %q3
}

; FIXME: Pointer adjusted broadcasts

define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_16i8_16i8_1111111111111111:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpbroadcastb 1(%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_16i8_16i8_1111111111111111:
; X64: ## BB#0: ## %entry
; X64-NEXT: vpbroadcastb 1(%rdi), %xmm0
; X64-NEXT: retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i8> %ret
}

define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpbroadcastb 1(%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
; X64: ## BB#0: ## %entry
; X64-NEXT: vpbroadcastb 1(%rdi), %ymm0
; X64-NEXT: retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <32 x i8> %ret
}

define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpbroadcastb 1(%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
; X64: ## BB#0: ## %entry
; X64-NEXT: vpbroadcastb 1(%rdi), %ymm0
; X64-NEXT: retq
entry:
  %ld = load <32 x i8>, <32 x i8>* %ptr
  %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <32 x i8> %ret
}

define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i16_8i16_11111111:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpbroadcastw 2(%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_8i16_8i16_11111111:
; X64: ## BB#0: ## %entry
; X64-NEXT: vpbroadcastw 2(%rdi), %xmm0
; X64-NEXT: retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i16> %ret
}

define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_16i16_8i16_1111111111111111:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpbroadcastw 2(%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_16i16_8i16_1111111111111111:
; X64: ## BB#0: ## %entry
; X64-NEXT: vpbroadcastw 2(%rdi), %ymm0
; X64-NEXT: retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i16> %ret
}

define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_16i16_16i16_1111111111111111:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpbroadcastw 2(%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_16i16_16i16_1111111111111111:
; X64: ## BB#0: ## %entry
; X64-NEXT: vpbroadcastw 2(%rdi), %ymm0
; X64-NEXT: retq
entry:
  %ld = load <16 x i16>, <16 x i16>* %ptr
  %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i16> %ret
}

define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i32_4i32_1111:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vbroadcastss 4(%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_4i32_4i32_1111:
; X64: ## BB#0: ## %entry
; X64-NEXT: vbroadcastss 4(%rdi), %xmm0
; X64-NEXT: retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %ret
}

define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i32_4i32_33333333:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vbroadcastss 12(%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_8i32_4i32_33333333:
; X64: ## BB#0: ## %entry
; X64-NEXT: vbroadcastss 12(%rdi), %ymm0
; X64-NEXT: retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %ret
}

define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i32_8i32_55555555:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vbroadcastss 20(%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_8i32_8i32_55555555:
; X64: ## BB#0: ## %entry
; X64-NEXT: vbroadcastss 20(%rdi), %ymm0
; X64-NEXT: retq
entry:
  %ld = load <8 x i32>, <8 x i32>* %ptr
  %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %ret
}

define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f32_4f32_1111:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vbroadcastss 4(%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_4f32_4f32_1111:
; X64: ## BB#0: ## %entry
; X64-NEXT: vbroadcastss 4(%rdi), %xmm0
; X64-NEXT: retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x float> %ret
}

define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8f32_4f32_33333333:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vbroadcastss 12(%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_8f32_4f32_33333333:
; X64: ## BB#0: ## %entry
; X64-NEXT: vbroadcastss 12(%rdi), %ymm0
; X64-NEXT: retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x float> %ret
}

define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8f32_8f32_55555555:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vbroadcastss 20(%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_8f32_8f32_55555555:
; X64: ## BB#0: ## %entry
; X64-NEXT: vbroadcastss 20(%rdi), %ymm0
; X64-NEXT: retq
entry:
  %ld = load <8 x float>, <8 x float>* %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %ret
}

define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_2i64_2i64_1111:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT: retl
;
; X64-LABEL: load_splat_2i64_2i64_1111:
; X64: ## BB#0: ## %entry
; X64-NEXT: vpbroadcastq 8(%rdi), %xmm0
; X64-NEXT: retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %ret
}

define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i64_2i64_1111:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vbroadcastsd 8(%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_4i64_2i64_1111:
; X64: ## BB#0: ## %entry
; X64-NEXT: vbroadcastsd 8(%rdi), %ymm0
; X64-NEXT: retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i64> %ret
}

define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i64_4i64_2222:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vbroadcastsd 16(%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_4i64_4i64_2222:
; X64: ## BB#0: ## %entry
; X64-NEXT: vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT: retq
entry:
  %ld = load <4 x i64>, <4 x i64>* %ptr
  %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i64> %ret
}

define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_2f64_2f64_1111:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT: retl
;
; X64-LABEL: load_splat_2f64_2f64_1111:
; X64: ## BB#0: ## %entry
; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT: retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %ret
}

define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f64_2f64_1111:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vbroadcastsd 8(%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_4f64_2f64_1111:
; X64: ## BB#0: ## %entry
; X64-NEXT: vbroadcastsd 8(%rdi), %ymm0
; X64-NEXT: retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x double> %ret
}

define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f64_4f64_2222:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vbroadcastsd 16(%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_4f64_4f64_2222:
; X64: ## BB#0: ## %entry
; X64-NEXT: vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT: retq
entry:
  %ld = load <4 x double>, <4 x double>* %ptr
  %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %ret
}

; make sure that we still don't support broadcast double into 128-bit vector
; this used to crash
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: I:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT: retl
;
; X64-LABEL: I:
; X64: ## BB#0: ## %entry
; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT: retq
entry:
  %q = load double, double* %ptr, align 4
  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
  ret <2 x double> %vecinit2.i
}

define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
; X32-LABEL: V111:
; X32: ## BB#0: ## %entry
; X32-NEXT: vpbroadcastd LCPI27_0, %ymm1
; X32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: V111:
; X64: ## BB#0: ## %entry
; X64-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
entry:
  %g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %g
}

define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
; X32-LABEL: V113:
; X32: ## BB#0: ## %entry
; X32-NEXT: vbroadcastss LCPI28_0, %ymm1
; X32-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: V113:
; X64: ## BB#0: ## %entry
; X64-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
entry:
  %g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
  ret <8 x float> %g
}

define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e2:
; X32: ## BB#0:
; X32-NEXT: vbroadcastss LCPI29_0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: _e2:
; X64: ## BB#0:
; X64-NEXT: vbroadcastss {{.*}}(%rip), %xmm0
; X64-NEXT: retq
  %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
  ret <4 x float> %vecinit6.i
}

define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e4:
; X32: ## BB#0:
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
; X32-NEXT: retl
;
; X64-LABEL: _e4:
; X64: ## BB#0:
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
; X64-NEXT: retq
  %vecinit0.i = insertelement <8 x i8> undef, i8 52, i32 0
  %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 4
  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 5
  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 6
  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 7
  ret <8 x i8> %vecinit7.i
}

define void @crash() nounwind alwaysinline {
; X32-LABEL: crash:
; X32: ## BB#0: ## %WGLoopsEntry
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: testb %al, %al
; X32-NEXT: je LBB31_1
; X32-NEXT: ## BB#2: ## %ret
; X32-NEXT: retl
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: LBB31_1: ## %footer349VF
; X32-NEXT: ## =>This Inner Loop Header: Depth=1
; X32-NEXT: jmp LBB31_1
;
; X64-LABEL: crash:
; X64: ## BB#0: ## %WGLoopsEntry
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testb %al, %al
; X64-NEXT: je LBB31_1
; X64-NEXT: ## BB#2: ## %ret
; X64-NEXT: retq
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: LBB31_1: ## %footer349VF
; X64-NEXT: ## =>This Inner Loop Header: Depth=1
; X64-NEXT: jmp LBB31_1
WGLoopsEntry:
  br i1 undef, label %ret, label %footer329VF

footer329VF:
  %A.0.inVF = fmul float undef, 6.553600e+04
  %B.0.in407VF = fmul <8 x float> undef, <float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04>
  %A.0VF = fptosi float %A.0.inVF to i32
  %B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32>
  %0 = and <8 x i32> %B.0408VF, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %1 = and i32 %A.0VF, 65535
  %temp1098VF = insertelement <8 x i32> undef, i32 %1, i32 0
  %vector1099VF = shufflevector <8 x i32> %temp1098VF, <8 x i32> undef, <8 x i32> zeroinitializer
  br i1 undef, label %preload1201VF, label %footer349VF

preload1201VF:
  br label %footer349VF

footer349VF:
  %2 = mul nsw <8 x i32> undef, %0
  %3 = mul nsw <8 x i32> undef, %vector1099VF
  br label %footer329VF

ret:
  ret void
}

define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp {
; X32-LABEL: _inreg0:
; X32: ## BB#0:
; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: _inreg0:
; X64: ## BB#0:
; X64-NEXT: vmovd %edi, %xmm0
; X64-NEXT: vbroadcastss %xmm0, %ymm0
; X64-NEXT: retq
  %in = insertelement <8 x i32> undef, i32 %scalar, i32 0
  %wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %wide
}

define <8 x float> @_inreg1(float %scalar) nounwind uwtable readnone ssp {
; X32-LABEL: _inreg1:
; X32: ## BB#0:
; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: _inreg1:
; X64: ## BB#0:
; X64-NEXT: vbroadcastss %xmm0, %ymm0
; X64-NEXT: retq
  %in = insertelement <8 x float> undef, float %scalar, i32 0
  %wide = shufflevector <8 x float> %in, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %wide
}

define <4 x float> @_inreg2(float %scalar) nounwind uwtable readnone ssp {
; X32-LABEL: _inreg2:
; X32: ## BB#0:
; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: _inreg2:
; X64: ## BB#0:
; X64-NEXT: vbroadcastss %xmm0, %xmm0
; X64-NEXT: retq
  %in = insertelement <4 x float> undef, float %scalar, i32 0
  %wide = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %wide
}

define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp {
; X32-LABEL: _inreg3:
; X32: ## BB#0:
; X32-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: _inreg3:
; X64: ## BB#0:
; X64-NEXT: vbroadcastsd %xmm0, %ymm0
; X64-NEXT: retq
  %in = insertelement <4 x double> undef, double %scalar, i32 0
  %wide = shufflevector <4 x double> %in, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %wide
}

define <8 x float> @_inreg8xfloat(<8 x float> %a) {
; X32-LABEL: _inreg8xfloat:
; X32: ## BB#0:
; X32-NEXT: vbroadcastss %xmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: _inreg8xfloat:
; X64: ## BB#0:
; X64-NEXT: vbroadcastss %xmm0, %ymm0
; X64-NEXT: retq
  %b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %b
}

define <4 x float> @_inreg4xfloat(<4 x float> %a) {
; X32-LABEL: _inreg4xfloat:
; X32: ## BB#0:
; X32-NEXT: vbroadcastss %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: _inreg4xfloat:
; X64: ## BB#0:
; X64-NEXT: vbroadcastss %xmm0, %xmm0
; X64-NEXT: retq
  %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %b
}

define <16 x i16> @_inreg16xi16(<16 x i16> %a) {
; X32-LABEL: _inreg16xi16:
; X32: ## BB#0:
; X32-NEXT: vpbroadcastw %xmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: _inreg16xi16:
; X64: ## BB#0:
; X64-NEXT: vpbroadcastw %xmm0, %ymm0
; X64-NEXT: retq
  %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %b
}

define <8 x i16> @_inreg8xi16(<8 x i16> %a) {
; X32-LABEL: _inreg8xi16:
; X32: ## BB#0:
; X32-NEXT: vpbroadcastw %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: _inreg8xi16:
; X64: ## BB#0:
; X64-NEXT: vpbroadcastw %xmm0, %xmm0
; X64-NEXT: retq
  %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %b
}

define <4 x i64> @_inreg4xi64(<4 x i64> %a) {
; X32-LABEL: _inreg4xi64:
; X32: ## BB#0:
; X32-NEXT: vbroadcastsd %xmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: _inreg4xi64:
; X64: ## BB#0:
; X64-NEXT: vbroadcastsd %xmm0, %ymm0
; X64-NEXT: retq
  %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %b
}

define <2 x i64> @_inreg2xi64(<2 x i64> %a) {
; X32-LABEL: _inreg2xi64:
; X32: ## BB#0:
; X32-NEXT: vpbroadcastq %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: _inreg2xi64:
; X64: ## BB#0:
; X64-NEXT: vpbroadcastq %xmm0, %xmm0
; X64-NEXT: retq
  %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %b
}

define <4 x double> @_inreg4xdouble(<4 x double> %a) {
; X32-LABEL: _inreg4xdouble:
; X32: ## BB#0:
; X32-NEXT: vbroadcastsd %xmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: _inreg4xdouble:
; X64: ## BB#0:
; X64-NEXT: vbroadcastsd %xmm0, %ymm0
; X64-NEXT: retq
  %b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %b
}

define <2 x double> @_inreg2xdouble(<2 x double> %a) {
; X32-LABEL: _inreg2xdouble:
; X32: ## BB#0:
; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT: retl
;
; X64-LABEL: _inreg2xdouble:
; X64: ## BB#0:
; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT: retq
  %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %b
}

define <8 x i32> @_inreg8xi32(<8 x i32> %a) {
; X32-LABEL: _inreg8xi32:
; X32: ## BB#0:
; X32-NEXT: vbroadcastss %xmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: _inreg8xi32:
; X64: ## BB#0:
; X64-NEXT: vbroadcastss %xmm0, %ymm0
; X64-NEXT: retq
  %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %b
}

define <4 x i32> @_inreg4xi32(<4 x i32> %a) {
; X32-LABEL: _inreg4xi32:
; X32: ## BB#0:
; X32-NEXT: vbroadcastss %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: _inreg4xi32:
; X64: ## BB#0:
; X64-NEXT: vbroadcastss %xmm0, %xmm0
; X64-NEXT: retq
  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %b
}

define <32 x i8> @_inreg32xi8(<32 x i8> %a) {
; X32-LABEL: _inreg32xi8:
; X32: ## BB#0:
; X32-NEXT: vpbroadcastb %xmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: _inreg32xi8:
; X64: ## BB#0:
; X64-NEXT: vpbroadcastb %xmm0, %ymm0
; X64-NEXT: retq
  %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
  ret <32 x i8> %b
}

define <16 x i8> @_inreg16xi8(<16 x i8> %a) {
; X32-LABEL: _inreg16xi8:
; X32: ## BB#0:
; X32-NEXT: vpbroadcastb %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: _inreg16xi8:
; X64: ## BB#0:
; X64-NEXT: vpbroadcastb %xmm0, %xmm0
; X64-NEXT: retq
  %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %b
}

; These tests check that a vbroadcast instruction is used when we have a splat
; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
; (via the insertelements).
define <8 x float> @splat_concat1(float %f) {
; X32-LABEL: splat_concat1:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat1:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = insertelement <4 x float> undef, float %f, i32 0
  %2 = insertelement <4 x float> %1, float %f, i32 1
  %3 = insertelement <4 x float> %2, float %f, i32 2
  %4 = insertelement <4 x float> %3, float %f, i32 3
  %5 = shufflevector <4 x float> %4, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %5
}

define <8 x float> @splat_concat2(float %f) {
; X32-LABEL: splat_concat2:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat2:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = insertelement <4 x float> undef, float %f, i32 0
  %2 = insertelement <4 x float> %1, float %f, i32 1
  %3 = insertelement <4 x float> %2, float %f, i32 2
  %4 = insertelement <4 x float> %3, float %f, i32 3
  %5 = insertelement <4 x float> undef, float %f, i32 0
  %6 = insertelement <4 x float> %5, float %f, i32 1
  %7 = insertelement <4 x float> %6, float %f, i32 2
  %8 = insertelement <4 x float> %7, float %f, i32 3
  %9 = shufflevector <4 x float> %4, <4 x float> %8, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %9
}

define <4 x double> @splat_concat3(double %d) {
; X32-LABEL: splat_concat3:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat3:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = insertelement <2 x double> undef, double %d, i32 0
  %2 = insertelement <2 x double> %1, double %d, i32 1
  %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %3
}

define <4 x double> @splat_concat4(double %d) {
; X32-LABEL: splat_concat4:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat4:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = insertelement <2 x double> undef, double %d, i32 0
  %2 = insertelement <2 x double> %1, double %d, i32 1
  %3 = insertelement <2 x double> undef, double %d, i32 0
  %4 = insertelement <2 x double> %3, double %d, i32 1
  %5 = shufflevector <2 x double> %2, <2 x double> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %5
}

; Test cases for <rdar://problem/16074331>.
; Instruction selection for broadcast instruction fails if
; the load cannot be folded into the broadcast.
; This happens if the load initially has one use but other uses are
; created later, or if selection DAG cannot prove that folding the
; load will not create a cycle in the DAG.
; Those test cases exercise the latter.
; CHECK-LABEL: isel_crash_16b
; CHECK: vpbroadcastb {{[^,]+}}, %xmm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_16b(i8* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <2 x i64>, align 16
  %__b.addr.i = alloca <2 x i64>, align 16
  %vCr = alloca <2 x i64>, align 16
  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
  %tmp2 = load i8, i8* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <16 x i8> undef, i8 %tmp2, i32 0
  %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
  %tmp3 = bitcast <16 x i8> %splat.splat to <2 x i64>
  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
  store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
  ret void
}

; CHECK-LABEL: isel_crash_32b
; CHECK: vpbroadcastb {{[^,]+}}, %ymm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_32b(i8* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <4 x i64>, align 16
  %__b.addr.i = alloca <4 x i64>, align 16
  %vCr = alloca <4 x i64>, align 16
  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
  %tmp2 = load i8, i8* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <32 x i8> undef, i8 %tmp2, i32 0
  %splat.splat = shufflevector <32 x i8> %splat.splatinsert, <32 x i8> undef, <32 x i32> zeroinitializer
  %tmp3 = bitcast <32 x i8> %splat.splat to <4 x i64>
  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
  store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
  ret void
}

; CHECK-LABEL: isel_crash_8w
; CHECK: vpbroadcastw {{[^,]+}}, %xmm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_8w(i16* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <2 x i64>, align 16
  %__b.addr.i = alloca <2 x i64>, align 16
  %vCr = alloca <2 x i64>, align 16
  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
  %tmp2 = load i16, i16* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <8 x i16> undef, i16 %tmp2, i32 0
  %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %tmp3 = bitcast <8 x i16> %splat.splat to <2 x i64>
  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
  store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
  ret void
}

; CHECK-LABEL: isel_crash_16w
; CHECK: vpbroadcastw {{[^,]+}}, %ymm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_16w(i16* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <4 x i64>, align 16
  %__b.addr.i = alloca <4 x i64>, align 16
  %vCr = alloca <4 x i64>, align 16
  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
  %tmp2 = load i16, i16* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <16 x i16> undef, i16 %tmp2, i32 0
  %splat.splat = shufflevector <16 x i16> %splat.splatinsert, <16 x i16> undef, <16 x i32> zeroinitializer
  %tmp3 = bitcast <16 x i16> %splat.splat to <4 x i64>
  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
  store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
  ret void
}

; CHECK-LABEL: isel_crash_4d
; CHECK: vbroadcastss {{[^,]+}}, %xmm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_4d(i32* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <2 x i64>, align 16
  %__b.addr.i = alloca <2 x i64>, align 16
  %vCr = alloca <2 x i64>, align 16
  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
  %tmp2 = load i32, i32* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <4 x i32> undef, i32 %tmp2, i32 0
  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  %tmp3 = bitcast <4 x i32> %splat.splat to <2 x i64>
  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
  store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
  ret void
}

; CHECK-LABEL: isel_crash_8d
; CHECK: vbroadcastss {{[^,]+}}, %ymm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_8d(i32* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <4 x i64>, align 16
  %__b.addr.i = alloca <4 x i64>, align 16
  %vCr = alloca <4 x i64>, align 16
  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
  %tmp2 = load i32, i32* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <8 x i32> undef, i32 %tmp2, i32 0
  %splat.splat = shufflevector <8 x i32> %splat.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
  %tmp3 = bitcast <8 x i32> %splat.splat to <4 x i64>
  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
  store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
  ret void
}

; X64-LABEL: isel_crash_2q
; X64: vpbroadcastq {{[^,]+}}, %xmm{{[0-9]+}}
; X64: ret
define void @isel_crash_2q(i64* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <2 x i64>, align 16
  %__b.addr.i = alloca <2 x i64>, align 16
  %vCr = alloca <2 x i64>, align 16
  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
  %tmp2 = load i64, i64* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <2 x i64> undef, i64 %tmp2, i32 0
  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
  store <2 x i64> %splat.splat, <2 x i64>* %__b.addr.i, align 16
  ret void
}

; X64-LABEL: isel_crash_4q
; X64: vbroadcastsd {{[^,]+}}, %ymm{{[0-9]+}}
; X64: ret
define void @isel_crash_4q(i64* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <4 x i64>, align 16
  %__b.addr.i = alloca <4 x i64>, align 16
  %vCr = alloca <4 x i64>, align 16
  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
  %tmp2 = load i64, i64* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <4 x i64> undef, i64 %tmp2, i32 0
  %splat.splat = shufflevector <4 x i64> %splat.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
  store <4 x i64> %splat.splat, <4 x i64>* %__b.addr.i, align 16
  ret void
}