; Check AVX2 instructions that are disabled when AVX512VL/AVX512BW are present.

; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=corei7-avx -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2 -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512bw -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx -o /dev/null
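
; Note: no FileCheck is attached to these RUN lines and all output is
; discarded, so this acts as a compile-and-encode smoke test: it passes as
; long as llc can select and encode every function below for each
; CPU/attribute combination without crashing.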

define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = and <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <2 x i64> @vpand_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = and <2 x i64> %a2, %b
  ret <2 x i64> %x
}

define <4 x i64> @vpandn_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %x = and <4 x i64> %a, %y
  ret <4 x i64> %x
}

define <2 x i64> @vpandn_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
  %x = and <2 x i64> %a, %y
  ret <2 x i64> %x
}

define <4 x i64> @vpor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = or <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <4 x i64> @vpxor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = xor <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <2 x i64> @vpor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = or <2 x i64> %a2, %b
  ret <2 x i64> %x
}

define <2 x i64> @vpxor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = xor <2 x i64> %a2, %b
  ret <2 x i64> %x
}
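
; With plain AVX/AVX2 the functions above should select the VEX-encoded
; vpand/vpandn/vpor/vpxor; with +avx512vl the EVEX forms (vpandq etc.) are
; presumably required instead, which is what this file guards against. The
; leading add keeps the value in the integer execution domain so the logic
; op is not matched as vandps/vandnps/vorps/vxorps.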

define <4 x i64> @test_vpaddq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @test_vpaddd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpaddw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @test_vpaddb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

define <4 x i64> @test_vpsubq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @test_vpsubd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpsubw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @test_vpsubb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

define <16 x i16> @test_vpmullw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <8 x i32> @test_vpcmpgtd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}

define <32 x i8> @test_vpcmpeqb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}

define <16 x i16> @test_vpcmpeqw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}

define <32 x i8> @test_vpcmpgtb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}

define <16 x i16> @test_vpcmpgtw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}

define <8 x i32> @test_vpcmpeqd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}

define <2 x i64> @test_vpaddq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = add <2 x i64> %i, %j
  ret <2 x i64> %x
}

define <4 x i32> @test_vpaddd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = add <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <8 x i16> @test_vpaddw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = add <8 x i16> %i, %j
  ret <8 x i16> %x
}

define <16 x i8> @test_vpaddb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = add <16 x i8> %i, %j
  ret <16 x i8> %x
}

define <2 x i64> @test_vpsubq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = sub <2 x i64> %i, %j
  ret <2 x i64> %x
}

define <4 x i32> @test_vpsubd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = sub <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <8 x i16> @test_vpsubw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = sub <8 x i16> %i, %j
  ret <8 x i16> %x
}

define <16 x i8> @test_vpsubb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = sub <16 x i8> %i, %j
  ret <16 x i8> %x
}

define <8 x i16> @test_vpmullw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = mul <8 x i16> %i, %j
  ret <8 x i16> %x
}

define <8 x i16> @test_vpcmpgtw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}

define <16 x i8> @test_vpcmpgtb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}

define <8 x i16> @test_vpcmpeqw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}

define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}

define <8 x i16> @shuffle_v8i16_vpalignr(<8 x i16> %a, <8 x i16> %b) {
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_vpalignr(<16 x i16> %a, <16 x i16> %b) {
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i16> %shuffle
}

define <16 x i8> @shuffle_v16i8_vpalignr(<16 x i8> %a, <16 x i8> %b) {
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i8> %shuffle
}

define <32 x i8> @shuffle_v32i8_vpalignr(<32 x i8> %a, <32 x i8> %b) {
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i8> %shuffle
}

define <2 x i64> @shuffle_v2i64_vpalignr(<2 x i64> %a, <2 x i64> %b) {
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}

define <4 x i32> @shuffle_v4i32_vpalignr(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
  ret <4 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_vpalignr(<8 x i32> %a, <8 x i32> %b) {
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
  ret <8 x i32> %shuffle
}
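
; The shuffle masks above are the patterns (v)palignr matches: indices of N
; and up select from %b, so e.g. the v16i8 mask <31, 0..14> yields b[15]
; followed by a[0..14], i.e. presumably vpalignr $15. With +avx512bw the
; EVEX-encoded form should be chosen.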

define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
  ret <2 x double> %bitcast64
}

define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
  %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
  ret <16 x i16> %shuffle
}
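
; The extract/insert tests below round-trip single elements through memory
; with align 1; they would presumably exercise the vpextr*/vpinsr* forms
; with memory operands (EVEX-encoded where AVX512VL/AVX512BW provide them).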

define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
  %r1 = extractelement <2 x i64> %x, i32 0
  %r2 = extractelement <2 x i64> %x, i32 1
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
  %r1 = extractelement <4 x i32> %x, i32 1
  %r2 = extractelement <4 x i32> %x, i32 3
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
  %r1 = extractelement <8 x i16> %x, i32 1
  %r2 = extractelement <8 x i16> %x, i32 3
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
  %r1 = extractelement <16 x i8> %x, i32 1
  %r2 = extractelement <16 x i8> %x, i32 3
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y, i64* %ptr) {
  %val = load i64, i64* %ptr
  %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <2 x i64> %r1, i64 %y, i32 0
  ret <2 x i64> %r2
}

define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
  %val = load i32, i32* %ptr
  %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3
  ret <4 x i32> %r2
}

define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
  %val = load i16, i16* %ptr
  %r1 = insertelement <8 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
  ret <8 x i16> %r2
}

define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
  %val = load i8, i8* %ptr
  %r1 = insertelement <16 x i8> %x, i8 %val, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
  ret <16 x i8> %r2
}

define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
  ret <4 x i32> %shuffle
}

define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i32> %shuffle
}

define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i8> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
; vmovshdup 256 test
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
; vmovshdup 128 test
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
; vmovsldup 256 test
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
; vmovsldup 128 test
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %shuffle
}

define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuffle
}

define void @store_floats(<4 x float> %x, i64* %p) {
  %a = fadd <4 x float> %x, %x
  %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %c = bitcast <2 x float> %b to i64
  store i64 %c, i64* %p
  ret void
}

define void @store_double(<2 x double> %x, i64* %p) {
  %a = fadd <2 x double> %x, %x
  %b = extractelement <2 x double> %a, i32 0
  %c = bitcast double %b to i64
  store i64 %c, i64* %p
  ret void
}

define void @store_h_double(<2 x double> %x, i64* %p) {
  %a = fadd <2 x double> %x, %x
  %b = extractelement <2 x double> %a, i32 1
  %c = bitcast double %b to i64
  store i64 %c, i64* %p
  ret void
}

define <2 x double> @test39(double* %ptr) nounwind {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @test40(<2 x double>* %ptr) nounwind {
  %v = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}
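
; AVX2 supplies variable-count shifts (vpsllv*, vpsrlv*, vpsravd) for the
; tests below, but there is no AVX2 instruction for a 64-bit arithmetic
; right shift, so @ashr_v4i64 and @ashr_v2i64 presumably need a longer
; sequence until +avx512vl makes vpsravq available. Likewise the 16-bit
; variable shifts have no AVX2 form and benefit from +avx512bw.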

define <8 x i32> @ashr_v8i32(<8 x i32> %a, <8 x i32> %b) {
  %shift = ashr <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <8 x i32> @lshr_v8i32(<8 x i32> %a, <8 x i32> %b) {
  %shift = lshr <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <8 x i32> @shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
  %shift = shl <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <8 x i32> @ashr_const_v8i32(<8 x i32> %a) {
  %shift = ashr <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %shift
}

define <8 x i32> @lshr_const_v8i32(<8 x i32> %a) {
  %shift = lshr <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %shift
}

define <8 x i32> @shl_const_v8i32(<8 x i32> %a) {
  %shift = shl <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %shift
}

define <4 x i64> @ashr_v4i64(<4 x i64> %a, <4 x i64> %b) {
  %shift = ashr <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <4 x i64> @lshr_v4i64(<4 x i64> %a, <4 x i64> %b) {
  %shift = lshr <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <4 x i64> @shl_v4i64(<4 x i64> %a, <4 x i64> %b) {
  %shift = shl <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <4 x i64> @ashr_const_v4i64(<4 x i64> %a) {
  %shift = ashr <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
  ret <4 x i64> %shift
}

define <4 x i64> @lshr_const_v4i64(<4 x i64> %a) {
  %shift = lshr <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
  ret <4 x i64> %shift
}

define <4 x i64> @shl_const_v4i64(<4 x i64> %a) {
  %shift = shl <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
  ret <4 x i64> %shift
}

define <16 x i16> @ashr_v16i16(<16 x i16> %a, <16 x i16> %b) {
  %shift = ashr <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <16 x i16> @lshr_v16i16(<16 x i16> %a, <16 x i16> %b) {
  %shift = lshr <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <16 x i16> @shl_v16i16(<16 x i16> %a, <16 x i16> %b) {
  %shift = shl <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <16 x i16> @ashr_const_v16i16(<16 x i16> %a) {
  %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

define <16 x i16> @lshr_const_v16i16(<16 x i16> %a) {
  %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

define <16 x i16> @shl_const_v16i16(<16 x i16> %a) {
  %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

define <4 x i32> @ashr_v4i32(<4 x i32> %a, <4 x i32> %b) {
  %shift = ashr <4 x i32> %a, %b
  ret <4 x i32> %shift
}

define <4 x i32> @shl_const_v4i32(<4 x i32> %a) {
  %shift = shl <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %shift
}

define <2 x i64> @ashr_v2i64(<2 x i64> %a, <2 x i64> %b) {
  %shift = ashr <2 x i64> %a, %b
  ret <2 x i64> %shift
}

define <2 x i64> @shl_const_v2i64(<2 x i64> %a) {
  %shift = shl <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %shift
}

define <8 x i16> @ashr_v8i16(<8 x i16> %a, <8 x i16> %b) {
  %shift = ashr <8 x i16> %a, %b
  ret <8 x i16> %shift
}

define <8 x i16> @lshr_v8i16(<8 x i16> %a, <8 x i16> %b) {
  %shift = lshr <8 x i16> %a, %b
  ret <8 x i16> %shift
}

define <8 x i16> @shl_v8i16(<8 x i16> %a, <8 x i16> %b) {
  %shift = shl <8 x i16> %a, %b
  ret <8 x i16> %shift
}

define <8 x i16> @ashr_const_v8i16(<8 x i16> %a) {
  %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}

define <8 x i16> @lshr_const_v8i16(<8 x i16> %a) {
  %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}

define <8 x i16> @shl_const_v8i16(<8 x i16> %a) {
  %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}

define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i16>
  ret <8 x i16> %C
}
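
; The splat idiom below (insertelement into lane 0, then a shufflevector
; with a zeroinitializer mask) is the canonical broadcast pattern; it should
; map to vpbroadcast*/vbroadcasts*, possibly after moving the scalar into an
; XMM register first on targets without a GPR-source broadcast.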

define <32 x i8> @_broadcast32xi8(i8 %a) {
  %b = insertelement <32 x i8> undef, i8 %a, i32 0
  %c = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
  ret <32 x i8> %c
}

define <16 x i8> @_broadcast16xi8(i8 %a) {
  %b = insertelement <16 x i8> undef, i8 %a, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %c
}

define <16 x i16> @_broadcast16xi16(i16 %a) {
  %b = insertelement <16 x i16> undef, i16 %a, i32 0
  %c = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %c
}

define <8 x i16> @_broadcast8xi16(i16 %a) {
  %b = insertelement <8 x i16> undef, i16 %a, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %c
}

define <8 x i32> @_broadcast8xi32(i32 %a) {
  %b = insertelement <8 x i32> undef, i32 %a, i32 0
  %c = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %c
}

define <4 x i32> @_broadcast4xi32(i32 %a) {
  %b = insertelement <4 x i32> undef, i32 %a, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %c
}

define <4 x i64> @_broadcast4xi64(i64 %a) {
  %b = insertelement <4 x i64> undef, i64 %a, i64 0
  %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %c
}

define <2 x i64> @_broadcast2xi64(i64 %a) {
  %b = insertelement <2 x i64> undef, i64 %a, i64 0
  %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %c
}

define <8 x float> @_broadcast8xfloat(float %a) {
  %b = insertelement <8 x float> undef, float %a, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %c
}

define <4 x float> @_broadcast4xfloat(float %a) {
  %b = insertelement <4 x float> undef, float %a, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %c
}

define <4 x double> @_broadcast4xdouble(double %a) {
  %b = insertelement <4 x double> undef, double %a, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %c
}

define <2 x double> @_broadcast2xdouble(double %a) {
  %b = insertelement <2 x double> undef, double %a, i32 0
  %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %c
}

define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  %x = fmul <4 x float> %a0, %a1
  %res = fsub <4 x float> %x, %a2
  ret <4 x float> %res
}

define <32 x i8> @test_cmpgtb(<32 x i8> %A) {
; Generates the following code:
;   vpxor %ymm1, %ymm1, %ymm1
;   vpcmpgtb %ymm0, %ymm1, %ymm0
  %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %B
}
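
; An arithmetic shift right by 7 replicates each byte's sign bit, i.e. it
; computes x < 0 per element, which is why it can be matched as a
; compare-greater-than against a zeroed register as shown above.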

define <4 x float> @_inreg4xfloat(float %a) {
  %b = insertelement <4 x float> undef, float %a, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %c
}

define <8 x float> @_inreg8xfloat(float %a) {
  %b = insertelement <8 x float> undef, float %a, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %c
}

define <4 x double> @_inreg4xdouble(double %a) {
  %b = insertelement <4 x double> undef, double %a, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %c
}