; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

; Lane insert / lane extract codegen tests. Each function is compiled by llc
; and the emitted assembly is matched against the directives placed directly
; under the function header.

; --- insertelement of a scalar argument: expects INS from a w/x register ---

define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
; CHECK-LABEL: ins16bw:
; CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
  ret <16 x i8> %tmp3
}

define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
; CHECK-LABEL: ins8hw:
; CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
  ret <8 x i16> %tmp3
}

define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
; CHECK-LABEL: ins4sw:
; CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
  ret <4 x i32> %tmp3
}

define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
; CHECK-LABEL: ins2dw:
; CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
  %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
  ret <2 x i64> %tmp3
}

define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
; CHECK-LABEL: ins8bw:
; CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
  ret <8 x i8> %tmp3
}

define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
; CHECK-LABEL: ins4hw:
; CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
  ret <4 x i16> %tmp3
}

define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
; CHECK-LABEL: ins2sw:
; CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
  ret <2 x i32> %tmp3
}

; --- extractelement feeding insertelement, 128-bit source and destination:
; --- expects a single lane-to-lane INS ---

define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
; CHECK-LABEL: ins16b16:
; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
  ret <16 x i8> %tmp4
}

define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
; CHECK-LABEL: ins8h8:
; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
  ret <8 x i16> %tmp4
}

define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
; CHECK-LABEL: ins4s4:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
  ret <4 x i32> %tmp4
}

define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
; CHECK-LABEL: ins2d2:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
  ret <2 x i64> %tmp4
}

define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
; CHECK-LABEL: ins4f4:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x float> %tmp1, i32 2
  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
  ret <4 x float> %tmp4
}

define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
; CHECK-LABEL: ins2df2:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x double> %tmp1, i32 0
  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
  ret <2 x double> %tmp4
}

; --- 64-bit source vector, 128-bit destination vector ---

define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
; CHECK-LABEL: ins8b16:
; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
  ret <16 x i8> %tmp4
}

define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
; CHECK-LABEL: ins4h8:
; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
  ret <8 x i16> %tmp4
}

define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
; CHECK-LABEL: ins2s4:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
  ret <4 x i32> %tmp4
}

define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
; CHECK-LABEL: ins1d2:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
  ret <2 x i64> %tmp4
}

define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
; CHECK-LABEL: ins2f4:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x float> %tmp1, i32 1
  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
  ret <4 x float> %tmp4
}

define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
; CHECK-LABEL: ins1f2:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <1 x double> %tmp1, i32 0
  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
  ret <2 x double> %tmp4
}

; --- 128-bit source vector, 64-bit destination vector ---

define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-LABEL: ins16b8:
; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
  ret <8 x i8> %tmp4
}

define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
; CHECK-LABEL: ins8h4:
; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
  ret <4 x i16> %tmp4
}

define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
; CHECK-LABEL: ins4s2:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
  ret <2 x i32> %tmp4
}

define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins2d1:
; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
  ret <1 x i64> %tmp4
}

define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
; CHECK-LABEL: ins4f2:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x float> %tmp1, i32 2
  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
  ret <2 x float> %tmp4
}

; The whole single-lane result is overwritten, so a scalar move of lane 1 is
; expected here rather than an INS.
define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins2f1:
; CHECK: mov {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  %tmp3 = extractelement <2 x double> %tmp1, i32 1
  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
  ret <1 x double> %tmp4
}

; --- 64-bit source vector, 64-bit destination vector ---

define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-LABEL: ins8b8:
; CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
  ret <8 x i8> %tmp4
}

define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
; CHECK-LABEL: ins4h4:
; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
  ret <4 x i16> %tmp4
}

define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
; CHECK-LABEL: ins2s2:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 0
  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
  ret <2 x i32> %tmp4
}

define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins1d1:
; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
  ret <1 x i64> %tmp4
}

define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
; CHECK-LABEL: ins2f2:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
  %tmp3 = extractelement <2 x float> %tmp1, i32 0
  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
  ret <2 x float> %tmp4
}

; Negative test: no vector INS may be emitted for the <1 x double> case.
define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins1df1:
; CHECK-NOT: ins {{v[0-9]+}}
  %tmp3 = extractelement <1 x double> %tmp1, i32 0
  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
  ret <1 x double> %tmp4
}

; --- extractelement to a general-purpose register (zero-extending for the
; --- byte and halfword lanes) ---

define i32 @umovw16b(<16 x i8> %tmp1) {
; CHECK-LABEL: umovw16b:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = zext i8 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw8h(<8 x i16> %tmp1) {
; CHECK-LABEL: umovw8h:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = zext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw4s(<4 x i32> %tmp1) {
; CHECK-LABEL: umovw4s:
; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  ret i32 %tmp3
}

define i64 @umovx2d(<2 x i64> %tmp1) {
; CHECK-LABEL: umovx2d:
; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 1
  ret i64 %tmp3
}

; The full mnemonic is spelled out for the byte/halfword lanes below: FileCheck
; patterns match substrings, so a bare "mov" pattern would also be satisfied by
; a sign-extending "smov", which would be wrong for a zext.
define i32 @umovw8b(<8 x i8> %tmp1) {
; CHECK-LABEL: umovw8b:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 7
  %tmp4 = zext i8 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw4h(<4 x i16> %tmp1) {
; CHECK-LABEL: umovw4h:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = zext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw2s(<2 x i32> %tmp1) {
; CHECK-LABEL: umovw2s:
; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  ret i32 %tmp3
}

; A <1 x i64> lane lives in a d register, so the extract is a plain FMOV.
define i64 @umovx1d(<1 x i64> %tmp1) {
; CHECK-LABEL: umovx1d:
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  ret i64 %tmp3
}

; --- sign-extending lane extracts: expects SMOV ---

define i32 @smovw16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovw16b:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i32 @smovw8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovw8h:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

; The smovx* byte/halfword variants below return i32 despite their names, so
; their patterns accept either a w or an x destination register.
define i32 @smovx16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovx16b:
; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i32 @smovx8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovx8h:
; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i64 @smovx4s(<4 x i32> %tmp1) {
; CHECK-LABEL: smovx4s:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = sext i32 %tmp3 to i64
  ret i64 %tmp4
}

define i32 @smovw8b(<8 x i8> %tmp1) {
; CHECK-LABEL: smovw8b:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 4
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i32 @smovw4h(<4 x i16> %tmp1) {
; CHECK-LABEL: smovw4h:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i32 @smovx8b(<8 x i8> %tmp1) {
; CHECK-LABEL: smovx8b:
; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 6
  %tmp4 = sext i8 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @smovx4h(<4 x i16> %tmp1) {
; CHECK-LABEL: smovx4h:
; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i64 @smovx2s(<2 x i32> %tmp1) {
; CHECK-LABEL: smovx2s:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  %tmp4 = sext i32 %tmp3 to i64
  ret i64 %tmp4
}

; --- shufflevector masks that replace exactly one lane: expects a single
; --- lane-to-lane INS ---

define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
; CHECK-LABEL: test_vcopy_lane_s8:
; CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
  ret <8 x i8> %vset_lane
}

define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
; CHECK-LABEL: test_vcopyq_laneq_s8:
; CHECK: ins {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
  ret <16 x i8> %vset_lane
}

define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
; CHECK-LABEL: test_vcopy_lane_swap_s8:
; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
  ret <8 x i8> %vset_lane
}

define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
; CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %vset_lane
}

; --- the same scalar inserted into every lane: expects DUP from a w/x
; --- register ---

define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u8:
; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
  %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
  %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
  ret <8 x i8> %vecinit7.i
}

define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u16:
; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
  %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
  ret <4 x i16> %vecinit3.i
}

define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u32:
; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
  %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
  ret <2 x i32> %vecinit1.i
}

; A single-lane vector needs no DUP; a GPR-to-FPR move is expected instead.
define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u64:
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
  ret <1 x i64> %vecinit.i
}

define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u8:
; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
  %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
  %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
  %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
  %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
  %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
  %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
  %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
  %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
  %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
  %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
  %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
  %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
  %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
  %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
  %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
  %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
  ret <16 x i8> %vecinit15.i
}

define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u16:
; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
  %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
  ret <8 x i16> %vecinit7.i
}

define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u32:
; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
  %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
  ret <4 x i32> %vecinit3.i
}

define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u64:
; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
  %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
  %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
  ret <2 x i64> %vecinit1.i
}

; --- splat shuffles of one source lane: expects DUP from a vector lane ---

define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s8:
; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i8> %shuffle
}

define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s16:
; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i16> %shuffle
}

define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s32:
; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i32> %shuffle
}

; The patterns for the widening splats below name the "dup" mnemonic
; explicitly; an operand-only pattern would accept any instruction that
; happens to use the same operands.
define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s8:
; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i8> %shuffle
}

define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s16:
; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i16> %shuffle
}

define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s32:
; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shuffle
}

define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s64:
; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %shuffle
}

define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s8:
; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i8> %shuffle
}

define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s16:
; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i16> %shuffle
}

define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s32:
; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i32> %shuffle
}

define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s8:
; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i8> %shuffle
}

define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s16:
; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i16> %shuffle
}

define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s32:
; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shuffle
}

define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s64:
; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %shuffle
}

; --- bitcast of a 64-bit vector to i64: expects FMOV from d to x ---

define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
; CHECK-LABEL: test_bitcastv8i8toi64:
  %res = bitcast <8 x i8> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
; CHECK-LABEL: test_bitcastv4i16toi64:
  %res = bitcast <4 x i16> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
; CHECK-LABEL: test_bitcastv2i32toi64:
  %res = bitcast <2 x i32> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
; CHECK-LABEL: test_bitcastv2f32toi64:
  %res = bitcast <2 x float> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
; CHECK-LABEL: test_bitcastv1i64toi64:
  %res = bitcast <1 x i64> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
; CHECK-LABEL: test_bitcastv1f64toi64:
  %res = bitcast <1 x double> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

; --- bitcast of i64 to a 64-bit vector: expects FMOV from x to d ---

define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov8i8:
  %res = bitcast i64 %in to <8 x i8>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <8 x i8> %res
}

define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov4i16:
  %res = bitcast i64 %in to <4 x i16>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <4 x i16> %res
}

define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov2i32:
  %res = bitcast i64 %in to <2 x i32>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <2 x i32> %res
}

define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov2f32:
  %res = bitcast i64 %in to <2 x float>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <2 x float> %res
}

define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov1i64:
  %res = bitcast i64 %in to <1 x i64>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <1 x i64> %res
}

define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov1f64:
  %res = bitcast i64 %in to <1 x double>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <1 x double> %res
}

; --- bitcast from a 64-bit vector to <1 x double> between two real
; --- operations: the convert must directly follow the negate ---

define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
; CHECK-LABEL: test_bitcastv8i8tov1f64:
; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <8 x i8> zeroinitializer, %a
  %1 = bitcast <8 x i8> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
; CHECK-LABEL: test_bitcastv4i16tov1f64:
; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <4 x i16> zeroinitializer, %a
  %1 = bitcast <4 x i16> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
; CHECK-LABEL: test_bitcastv2i32tov1f64:
; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <2 x i32> zeroinitializer, %a
  %1 = bitcast <2 x i32> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1i64tov1f64:
; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <1 x i64> zeroinitializer, %a
  %1 = bitcast <1 x i64> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
; CHECK-LABEL: test_bitcastv2f32tov1f64:
; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
  %1 = bitcast <2 x float> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

; --- bitcast from <1 x double> to another 64-bit vector between two real
; --- operations: the negate must directly follow the convert ---

define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov8i8:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
  %sub.i = sub <8 x i8> zeroinitializer, %1
  ret <8 x i8> %sub.i
}

define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov4i16:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
  %sub.i = sub <4 x i16> zeroinitializer, %1
  ret <4 x i16> %sub.i
}

define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov2i32:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
  %sub.i = sub <2 x i32> zeroinitializer, %1
  ret <2 x i32> %sub.i
}

define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov1i64:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
  %sub.i = sub <1 x i64> zeroinitializer, %1
  ret <1 x i64> %sub.i
}

define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov2f32:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <2 x float>
  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
  ret <2 x float> %sub.i
}

; Test insert element into an undef vector
define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
; CHECK-LABEL: scalar_to_vector.v8i8:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <8 x i8> undef, i8 %a, i32 0
  ret <8 x i8> %b
}

define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
; CHECK-LABEL: scalar_to_vector.v16i8:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <16 x i8> undef, i8 %a, i32 0
  ret <16 x i8> %b
}

define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
; CHECK-LABEL: scalar_to_vector.v4i16:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <4 x i16> undef, i16 %a, i32 0
  ret <4 x i16> %b
}

define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
; CHECK-LABEL: scalar_to_vector.v8i16:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <8 x i16> undef, i16 %a, i32 0
  ret <8 x i16> %b
}

define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
; CHECK-LABEL: scalar_to_vector.v2i32:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <2 x i32> undef, i32 %a, i32 0
  ret <2 x i32> %b
}

define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
; CHECK-LABEL: scalar_to_vector.v4i32:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <4 x i32> undef, i32 %a, i32 0
  ret <4 x i32> %b
}

define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
; CHECK-LABEL: scalar_to_vector.v2i64:
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  %b = insertelement <2 x i64> undef, i64 %a, i32 0
  ret <2 x i64> %b
}

; --- splat of the only lane of a single-element vector: expects DUP from
; --- lane 0 with fixed v0 operands ---

define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
; CHECK-LABEL: testDUP.v1i8:
; CHECK: dup v0.8b, v0.b[0]
  %b = extractelement <1 x i8> %a, i32 0
  %c = insertelement <8 x i8> undef, i8 %b, i32 0
  %d = insertelement <8 x i8> %c, i8 %b, i32 1
  %e = insertelement <8 x i8> %d, i8 %b, i32 2
  %f = insertelement <8 x i8> %e, i8 %b, i32 3
  %g = insertelement <8 x i8> %f, i8 %b, i32 4
  %h = insertelement <8 x i8> %g, i8 %b, i32 5
  %i = insertelement <8 x i8> %h, i8 %b, i32 6
  %j = insertelement <8 x i8> %i, i8 %b, i32 7
  ret <8 x i8> %j
}

define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
; CHECK-LABEL: testDUP.v1i16:
; CHECK: dup v0.8h, v0.h[0]
  %b = extractelement <1 x i16> %a, i32 0
  %c = insertelement <8 x i16> undef, i16 %b, i32 0
  %d = insertelement <8 x i16> %c, i16 %b, i32 1
  %e = insertelement <8 x i16> %d, i16 %b, i32 2
  %f = insertelement <8 x i16> %e, i16 %b, i32 3
  %g = insertelement <8 x i16> %f, i16 %b, i32 4
  %h = insertelement <8 x i16> %g, i16 %b, i32 5
  %i = insertelement <8 x i16> %h, i16 %b, i32 6
  %j = insertelement <8 x i16> %i, i16 %b, i32 7
  ret <8 x i16> %j
}

; Splat of the only lane of a <1 x i32>: expects DUP from lane 0.
define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
; CHECK-LABEL: testDUP.v1i32:
; CHECK: dup v0.4s, v0.s[0]
  %b = extractelement <1 x i32> %a, i32 0
  %c = insertelement <4 x i32> undef, i32 %b, i32 0
  %d = insertelement <4 x i32> %c, i32 %b, i32 1
  %e = insertelement <4 x i32> %d, i32 %b, i32 2
  %f = insertelement <4 x i32> %e, i32 %b, i32 3
  ret <4 x i32> %f
}

; Rebuilds an <8 x i8> from lanes 0-7 of a <16 x i8>, lane by lane. Only the
; return is matched; no particular instruction sequence is required.
define <8 x i8> @getl(<16 x i8> %x) #0 {
; CHECK-LABEL: getl:
; CHECK: ret
  %vecext = extractelement <16 x i8> %x, i32 0
  %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
  %vecext1 = extractelement <16 x i8> %x, i32 1
  %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
  %vecext3 = extractelement <16 x i8> %x, i32 2
  %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
  %vecext5 = extractelement <16 x i8> %x, i32 3
  %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
  %vecext7 = extractelement <16 x i8> %x, i32 4
  %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
  %vecext9 = extractelement <16 x i8> %x, i32 5
  %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
  %vecext11 = extractelement <16 x i8> %x, i32 6
  %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
  %vecext13 = extractelement <16 x i8> %x, i32 7
  %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
  ret <8 x i8> %vecinit14
}

; --- extract a wide lane, truncate it, then splat the narrow value: expects
; --- a single DUP that reads the matching narrow lane of the source (wide
; --- lane index scaled by the width ratio) ---

define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
; CHECK-LABEL: test_dup_v2i32_v4i16:
; CHECK: dup v0.4h, v0.h[2]
entry:
  %x = extractelement <2 x i32> %a, i32 1
  %vget_lane = trunc i32 %x to i16
  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  ret <4 x i16> %vecinit3.i
}

define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
; CHECK-LABEL: test_dup_v4i32_v8i16:
; CHECK: dup v0.8h, v0.h[6]
entry:
  %x = extractelement <4 x i32> %a, i32 3
  %vget_lane = trunc i32 %x to i16
  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
  ret <8 x i16> %vecinit7.i
}

define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
; CHECK-LABEL: test_dup_v1i64_v4i16:
; CHECK: dup v0.4h, v0.h[0]
entry:
  %x = extractelement <1 x i64> %a, i32 0
  %vget_lane = trunc i64 %x to i16
  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  ret <4 x i16> %vecinit3.i
}

define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
; CHECK-LABEL: test_dup_v1i64_v2i32:
; CHECK: dup v0.2s, v0.s[0]
entry:
  %x = extractelement <1 x i64> %a, i32 0
  %vget_lane = trunc i64 %x to i32
  %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
  ret <2 x i32> %vecinit1.i
}

define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
; CHECK-LABEL: test_dup_v2i64_v8i16:
; CHECK: dup v0.8h, v0.h[4]
entry:
  %x = extractelement <2 x i64> %a, i32 1
  %vget_lane = trunc i64 %x to i16
  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
  ret <8 x i16> %vecinit7.i
}

define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
; CHECK-LABEL: test_dup_v2i64_v4i32:
; CHECK: dup v0.4s, v0.s[2]
entry:
  %x = extractelement <2 x i64> %a, i32 1
  %vget_lane = trunc i64 %x to i32
  %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
  ret <4 x i32> %vecinit3.i
}

define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
; CHECK-LABEL: test_dup_v4i32_v4i16:
; CHECK: dup v0.4h, v0.h[2]
entry:
  %x = extractelement <4 x i32> %a, i32 1
  %vget_lane = trunc i32 %x to i16
  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  ret <4 x i16> %vecinit3.i
}

define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
; CHECK-LABEL: test_dup_v2i64_v4i16:
; CHECK: dup v0.4h, v0.h[0]
1006entry: 1007 %x = extractelement <2 x i64> %a, i32 0 1008 %vget_lane = trunc i64 %x to i16 1009 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 1010 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 1011 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1012 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1013 ret <4 x i16> %vecinit3.i 1014} 1015 1016define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) { 1017; CHECK-LABEL: test_dup_v2i64_v2i32: 1018; CHECK: dup v0.2s, v0.s[0] 1019entry: 1020 %x = extractelement <2 x i64> %a, i32 0 1021 %vget_lane = trunc i64 %x to i32 1022 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0 1023 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1 1024 ret <2 x i32> %vecinit1.i 1025} 1026 1027 1028define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) { 1029; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32: 1030; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s 1031; CHECK-NEXT: ret 1032entry: 1033 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) 1034 %1 = insertelement <1 x float> undef, float %0, i32 0 1035 %2 = extractelement <1 x float> %1, i32 0 1036 %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0 1037 ret <2 x float> %vecinit1.i 1038} 1039 1040define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) { 1041; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32: 1042; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s 1043; CHECK-NEXT: ret 1044entry: 1045 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) 1046 %1 = insertelement <1 x float> undef, float %0, i32 0 1047 %2 = extractelement <1 x float> %1, i32 0 1048 %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0 1049 ret <4 x float> %vecinit1.i 1050} 1051 1052declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>) 1053 1054define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) { 
1055; CHECK-LABEL: test_concat_undef_v1i32: 1056; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1057entry: 1058 %0 = extractelement <2 x i32> %a, i32 0 1059 %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1 1060 ret <2 x i32> %vecinit1.i 1061} 1062 1063declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4 1064 1065define <2 x i32> @test_concat_v1i32_undef(i32 %a) { 1066; CHECK-LABEL: test_concat_v1i32_undef: 1067; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} 1068; CHECK-NEXT: ret 1069entry: 1070 %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) 1071 %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0 1072 ret <2 x i32> %vecinit.i432 1073} 1074 1075define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) { 1076; CHECK-LABEL: test_concat_same_v1i32_v1i32: 1077; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0] 1078entry: 1079 %0 = extractelement <2 x i32> %a, i32 0 1080 %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0 1081 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1 1082 ret <2 x i32> %vecinit1.i 1083} 1084 1085define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) { 1086; CHECK-LABEL: test_concat_diff_v1i32_v1i32: 1087; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} 1088; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} 1089; CHECK: ins {{v[0-9]+}}.s[1], w{{[0-9]+}} 1090entry: 1091 %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) 1092 %d = insertelement <2 x i32> undef, i32 %c, i32 0 1093 %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b) 1094 %f = insertelement <2 x i32> undef, i32 %e, i32 0 1095 %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2> 1096 ret <2 x i32> %h 1097} 1098 1099define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { 1100; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8: 1101; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1102entry: 1103 %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, 
i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 1104 ret <16 x i8> %vecinit30 1105} 1106 1107define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { 1108; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8: 1109; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1110entry: 1111 %vecext = extractelement <8 x i8> %x, i32 0 1112 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1113 %vecext1 = extractelement <8 x i8> %x, i32 1 1114 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1115 %vecext3 = extractelement <8 x i8> %x, i32 2 1116 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1117 %vecext5 = extractelement <8 x i8> %x, i32 3 1118 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1119 %vecext7 = extractelement <8 x i8> %x, i32 4 1120 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1121 %vecext9 = extractelement <8 x i8> %x, i32 5 1122 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1123 %vecext11 = extractelement <8 x i8> %x, i32 6 1124 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1125 %vecext13 = extractelement <8 x i8> %x, i32 7 1126 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1127 %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 1128 ret <16 x i8> %vecinit30 1129} 1130 1131define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 { 1132; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8: 1133; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1134entry: 1135 %vecext = extractelement <16 x i8> %x, i32 0 1136 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1137 %vecext1 = extractelement <16 x i8> %x, i32 1 1138 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1139 %vecext3 = extractelement <16 x i8> %x, i32 2 1140 %vecinit4 
= insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1141 %vecext5 = extractelement <16 x i8> %x, i32 3 1142 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1143 %vecext7 = extractelement <16 x i8> %x, i32 4 1144 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1145 %vecext9 = extractelement <16 x i8> %x, i32 5 1146 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1147 %vecext11 = extractelement <16 x i8> %x, i32 6 1148 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1149 %vecext13 = extractelement <16 x i8> %x, i32 7 1150 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1151 %vecext15 = extractelement <8 x i8> %y, i32 0 1152 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 1153 %vecext17 = extractelement <8 x i8> %y, i32 1 1154 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 1155 %vecext19 = extractelement <8 x i8> %y, i32 2 1156 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 1157 %vecext21 = extractelement <8 x i8> %y, i32 3 1158 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 1159 %vecext23 = extractelement <8 x i8> %y, i32 4 1160 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 1161 %vecext25 = extractelement <8 x i8> %y, i32 5 1162 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 1163 %vecext27 = extractelement <8 x i8> %y, i32 6 1164 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 1165 %vecext29 = extractelement <8 x i8> %y, i32 7 1166 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 1167 ret <16 x i8> %vecinit30 1168} 1169 1170define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 { 1171; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8: 1172; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1173entry: 1174 %vecext = extractelement <8 x i8> %x, i32 0 1175 %vecinit = insertelement <16 x 
i8> undef, i8 %vecext, i32 0 1176 %vecext1 = extractelement <8 x i8> %x, i32 1 1177 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1178 %vecext3 = extractelement <8 x i8> %x, i32 2 1179 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1180 %vecext5 = extractelement <8 x i8> %x, i32 3 1181 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1182 %vecext7 = extractelement <8 x i8> %x, i32 4 1183 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1184 %vecext9 = extractelement <8 x i8> %x, i32 5 1185 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1186 %vecext11 = extractelement <8 x i8> %x, i32 6 1187 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1188 %vecext13 = extractelement <8 x i8> %x, i32 7 1189 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1190 %vecext15 = extractelement <8 x i8> %y, i32 0 1191 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 1192 %vecext17 = extractelement <8 x i8> %y, i32 1 1193 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 1194 %vecext19 = extractelement <8 x i8> %y, i32 2 1195 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 1196 %vecext21 = extractelement <8 x i8> %y, i32 3 1197 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 1198 %vecext23 = extractelement <8 x i8> %y, i32 4 1199 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 1200 %vecext25 = extractelement <8 x i8> %y, i32 5 1201 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 1202 %vecext27 = extractelement <8 x i8> %y, i32 6 1203 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 1204 %vecext29 = extractelement <8 x i8> %y, i32 7 1205 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 1206 ret <16 x i8> %vecinit30 1207} 1208 1209define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 
x i16> %y) #0 { 1210; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16: 1211; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1212entry: 1213 %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 1214 ret <8 x i16> %vecinit14 1215} 1216 1217define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 { 1218; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16: 1219; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1220entry: 1221 %vecext = extractelement <4 x i16> %x, i32 0 1222 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1223 %vecext1 = extractelement <4 x i16> %x, i32 1 1224 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1225 %vecext3 = extractelement <4 x i16> %x, i32 2 1226 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1227 %vecext5 = extractelement <4 x i16> %x, i32 3 1228 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1229 %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 1230 ret <8 x i16> %vecinit14 1231} 1232 1233define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 { 1234; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16: 1235; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1236entry: 1237 %vecext = extractelement <8 x i16> %x, i32 0 1238 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1239 %vecext1 = extractelement <8 x i16> %x, i32 1 1240 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1241 %vecext3 = extractelement <8 x i16> %x, i32 2 1242 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1243 %vecext5 = extractelement <8 x i16> %x, i32 3 1244 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1245 %vecext7 = extractelement <4 x i16> %y, i32 0 1246 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 1247 %vecext9 = extractelement <4 x 
i16> %y, i32 1 1248 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 1249 %vecext11 = extractelement <4 x i16> %y, i32 2 1250 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 1251 %vecext13 = extractelement <4 x i16> %y, i32 3 1252 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 1253 ret <8 x i16> %vecinit14 1254} 1255 1256define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 { 1257; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16: 1258; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1259entry: 1260 %vecext = extractelement <4 x i16> %x, i32 0 1261 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1262 %vecext1 = extractelement <4 x i16> %x, i32 1 1263 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1264 %vecext3 = extractelement <4 x i16> %x, i32 2 1265 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1266 %vecext5 = extractelement <4 x i16> %x, i32 3 1267 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1268 %vecext7 = extractelement <4 x i16> %y, i32 0 1269 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 1270 %vecext9 = extractelement <4 x i16> %y, i32 1 1271 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 1272 %vecext11 = extractelement <4 x i16> %y, i32 2 1273 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 1274 %vecext13 = extractelement <4 x i16> %y, i32 3 1275 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 1276 ret <8 x i16> %vecinit14 1277} 1278 1279define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 { 1280; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32: 1281; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1282entry: 1283 %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1284 ret <4 x i32> %vecinit6 1285} 1286 1287define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x 
i32> %x, <4 x i32> %y) #0 { 1288; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32: 1289; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1290entry: 1291 %vecext = extractelement <2 x i32> %x, i32 0 1292 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 1293 %vecext1 = extractelement <2 x i32> %x, i32 1 1294 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 1295 %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1296 ret <4 x i32> %vecinit6 1297} 1298 1299define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 { 1300; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32: 1301; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1302entry: 1303 %vecext = extractelement <4 x i32> %x, i32 0 1304 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 1305 %vecext1 = extractelement <4 x i32> %x, i32 1 1306 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 1307 %vecext3 = extractelement <2 x i32> %y, i32 0 1308 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2 1309 %vecext5 = extractelement <2 x i32> %y, i32 1 1310 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3 1311 ret <4 x i32> %vecinit6 1312} 1313 1314define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 { 1315; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32: 1316; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1317entry: 1318 %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1319 ret <4 x i32> %vecinit6 1320} 1321 1322define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 { 1323; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64: 1324; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 1325entry: 1326 %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2> 1327 ret <2 x i64> %vecinit2 1328} 1329 1330define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 
{ 1331; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64: 1332; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 1333entry: 1334 %vecext = extractelement <1 x i64> %x, i32 0 1335 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1336 %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2> 1337 ret <2 x i64> %vecinit2 1338} 1339 1340define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 { 1341; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64: 1342; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1343entry: 1344 %vecext = extractelement <2 x i64> %x, i32 0 1345 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1346 %vecext1 = extractelement <1 x i64> %y, i32 0 1347 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 1348 ret <2 x i64> %vecinit2 1349} 1350 1351define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 { 1352; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64: 1353; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1354entry: 1355 %vecext = extractelement <1 x i64> %x, i32 0 1356 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1357 %vecext1 = extractelement <1 x i64> %y, i32 0 1358 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 1359 ret <2 x i64> %vecinit2 1360} 1361 1362 1363define <4 x i16> @concat_vector_v4i16_const() { 1364; CHECK-LABEL: concat_vector_v4i16_const: 1365; CHECK: movi {{d[0-9]+}}, #0 1366 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer 1367 ret <4 x i16> %r 1368} 1369 1370define <4 x i16> @concat_vector_v4i16_const_one() { 1371; CHECK-LABEL: concat_vector_v4i16_const_one: 1372; CHECK: movi {{v[0-9]+}}.4h, #0x1 1373 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer 1374 ret <4 x i16> %r 1375} 1376 1377define <4 x i32> @concat_vector_v4i32_const() { 1378; CHECK-LABEL: concat_vector_v4i32_const: 1379; CHECK: movi {{v[0-9]+}}.2d, #0 1380 %r = 
shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer 1381 ret <4 x i32> %r 1382} 1383 1384define <8 x i8> @concat_vector_v8i8_const() { 1385; CHECK-LABEL: concat_vector_v8i8_const: 1386; CHECK: movi {{d[0-9]+}}, #0 1387 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer 1388 ret <8 x i8> %r 1389} 1390 1391define <8 x i16> @concat_vector_v8i16_const() { 1392; CHECK-LABEL: concat_vector_v8i16_const: 1393; CHECK: movi {{v[0-9]+}}.2d, #0 1394 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer 1395 ret <8 x i16> %r 1396} 1397 1398define <8 x i16> @concat_vector_v8i16_const_one() { 1399; CHECK-LABEL: concat_vector_v8i16_const_one: 1400; CHECK: movi {{v[0-9]+}}.8h, #0x1 1401 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer 1402 ret <8 x i16> %r 1403} 1404 1405define <16 x i8> @concat_vector_v16i8_const() { 1406; CHECK-LABEL: concat_vector_v16i8_const: 1407; CHECK: movi {{v[0-9]+}}.2d, #0 1408 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer 1409 ret <16 x i8> %r 1410} 1411 1412define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) { 1413; CHECK-LABEL: concat_vector_v4i16: 1414; CHECK: dup v0.4h, v0.h[0] 1415 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer 1416 ret <4 x i16> %r 1417} 1418 1419define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) { 1420; CHECK-LABEL: concat_vector_v4i32: 1421; CHECK: dup v0.4s, v0.s[0] 1422 %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer 1423 ret <4 x i32> %r 1424} 1425 1426define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) { 1427; CHECK-LABEL: concat_vector_v8i8: 1428; CHECK: dup v0.8b, v0.b[0] 1429 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer 1430 ret <8 x i8> %r 1431} 1432 1433define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) { 1434; CHECK-LABEL: concat_vector_v8i16: 1435; CHECK: dup 
v0.8h, v0.h[0] 1436 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer 1437 ret <8 x i16> %r 1438} 1439 1440define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) { 1441; CHECK-LABEL: concat_vector_v16i8: 1442; CHECK: dup v0.16b, v0.b[0] 1443 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer 1444 ret <16 x i8> %r 1445} 1446