; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3 | FileCheck %s --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX

; Each function below builds a horizontal add or subtract out of generic IR
; (a pair of shufflevectors feeding one fadd/fsub, or scalar extracts and
; adds) and the assertions expect it to be selected as a single
; hadd*/hsub* instruction per 128-bit register under SSE3, or a single
; vhadd*/vhsub* instruction under AVX.
;
; Shuffle mask indices follow the usual IR rule: for N-element operands,
; indices 0..N-1 pick from the first operand and N..2N-1 from the second.

; Even elements of x:y plus odd elements of x:y, i.e. <x0+x1, y0+y1>.
define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) {
; SSE3-LABEL: haddpd1:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddpd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddpd1:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
  %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3>
  %r = fadd <2 x double> %a, %b
  ret <2 x double> %r
}

; Same element pairs as haddpd1 but written in commuted form: %a is <x1, y0>
; and %b (whose shuffle takes %y as its first operand) is <x0, y1>.
define <2 x double> @haddpd2(<2 x double> %x, <2 x double> %y) {
; SSE3-LABEL: haddpd2:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddpd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddpd2:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 2>
  %b = shufflevector <2 x double> %y, <2 x double> %x, <2 x i32> <i32 2, i32 1>
  %r = fadd <2 x double> %a, %b
  ret <2 x double> %r
}

; Single-input form: only element 0 (x0+x1) is defined, element 1 is undef.
; The expected instruction uses the same register for both sources.
define <2 x double> @haddpd3(<2 x double> %x) {
; SSE3-LABEL: haddpd3:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddpd %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddpd3:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
  %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  %r = fadd <2 x double> %a, %b
  ret <2 x double> %r
}

; Even elements of x:y plus odd elements of x:y, i.e.
; <x0+x1, x2+x3, y0+y1, y2+y3>.
define <4 x float> @haddps1(<4 x float> %x, <4 x float> %y) {
; SSE3-LABEL: haddps1:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps1:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; Same element pairs as haddps1 in commuted form: %a is <x1, x2, y1, y2> and
; %b (whose shuffle takes %y as its first operand) is <x0, x3, y0, y3>.
define <4 x float> @haddps2(<4 x float> %x, <4 x float> %y) {
; SSE3-LABEL: haddps2:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps2:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
  %b = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> <i32 4, i32 7, i32 0, i32 3>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; Single-input form with element 0 undef; mask indices 4..7 refer to the
; undef second shuffle operand, so only element 1 (x2+x3) reads from %x.
define <4 x float> @haddps3(<4 x float> %x) {
; SSE3-LABEL: haddps3:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps3:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; Single-input form: elements 0 and 1 are x0+x1 and x2+x3, the upper two
; elements are undef.
define <4 x float> @haddps4(<4 x float> %x) {
; SSE3-LABEL: haddps4:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps4:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; Like haddps4, but element 1 is written commuted (x3+x2 instead of x2+x3).
define <4 x float> @haddps5(<4 x float> %x) {
; SSE3-LABEL: haddps5:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps5:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; Only element 0 (x0+x1) is defined; every other mask element is undef.
define <4 x float> @haddps6(<4 x float> %x) {
; SSE3-LABEL: haddps6:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps6:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; Only element 1 is defined, and in commuted order (x3+x2).
define <4 x float> @haddps7(<4 x float> %x) {
; SSE3-LABEL: haddps7:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps7:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 undef>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; Even elements of x:y minus odd elements of x:y, i.e. <x0-x1, y0-y1>.
define <2 x double> @hsubpd1(<2 x double> %x, <2 x double> %y) {
; SSE3-LABEL: hsubpd1:
; SSE3:       # BB#0:
; SSE3-NEXT:    hsubpd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: hsubpd1:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
  %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3>
  %r = fsub <2 x double> %a, %b
  ret <2 x double> %r
}

; Single-input form: only element 0 (x0-x1) is defined, element 1 is undef.
define <2 x double> @hsubpd2(<2 x double> %x) {
; SSE3-LABEL: hsubpd2:
; SSE3:       # BB#0:
; SSE3-NEXT:    hsubpd %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: hsubpd2:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
  %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  %r = fsub <2 x double> %a, %b
  ret <2 x double> %r
}

; Even elements of x:y minus odd elements of x:y, i.e.
; <x0-x1, x2-x3, y0-y1, y2-y3>.
define <4 x float> @hsubps1(<4 x float> %x, <4 x float> %y) {
; SSE3-LABEL: hsubps1:
; SSE3:       # BB#0:
; SSE3-NEXT:    hsubps %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: hsubps1:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %r = fsub <4 x float> %a, %b
  ret <4 x float> %r
}

; Subtract counterpart of haddps3: element 0 is undef and mask indices 4..7
; refer to the undef second shuffle operand, so only element 1 (x2-x3) reads
; from %x.
define <4 x float> @hsubps2(<4 x float> %x) {
; SSE3-LABEL: hsubps2:
; SSE3:       # BB#0:
; SSE3-NEXT:    hsubps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: hsubps2:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
  %r = fsub <4 x float> %a, %b
  ret <4 x float> %r
}

; Subtract counterpart of haddps4: elements 0 and 1 are x0-x1 and x2-x3, the
; upper two elements are undef.
define <4 x float> @hsubps3(<4 x float> %x) {
; SSE3-LABEL: hsubps3:
; SSE3:       # BB#0:
; SSE3-NEXT:    hsubps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: hsubps3:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  %r = fsub <4 x float> %a, %b
  ret <4 x float> %r
}

; Subtract counterpart of haddps6: only element 0 (x0-x1) is defined.
define <4 x float> @hsubps4(<4 x float> %x) {
; SSE3-LABEL: hsubps4:
; SSE3:       # BB#0:
; SSE3-NEXT:    hsubps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: hsubps4:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %r = fsub <4 x float> %a, %b
  ret <4 x float> %r
}

; 256-bit form. The masks interleave x and y within each group of four
; result elements: %a is <x0, x2, y0, y2, x4, x6, y4, y6> and %b holds the
; matching odd elements. The assertions expect two 128-bit haddps under SSE3
; and one ymm vhaddps under AVX.
define <8 x float> @vhaddps1(<8 x float> %x, <8 x float> %y) {
; SSE3-LABEL: vhaddps1:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm2, %xmm0
; SSE3-NEXT:    haddps %xmm3, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhaddps1:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
  %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
  %r = fadd <8 x float> %a, %b
  ret <8 x float> %r
}

; Same element pairs as vhaddps1 in commuted form: %a is
; <x1, x2, y1, y2, x5, x6, y5, y6> and %b (whose shuffle takes %y as its
; first operand) is <x0, x3, y0, y3, x4, x7, y4, y7>.
define <8 x float> @vhaddps2(<8 x float> %x, <8 x float> %y) {
; SSE3-LABEL: vhaddps2:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm2, %xmm0
; SSE3-NEXT:    haddps %xmm3, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhaddps2:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 5, i32 6, i32 13, i32 14>
  %b = shufflevector <8 x float> %y, <8 x float> %x, <8 x i32> <i32 8, i32 11, i32 0, i32 3, i32 12, i32 15, i32 4, i32 7>
  %r = fadd <8 x float> %a, %b
  ret <8 x float> %r
}

; Single-input 256-bit form. The masks mix explicit undef elements with
; indices 8..15, which refer to the undef second shuffle operand. The expected
; instructions use the same register for both sources.
define <8 x float> @vhaddps3(<8 x float> %x) {
; SSE3-LABEL: vhaddps3:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    haddps %xmm1, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhaddps3:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %ymm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
  %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
  %r = fadd <8 x float> %a, %b
  ret <8 x float> %r
}

; Subtract counterpart of vhaddps1 (same shuffle masks, fsub instead of fadd).
define <8 x float> @vhsubps1(<8 x float> %x, <8 x float> %y) {
; SSE3-LABEL: vhsubps1:
; SSE3:       # BB#0:
; SSE3-NEXT:    hsubps %xmm2, %xmm0
; SSE3-NEXT:    hsubps %xmm3, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhsubps1:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
  %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
  %r = fsub <8 x float> %a, %b
  ret <8 x float> %r
}

; Subtract counterpart of vhaddps3 (same shuffle masks, fsub instead of fadd).
define <8 x float> @vhsubps3(<8 x float> %x) {
; SSE3-LABEL: vhsubps3:
; SSE3:       # BB#0:
; SSE3-NEXT:    hsubps %xmm0, %xmm0
; SSE3-NEXT:    hsubps %xmm1, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhsubps3:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubps %ymm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
  %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
  %r = fsub <8 x float> %a, %b
  ret <8 x float> %r
}

; 256-bit double form: %a is <x0, y0, x2, y2> and %b is <x1, y1, x3, y3>,
; giving <x0+x1, y0+y1, x2+x3, y2+y3>. The assertions expect two 128-bit
; haddpd under SSE3 and one ymm vhaddpd under AVX.
define <4 x double> @vhaddpd1(<4 x double> %x, <4 x double> %y) {
; SSE3-LABEL: vhaddpd1:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddpd %xmm2, %xmm0
; SSE3-NEXT:    haddpd %xmm3, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhaddpd1:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %r = fadd <4 x double> %a, %b
  ret <4 x double> %r
}

; Subtract counterpart of vhaddpd1 (same shuffle masks, fsub instead of fadd).
define <4 x double> @vhsubpd1(<4 x double> %x, <4 x double> %y) {
; SSE3-LABEL: vhsubpd1:
; SSE3:       # BB#0:
; SSE3-NEXT:    hsubpd %xmm2, %xmm0
; SSE3-NEXT:    hsubpd %xmm3, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhsubpd1:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %r = fsub <4 x double> %a, %b
  ret <4 x double> %r
}

; Scalar form returning a narrower vector: the four elements of %v0 are
; extracted, added pairwise (0+1 and 2+3), and the two sums are inserted into
; a <2 x float>. The assertions expect this to become a single horizontal add
; of the input register with itself.
define <2 x float> @haddps_v2f32(<4 x float> %v0) {
; SSE3-LABEL: haddps_v2f32:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps_v2f32:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %v0.0 = extractelement <4 x float> %v0, i32 0
  %v0.1 = extractelement <4 x float> %v0, i32 1
  %v0.2 = extractelement <4 x float> %v0, i32 2
  %v0.3 = extractelement <4 x float> %v0, i32 3
  %op0 = fadd float %v0.0, %v0.1
  %op1 = fadd float %v0.2, %v0.3
  %res0 = insertelement <2 x float> undef, float %op0, i32 0
  %res1 = insertelement <2 x float> %res0, float %op1, i32 1
  ret <2 x float> %res1
}