; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck -check-prefix=AVX2 %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx -mattr=-popcnt | FileCheck -check-prefix=AVX1-NOPOPCNT %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 -mattr=-popcnt | FileCheck -check-prefix=AVX2-NOPOPCNT %s

; Checks the instruction sequence emitted for the vector llvm.ctpop intrinsics.
; The expected code is the vector version of:
; v = v - ((v >> 1) & 0x55555555)
; v = (v & 0x33333333) + ((v >> 2) & 0x33333333)
; v = (v + (v >> 4)) & 0xF0F0F0F
; v = v + (v >> 8)
; v = v + (v >> 16)
; v = v + (v >> 32) ; i64 only
; Each expected sequence below additionally ends with one more vpand after the
; last add.

; Population count of <8 x i32>; the expected sequence uses ymm registers and
; broadcast constants.
define <8 x i32> @test0(<8 x i32> %x) {
; AVX2-LABEL: @test0
entry:
; AVX2: vpsrld $1, %ymm
; AVX2-NEXT: vpbroadcastd
; AVX2-NEXT: vpand
; AVX2-NEXT: vpsubd
; AVX2-NEXT: vpbroadcastd
; AVX2-NEXT: vpand
; AVX2-NEXT: vpsrld $2
; AVX2-NEXT: vpand
; AVX2-NEXT: vpaddd
; AVX2-NEXT: vpsrld $4
; AVX2-NEXT: vpaddd
; AVX2-NEXT: vpbroadcastd
; AVX2-NEXT: vpand
; AVX2-NEXT: vpsrld $8
; AVX2-NEXT: vpaddd
; AVX2-NEXT: vpsrld $16
; AVX2-NEXT: vpaddd
; AVX2-NEXT: vpbroadcastd
; AVX2-NEXT: vpand
  %y = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %x)
  ret <8 x i32> %y
}

; Population count of <4 x i64>; same shape as test0 with quadword operations
; plus the extra shift-by-32 step.
define <4 x i64> @test1(<4 x i64> %x) {
; AVX2-NOPOPCNT-LABEL: @test1
entry:
; AVX2-NOPOPCNT: vpsrlq $1, %ymm
; AVX2-NOPOPCNT-NEXT: vpbroadcastq
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsubq
; AVX2-NOPOPCNT-NEXT: vpbroadcastq
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsrlq $2
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpsrlq $4
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpbroadcastq
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsrlq $8
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpsrlq $16
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpsrlq $32
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpbroadcastq
; AVX2-NOPOPCNT-NEXT: vpand
  %y = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %x)
  ret <4 x i64> %y
}

; Population count of <4 x i32> in xmm registers, checked for both the AVX2 and
; the AVX1 configurations with popcnt disabled.
define <4 x i32> @test2(<4 x i32> %x) {
; AVX2-NOPOPCNT-LABEL: @test2
; AVX1-NOPOPCNT-LABEL: @test2
entry:
; AVX2-NOPOPCNT: vpsrld $1, %xmm
; AVX2-NOPOPCNT-NEXT: vpbroadcastd
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsubd
; AVX2-NOPOPCNT-NEXT: vpbroadcastd
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsrld $2
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpaddd
; AVX2-NOPOPCNT-NEXT: vpsrld $4
; AVX2-NOPOPCNT-NEXT: vpaddd
; AVX2-NOPOPCNT-NEXT: vpbroadcastd
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsrld $8
; AVX2-NOPOPCNT-NEXT: vpaddd
; AVX2-NOPOPCNT-NEXT: vpsrld $16
; AVX2-NOPOPCNT-NEXT: vpaddd
; AVX2-NOPOPCNT-NEXT: vpbroadcastd
; AVX2-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT: vpsrld $1, %xmm
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpsubd
; AVX1-NOPOPCNT-NEXT: vmovdqa
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpsrld $2
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpaddd
; AVX1-NOPOPCNT-NEXT: vpsrld $4
; AVX1-NOPOPCNT-NEXT: vpaddd
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpsrld $8
; AVX1-NOPOPCNT-NEXT: vpaddd
; AVX1-NOPOPCNT-NEXT: vpsrld $16
; AVX1-NOPOPCNT-NEXT: vpaddd
; AVX1-NOPOPCNT-NEXT: vpand
  %y = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x)
  ret <4 x i32> %y
}

; Population count of <2 x i64> in xmm registers; both configurations expect
; the same sequence, with no broadcast instructions.
define <2 x i64> @test3(<2 x i64> %x) {
; AVX2-NOPOPCNT-LABEL: @test3
; AVX1-NOPOPCNT-LABEL: @test3
entry:
; AVX2-NOPOPCNT: vpsrlq $1, %xmm
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsubq
; AVX2-NOPOPCNT-NEXT: vmovdqa
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsrlq $2
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpsrlq $4
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsrlq $8
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpsrlq $16
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpsrlq $32
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT: vpsrlq $1, %xmm
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpsubq
; AVX1-NOPOPCNT-NEXT: vmovdqa
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpsrlq $2
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpaddq
; AVX1-NOPOPCNT-NEXT: vpsrlq $4
; AVX1-NOPOPCNT-NEXT: vpaddq
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpsrlq $8
; AVX1-NOPOPCNT-NEXT: vpaddq
; AVX1-NOPOPCNT-NEXT: vpsrlq $16
; AVX1-NOPOPCNT-NEXT: vpaddq
; AVX1-NOPOPCNT-NEXT: vpsrlq $32
; AVX1-NOPOPCNT-NEXT: vpaddq
; AVX1-NOPOPCNT-NEXT: vpand
  %y = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %x)
  ret <2 x i64> %y
}

declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)

declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)