1 
2 #include <stdio.h>
3 
4 typedef  unsigned long long int  ULong;
5 typedef  unsigned int            UInt;
6 
7 __attribute__((noinline))
do_lzcnt64(UInt * flags,ULong * res,ULong arg)8 void do_lzcnt64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg )
9 {
10   ULong block[3] = { arg, 0ULL, 0ULL };
11   __asm__ __volatile__(
12     "movabsq $0x5555555555555555, %%r11" "\n\t"
13     "lzcntq 0(%0), %%r11"     "\n\t"
14     "movq %%r11, 8(%0)"       "\n\t"
15     "pushfq"                  "\n\t"
16     "popq %%r11"              "\n\t"
17     "movq %%r11, 16(%0)"      "\n"
18     : : "r"(&block[0]) : "r11","cc","memory"
19   );
20   *res = block[1];
21   *flags = block[2] & 0x8d5;
22 }
23 
24 __attribute__((noinline))
do_lzcnt32(UInt * flags,ULong * res,ULong arg)25 void do_lzcnt32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg )
26 {
27   ULong block[3] = { arg, 0ULL, 0ULL };
28   __asm__ __volatile__(
29     "movabsq $0x5555555555555555, %%r11" "\n\t"
30     "lzcntl 0(%0), %%r11d"    "\n\t"
31     "movq %%r11, 8(%0)"       "\n\t"
32     "pushfq"                  "\n\t"
33     "popq %%r11"              "\n\t"
34     "movq %%r11, 16(%0)"      "\n"
35     : : "r"(&block[0]) : "r11","cc","memory"
36   );
37   *res = block[1];
38   *flags = block[2] & 0x8d5;
39 }
40 
41 __attribute__((noinline))
do_lzcnt16(UInt * flags,ULong * res,ULong arg)42 void do_lzcnt16 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg )
43 {
44   ULong block[3] = { arg, 0ULL, 0ULL };
45   __asm__ __volatile__(
46     "movabsq $0x5555555555555555, %%r11" "\n\t"
47     "lzcntw 0(%0), %%r11w"    "\n\t"
48     "movq %%r11, 8(%0)"       "\n\t"
49     "pushfq"                  "\n\t"
50     "popq %%r11"              "\n\t"
51     "movq %%r11, 16(%0)"      "\n"
52     : : "r"(&block[0]) : "r11","cc","memory"
53   );
54   *res = block[1];
55   *flags = block[2] & 0x8d5;
56 }
57 
main(void)58 int main ( void )
59 {
60    ULong w;
61 
62    w = 0xFEDC192837475675ULL;
63    while (1) {
64       ULong res;
65       UInt  flags;
66       do_lzcnt64(&flags, &res, w);
67       printf("lzcntq %016llx -> %016llx %04x\n", w, res, flags);
68       if (w == 0) break;
69       w = ((w >> 2) | (w >> 1)) + (w / 17ULL);
70    }
71 
72    w = 0xFEDC192837475675ULL;
73    while (1) {
74       ULong res;
75       UInt  flags;
76       do_lzcnt32(&flags, &res, w);
77       printf("lzcntl %016llx -> %016llx %04x\n", w, res, flags);
78       if (w == 0) break;
79       w = ((w >> 2) | (w >> 1)) + (w / 17ULL);
80    }
81 
82    w = 0xFEDC192837475675ULL;
83    while (1) {
84       ULong res;
85       UInt  flags;
86       do_lzcnt16(&flags, &res, w);
87       printf("lzcntw %016llx -> %016llx %04x\n", w, res, flags);
88       if (w == 0) break;
89       w = ((w >> 2) | (w >> 1)) + (w / 17ULL);
90    }
91 
92    return 0;
93 }
94