1 
2 #include <stdio.h>
3 
/* Unsigned integer aliases used throughout this file:
 *   ULong - unsigned long long
 *   UInt  - unsigned int
 */
typedef  unsigned long long  ULong;
typedef  unsigned            UInt;
6 
7 __attribute__((noinline))
do_andn64(UInt * flags,ULong * res,ULong arg1,ULong arg2)8 void do_andn64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg1, ULong arg2 )
9 {
10   ULong tem, flag;
11   __asm__ __volatile__(
12     "movabsq $0x5555555555555555, %0" "\n\t"
13     "andn %2, %3, %0"         "\n\t"
14     "pushfq"		      "\n\t"
15     "popq %1"                 "\n"
16     : "=&r" (tem), "=r" (flag) : "r" (arg1), "r" (arg2) : "cc"
17   );
18   *res = tem;
19   *flags = flag & 0x8d5;
20   __asm__ __volatile__(
21     "movabsq $0x5555555555555555, %0" "\n\t"
22     "andn %2, %3, %0"         "\n\t"
23     "pushfq"		      "\n\t"
24     "popq %1"                 "\n"
25     : "=&r" (tem), "=r" (flag) : "m" (arg1), "r" (arg2) : "cc"
26   );
27   if (*res != tem || *flags != (flag & 0x8d5))
28      printf ("Difference between r and m variants\n");
29 }
30 
31 __attribute__((noinline))
do_andn32(UInt * flags,ULong * res,UInt arg1,UInt arg2)32 void do_andn32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, UInt arg1, UInt arg2 )
33 {
34   ULong tem, flag;
35   __asm__ __volatile__(
36     "movabsq $0x5555555555555555, %0" "\n\t"
37     "andn %2, %3, %k0"        "\n\t"
38     "pushfq"		      "\n\t"
39     "popq %1"                 "\n"
40     : "=&r" (tem), "=r" (flag) : "r" (arg1), "r" (arg2) : "cc"
41   );
42   *res = tem;
43   *flags = flag & 0x8d5;
44   __asm__ __volatile__(
45     "movabsq $0x5555555555555555, %0" "\n\t"
46     "andn %2, %3, %k0"        "\n\t"
47     "pushfq"		      "\n\t"
48     "popq %1"                 "\n"
49     : "=&r" (tem), "=r" (flag) : "m" (arg1), "r" (arg2) : "cc"
50   );
51   if (*res != tem || *flags != (flag & 0x8d5))
52      printf ("Difference between r and m variants\n");
53 }
54 
55 
56 __attribute__((noinline))
do_mulx64(ULong * res1,ULong * res2,ULong arg1,ULong arg2)57 void do_mulx64 ( /*OUT*/ULong* res1, /*OUT*/ULong* res2,
58                  ULong arg1, ULong arg2 )
59 {
60   ULong tem1, tem2, flag1, flag2, flag3, flag4;
61   __asm__ __volatile__(
62     "movabsq $0x5555555555555555, %0" "\n\t"
63     "movabsq $0x5555555555555555, %1" "\n\t"
64     "movq %4, %%rdx"          "\n\t"
65     "pushfq"                  "\n\t"
66     "xorq $0x8d5, (%%rsp)"    "\n\t"
67     "movq (%%rsp), %2"        "\n\t"
68     "popfq"                   "\n\t"
69     "mulx %5, %1, %0"         "\n\t"
70     "pushfq"                  "\n\t"
71     "movq (%%rsp), %3"        "\n\t"
72     "xorq $0x8d5, (%%rsp)"    "\n\t"
73     "popfq"                   "\n"
74     : "=&r" (tem1), "=&r" (tem2), "=&r" (flag1), "=r" (flag2)
75     : "g" (arg1), "r" (arg2) : "cc", "rdx"
76   );
77   *res1 = tem1;
78   *res2 = tem2;
79   __asm__ __volatile__(
80     "movabsq $0x5555555555555555, %0" "\n\t"
81     "movabsq $0x5555555555555555, %1" "\n\t"
82     "movq %4, %%rdx"          "\n\t"
83     "pushfq"                  "\n\t"
84     "popq %2"                 "\n\t"
85     "mulx %5, %1, %0"         "\n\t"
86     "pushfq"                  "\n\t"
87     "popq %3"                 "\n"
88     : "=&r" (tem1), "=&r" (tem2), "=&r" (flag3), "=r" (flag4)
89     : "g" (arg1), "m" (arg2) : "cc", "rdx"
90   );
91   if (*res1 != tem1 || *res2 != tem2)
92      printf ("Difference between r and m variants\n");
93   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
94      printf ("Flags changed\n");
95 }
96 
97 __attribute__((noinline))
do_mulx32(ULong * res1,ULong * res2,UInt arg1,UInt arg2)98 void do_mulx32 ( /*OUT*/ULong* res1, /*OUT*/ULong* res2,
99                  UInt arg1, UInt arg2 )
100 {
101   ULong tem1, tem2, flag1, flag2, flag3, flag4;
102   __asm__ __volatile__(
103     "movabsq $0x5555555555555555, %0" "\n\t"
104     "movabsq $0x5555555555555555, %1" "\n\t"
105     "movl %4, %%edx"          "\n\t"
106     "pushfq"                  "\n\t"
107     "xorq $0x8d5, (%%rsp)"    "\n\t"
108     "movq (%%rsp), %2"        "\n\t"
109     "popfq"                    "\n\t"
110     "mulx %5, %k1, %k0"       "\n\t"
111     "pushfq"                  "\n\t"
112     "movq (%%rsp), %3"        "\n\t"
113     "xorq $0x8d5, (%%rsp)"    "\n\t"
114     "popfq"                   "\n"
115     : "=&r" (tem1), "=&r" (tem2), "=&r" (flag1), "=r" (flag2)
116     : "g" (arg1), "r" (arg2) : "cc", "rdx"
117   );
118   *res1 = tem1;
119   *res2 = tem2;
120   __asm__ __volatile__(
121     "movabsq $0x5555555555555555, %0" "\n\t"
122     "movabsq $0x5555555555555555, %1" "\n\t"
123     "movl %4, %%edx"          "\n\t"
124     "pushfq"                  "\n\t"
125     "popq %2"                 "\n\t"
126     "mulx %5, %k1, %k0"       "\n\t"
127     "pushfq"                  "\n\t"
128     "popq %3"                 "\n"
129     : "=&r" (tem1), "=&r" (tem2), "=&r" (flag3), "=r" (flag4)
130     : "g" (arg1), "m" (arg2) : "cc", "rdx"
131   );
132   if (*res1 != tem1 || *res2 != tem2)
133      printf ("Difference between r and m variants\n");
134   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
135      printf ("Flags changed\n");
136 }
137 
138 
139 __attribute__((noinline))
do_sarx64(ULong * res,ULong arg1,ULong arg2)140 void do_sarx64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
141 {
142   ULong tem, flag1, flag2, flag3, flag4;
143   __asm__ __volatile__(
144     "movabsq $0x5555555555555555, %0" "\n\t"
145     "pushfq"                  "\n\t"
146     "xorq $0x8d5, (%%rsp)"    "\n\t"
147     "movq (%%rsp), %1"        "\n\t"
148     "popfq"                   "\n\t"
149     "sarx %3, %4, %0"         "\n\t"
150     "pushfq"                  "\n\t"
151     "movq (%%rsp), %2"        "\n\t"
152     "xorq $0x8d5, (%%rsp)"    "\n\t"
153     "popfq"                   "\n"
154     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
155     : "r" (arg1), "r" (arg2) : "cc"
156   );
157   *res = tem;
158   __asm__ __volatile__(
159     "movabsq $0x5555555555555555, %0" "\n\t"
160     "pushfq"                  "\n\t"
161     "xorq $0x8d5, (%%rsp)"    "\n\t"
162     "movq (%%rsp), %1"        "\n\t"
163     "popfq"                   "\n\t"
164     "sarx %3, %4, %0"         "\n\t"
165     "pushfq"                  "\n\t"
166     "movq (%%rsp), %2"        "\n\t"
167     "xorq $0x8d5, (%%rsp)"    "\n\t"
168     "popfq"                   "\n"
169     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
170     : "r" (arg1), "m" (arg2) : "cc"
171   );
172   if (*res != tem)
173      printf ("Difference between r and m variants\n");
174   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
175      printf ("Flags changed\n");
176 }
177 
178 __attribute__((noinline))
do_sarx32(ULong * res,UInt arg1,UInt arg2)179 void do_sarx32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
180 {
181   ULong tem, flag1, flag2, flag3, flag4;
182   __asm__ __volatile__(
183     "movabsq $0x5555555555555555, %0" "\n\t"
184     "pushfq"                  "\n\t"
185     "xorq $0x8d5, (%%rsp)"    "\n\t"
186     "movq (%%rsp), %1"        "\n\t"
187     "popfq"                   "\n\t"
188     "sarx %3, %4, %k0"        "\n\t"
189     "pushfq"                  "\n\t"
190     "movq (%%rsp), %2"        "\n\t"
191     "xorq $0x8d5, (%%rsp)"    "\n\t"
192     "popfq"                   "\n"
193     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
194     : "r" (arg1), "r" (arg2) : "cc"
195   );
196   *res = tem;
197   __asm__ __volatile__(
198     "movabsq $0x5555555555555555, %0" "\n\t"
199     "pushfq"                  "\n\t"
200     "xorq $0x8d5, (%%rsp)"    "\n\t"
201     "movq (%%rsp), %1"        "\n\t"
202     "popfq"                   "\n\t"
203     "sarx %3, %4, %k0"        "\n\t"
204     "pushfq"                  "\n\t"
205     "movq (%%rsp), %2"        "\n\t"
206     "xorq $0x8d5, (%%rsp)"    "\n\t"
207     "popfq"                   "\n"
208     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
209     : "r" (arg1), "m" (arg2) : "cc"
210   );
211   if (*res != tem)
212      printf ("Difference between r and m variants\n");
213   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
214      printf ("Flags changed\n");
215 }
216 
217 
218 __attribute__((noinline))
do_shlx64(ULong * res,ULong arg1,ULong arg2)219 void do_shlx64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
220 {
221   ULong tem, flag1, flag2, flag3, flag4;
222   __asm__ __volatile__(
223     "movabsq $0x5555555555555555, %0" "\n\t"
224     "pushfq"                  "\n\t"
225     "xorq $0x8d5, (%%rsp)"    "\n\t"
226     "movq (%%rsp), %1"        "\n\t"
227     "popfq"                   "\n\t"
228     "shlx %3, %4, %0"         "\n\t"
229     "pushfq"                  "\n\t"
230     "movq (%%rsp), %2"        "\n\t"
231     "xorq $0x8d5, (%%rsp)"    "\n\t"
232     "popfq"                   "\n"
233     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
234     : "r" (arg1), "r" (arg2) : "cc"
235   );
236   *res = tem;
237   __asm__ __volatile__(
238     "movabsq $0x5555555555555555, %0" "\n\t"
239     "pushfq"                  "\n\t"
240     "xorq $0x8d5, (%%rsp)"    "\n\t"
241     "movq (%%rsp), %1"        "\n\t"
242     "popfq"                   "\n\t"
243     "shlx %3, %4, %0"         "\n\t"
244     "pushfq"                  "\n\t"
245     "movq (%%rsp), %2"        "\n\t"
246     "xorq $0x8d5, (%%rsp)"    "\n\t"
247     "popfq"                   "\n"
248     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
249     : "r" (arg1), "m" (arg2) : "cc"
250   );
251   if (*res != tem)
252      printf ("Difference between r and m variants\n");
253   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
254      printf ("Flags changed\n");
255 }
256 
257 __attribute__((noinline))
do_shlx32(ULong * res,UInt arg1,UInt arg2)258 void do_shlx32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
259 {
260   ULong tem, flag1, flag2, flag3, flag4;
261   __asm__ __volatile__(
262     "movabsq $0x5555555555555555, %0" "\n\t"
263     "pushfq"                  "\n\t"
264     "xorq $0x8d5, (%%rsp)"    "\n\t"
265     "movq (%%rsp), %1"        "\n\t"
266     "popfq"                   "\n\t"
267     "shlx %3, %4, %k0"        "\n\t"
268     "pushfq"                  "\n\t"
269     "movq (%%rsp), %2"        "\n\t"
270     "xorq $0x8d5, (%%rsp)"    "\n\t"
271     "popfq"                   "\n"
272     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
273     : "r" (arg1), "r" (arg2) : "cc"
274   );
275   *res = tem;
276   __asm__ __volatile__(
277     "movabsq $0x5555555555555555, %0" "\n\t"
278     "pushfq"                  "\n\t"
279     "xorq $0x8d5, (%%rsp)"    "\n\t"
280     "movq (%%rsp), %1"        "\n\t"
281     "popfq"                   "\n\t"
282     "shlx %3, %4, %k0"        "\n\t"
283     "pushfq"                  "\n\t"
284     "movq (%%rsp), %2"        "\n\t"
285     "xorq $0x8d5, (%%rsp)"    "\n\t"
286     "popfq"                   "\n"
287     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
288     : "r" (arg1), "m" (arg2) : "cc"
289   );
290   if (*res != tem)
291      printf ("Difference between r and m variants\n");
292   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
293      printf ("Flags changed\n");
294 }
295 
296 
297 __attribute__((noinline))
do_shrx64(ULong * res,ULong arg1,ULong arg2)298 void do_shrx64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
299 {
300   ULong tem, flag1, flag2, flag3, flag4;
301   __asm__ __volatile__(
302     "movabsq $0x5555555555555555, %0" "\n\t"
303     "pushfq"                  "\n\t"
304     "xorq $0x8d5, (%%rsp)"    "\n\t"
305     "movq (%%rsp), %1"        "\n\t"
306     "popfq"                   "\n\t"
307     "shrx %3, %4, %0"         "\n\t"
308     "pushfq"                  "\n\t"
309     "movq (%%rsp), %2"        "\n\t"
310     "xorq $0x8d5, (%%rsp)"    "\n\t"
311     "popfq"                   "\n"
312     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
313     : "r" (arg1), "r" (arg2) : "cc"
314   );
315   *res = tem;
316   __asm__ __volatile__(
317     "movabsq $0x5555555555555555, %0" "\n\t"
318     "pushfq"                  "\n\t"
319     "xorq $0x8d5, (%%rsp)"    "\n\t"
320     "movq (%%rsp), %1"        "\n\t"
321     "popfq"                   "\n\t"
322     "shrx %3, %4, %0"         "\n\t"
323     "pushfq"                  "\n\t"
324     "movq (%%rsp), %2"        "\n\t"
325     "xorq $0x8d5, (%%rsp)"    "\n\t"
326     "popfq"                   "\n"
327     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
328     : "r" (arg1), "m" (arg2) : "cc"
329   );
330   if (*res != tem)
331      printf ("Difference between r and m variants\n");
332   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
333      printf ("Flags changed\n");
334 }
335 
336 __attribute__((noinline))
do_shrx32(ULong * res,UInt arg1,UInt arg2)337 void do_shrx32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
338 {
339   ULong tem, flag1, flag2, flag3, flag4;
340   __asm__ __volatile__(
341     "movabsq $0x5555555555555555, %0" "\n\t"
342     "pushfq"                  "\n\t"
343     "xorq $0x8d5, (%%rsp)"    "\n\t"
344     "movq (%%rsp), %1"        "\n\t"
345     "popfq"                   "\n\t"
346     "shrx %3, %4, %k0"        "\n\t"
347     "pushfq"                  "\n\t"
348     "movq (%%rsp), %2"        "\n\t"
349     "xorq $0x8d5, (%%rsp)"    "\n\t"
350     "popfq"                   "\n"
351     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
352     : "r" (arg1), "r" (arg2) : "cc"
353   );
354   *res = tem;
355   __asm__ __volatile__(
356     "movabsq $0x5555555555555555, %0" "\n\t"
357     "pushfq"                  "\n\t"
358     "xorq $0x8d5, (%%rsp)"    "\n\t"
359     "movq (%%rsp), %1"        "\n\t"
360     "popfq"                   "\n\t"
361     "shrx %3, %4, %k0"        "\n\t"
362     "pushfq"                  "\n\t"
363     "movq (%%rsp), %2"        "\n\t"
364     "xorq $0x8d5, (%%rsp)"    "\n\t"
365     "popfq"                   "\n"
366     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
367     : "r" (arg1), "m" (arg2) : "cc"
368   );
369   if (*res != tem)
370      printf ("Difference between r and m variants\n");
371   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
372      printf ("Flags changed\n");
373 }
374 
375 
376 __attribute__((noinline))
do_rorx64(ULong * res1,ULong * res2,ULong arg)377 void do_rorx64 ( /*OUT*/ULong* res1, /*OUT*/ULong* res2, ULong arg )
378 {
379   ULong tem, flag1, flag2, flag3, flag4;
380   __asm__ __volatile__(
381     "movabsq $0x5555555555555555, %0" "\n\t"
382     "pushfq"                  "\n\t"
383     "xorq $0x8d5, (%%rsp)"    "\n\t"
384     "movq (%%rsp), %1"        "\n\t"
385     "popfq"                   "\n\t"
386     "rorx $12, %3, %0"        "\n\t"
387     "pushfq"                  "\n\t"
388     "movq (%%rsp), %2"        "\n\t"
389     "xorq $0x8d5, (%%rsp)"    "\n\t"
390     "popfq"                   "\n"
391     : "=&r" (tem), "=&r" (flag1), "=r" (flag2) : "r" (arg) : "cc"
392   );
393   *res1 = tem;
394   __asm__ __volatile__(
395     "movabsq $0x5555555555555555, %0" "\n\t"
396     "pushfq"                  "\n\t"
397     "xorq $0x8d5, (%%rsp)"    "\n\t"
398     "movq (%%rsp), %1"        "\n\t"
399     "popfq"                   "\n\t"
400     "rorx $67, %3, %0"        "\n\t"
401     "pushfq"                  "\n\t"
402     "movq (%%rsp), %2"        "\n\t"
403     "xorq $0x8d5, (%%rsp)"    "\n\t"
404     "popfq"                   "\n"
405     : "=&r" (tem), "=&r" (flag3), "=r" (flag4) : "m" (arg) : "cc"
406   );
407   *res2 = tem;
408   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
409      printf ("Flags changed\n");
410 }
411 
412 __attribute__((noinline))
do_rorx32(ULong * res1,ULong * res2,UInt arg)413 void do_rorx32 ( /*OUT*/ULong* res1, /*OUT*/ULong* res2, UInt arg )
414 {
415   ULong tem, flag1, flag2, flag3, flag4;
416   __asm__ __volatile__(
417     "movabsq $0x5555555555555555, %0" "\n\t"
418     "pushfq"                  "\n\t"
419     "xorq $0x8d5, (%%rsp)"    "\n\t"
420     "movq (%%rsp), %1"        "\n\t"
421     "popfq"                   "\n\t"
422     "rorx $12, %3, %k0"       "\n\t"
423     "pushfq"                  "\n\t"
424     "movq (%%rsp), %2"        "\n\t"
425     "xorq $0x8d5, (%%rsp)"    "\n\t"
426     "popfq"                   "\n"
427     : "=&r" (tem), "=&r" (flag1), "=r" (flag2) : "r" (arg) : "cc"
428   );
429   *res1 = tem;
430   __asm__ __volatile__(
431     "movabsq $0x5555555555555555, %0" "\n\t"
432     "pushfq"                  "\n\t"
433     "xorq $0x8d5, (%%rsp)"    "\n\t"
434     "movq (%%rsp), %1"        "\n\t"
435     "popfq"                   "\n\t"
436     "rorx $67, %3, %k0"       "\n\t"
437     "pushfq"                  "\n\t"
438     "movq (%%rsp), %2"        "\n\t"
439     "xorq $0x8d5, (%%rsp)"    "\n\t"
440     "popfq"                   "\n"
441     : "=&r" (tem), "=&r" (flag3), "=r" (flag4) : "m" (arg) : "cc"
442   );
443   *res2 = tem;
444   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
445      printf ("Flags changed\n");
446 }
447 
448 
449 __attribute__((noinline))
do_blsi64(UInt * flags,ULong * res,ULong arg)450 void do_blsi64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg )
451 {
452   ULong tem, flag;
453   __asm__ __volatile__(
454     "movabsq $0x5555555555555555, %0" "\n\t"
455     "blsi %2, %0"             "\n\t"
456     "pushfq"		      "\n\t"
457     "popq %1"                 "\n"
458     : "=&r" (tem), "=&r" (flag) : "r" (arg) : "cc"
459   );
460   *res = tem;
461   *flags = flag & 0x8d5;
462   __asm__ __volatile__(
463     "movabsq $0x5555555555555555, %0" "\n\t"
464     "blsi %2, %0"             "\n\t"
465     "pushfq"		      "\n\t"
466     "popq %1"                 "\n"
467     : "=&r" (tem), "=&r" (flag) : "m" (arg) : "cc"
468   );
469   if (*res != tem || *flags != (flag & 0x8d5))
470      printf ("Difference between r and m variants\n");
471 }
472 
473 __attribute__((noinline))
do_blsi32(UInt * flags,ULong * res,UInt arg)474 void do_blsi32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, UInt arg )
475 {
476   ULong tem, flag;
477   __asm__ __volatile__(
478     "movabsq $0x5555555555555555, %0" "\n\t"
479     "blsi %2, %k0"            "\n\t"
480     "pushfq"		      "\n\t"
481     "popq %1"                 "\n"
482     : "=&r" (tem), "=&r" (flag) : "r" (arg) : "cc"
483   );
484   *res = tem;
485   *flags = flag & 0x8d5;
486   __asm__ __volatile__(
487     "movabsq $0x5555555555555555, %0" "\n\t"
488     "blsi %2, %k0"            "\n\t"
489     "pushfq"		      "\n\t"
490     "popq %1"                 "\n"
491     : "=&r" (tem), "=&r" (flag) : "m" (arg) : "cc"
492   );
493   if (*res != tem || *flags != (flag & 0x8d5))
494      printf ("Difference between r and m variants\n");
495 }
496 
497 
498 __attribute__((noinline))
do_blsmsk64(UInt * flags,ULong * res,ULong arg)499 void do_blsmsk64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg )
500 {
501   ULong tem, flag;
502   __asm__ __volatile__(
503     "movabsq $0x5555555555555555, %0" "\n\t"
504     "blsmsk %2, %0"           "\n\t"
505     "pushfq"		      "\n\t"
506     "popq %1"                 "\n"
507     : "=&r" (tem), "=&r" (flag) : "r" (arg) : "cc"
508   );
509   *res = tem;
510   *flags = flag & 0x8d5;
511   __asm__ __volatile__(
512     "movabsq $0x5555555555555555, %0" "\n\t"
513     "blsmsk %2, %0"           "\n\t"
514     "pushfq"		      "\n\t"
515     "popq %1"                 "\n"
516     : "=&r" (tem), "=&r" (flag) : "m" (arg) : "cc"
517   );
518   if (*res != tem || *flags != (flag & 0x8d5))
519      printf ("Difference between r and m variants\n");
520 }
521 
522 __attribute__((noinline))
do_blsmsk32(UInt * flags,ULong * res,UInt arg)523 void do_blsmsk32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, UInt arg )
524 {
525   ULong tem, flag;
526   __asm__ __volatile__(
527     "movabsq $0x5555555555555555, %0" "\n\t"
528     "blsmsk %2, %k0"          "\n\t"
529     "pushfq"		      "\n\t"
530     "popq %1"                 "\n"
531     : "=&r" (tem), "=&r" (flag) : "r" (arg) : "cc"
532   );
533   *res = tem;
534   *flags = flag & 0x8d5;
535   __asm__ __volatile__(
536     "movabsq $0x5555555555555555, %0" "\n\t"
537     "blsmsk %2, %k0"          "\n\t"
538     "pushfq"		      "\n\t"
539     "popq %1"                 "\n"
540     : "=&r" (tem), "=&r" (flag) : "m" (arg) : "cc"
541   );
542   if (*res != tem || *flags != (flag & 0x8d5))
543      printf ("Difference between r and m variants\n");
544 }
545 
546 
547 __attribute__((noinline))
do_blsr64(UInt * flags,ULong * res,ULong arg)548 void do_blsr64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg )
549 {
550   ULong tem, flag;
551   __asm__ __volatile__(
552     "movabsq $0x5555555555555555, %0" "\n\t"
553     "blsr %2, %0"             "\n\t"
554     "pushfq"		      "\n\t"
555     "popq %1"                 "\n"
556     : "=&r" (tem), "=&r" (flag) : "r" (arg) : "cc"
557   );
558   *res = tem;
559   *flags = flag & 0x8d5;
560   __asm__ __volatile__(
561     "movabsq $0x5555555555555555, %0" "\n\t"
562     "blsr %2, %0"             "\n\t"
563     "pushfq"		      "\n\t"
564     "popq %1"                 "\n"
565     : "=&r" (tem), "=&r" (flag) : "m" (arg) : "cc"
566   );
567   if (*res != tem || *flags != (flag & 0x8d5))
568      printf ("Difference between r and m variants\n");
569 }
570 
571 __attribute__((noinline))
do_blsr32(UInt * flags,ULong * res,UInt arg)572 void do_blsr32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, UInt arg )
573 {
574   ULong tem, flag;
575   __asm__ __volatile__(
576     "movabsq $0x5555555555555555, %0" "\n\t"
577     "blsr %2, %k0"            "\n\t"
578     "pushfq"		      "\n\t"
579     "popq %1"                 "\n"
580     : "=&r" (tem), "=&r" (flag) : "r" (arg) : "cc"
581   );
582   *res = tem;
583   *flags = flag & 0x8d5;
584   __asm__ __volatile__(
585     "movabsq $0x5555555555555555, %0" "\n\t"
586     "blsr %2, %k0"            "\n\t"
587     "pushfq"		      "\n\t"
588     "popq %1"                 "\n"
589     : "=&r" (tem), "=&r" (flag) : "m" (arg) : "cc"
590   );
591   if (*res != tem || *flags != (flag & 0x8d5))
592      printf ("Difference between r and m variants\n");
593 }
594 
595 
596 __attribute__((noinline))
do_bextr64(UInt * flags,ULong * res,ULong arg1,ULong arg2)597 void do_bextr64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res,
598                   ULong arg1, ULong arg2 )
599 {
600   ULong tem, flag;
601   __asm__ __volatile__(
602     "movabsq $0x5555555555555555, %0" "\n\t"
603     "bextr %2, %3, %0"        "\n\t"
604     "pushfq"		      "\n\t"
605     "popq %1"                 "\n"
606     : "=&r" (tem), "=&r" (flag) : "r" (arg1), "r" (arg2) : "cc"
607   );
608   *res = tem;
609   *flags = flag & 0x8d5;
610   __asm__ __volatile__(
611     "movabsq $0x5555555555555555, %0" "\n\t"
612     "bextr %2, %3, %0"        "\n\t"
613     "pushfq"		      "\n\t"
614     "popq %1"                 "\n"
615     : "=&r" (tem), "=&r" (flag) : "r" (arg1), "m" (arg2) : "cc"
616   );
617   if (*res != tem || *flags != (flag & 0x8d5))
618      printf ("Difference between r and m variants\n");
619 }
620 
621 __attribute__((noinline))
do_bextr32(UInt * flags,ULong * res,UInt arg1,UInt arg2)622 void do_bextr32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res,
623                   UInt arg1, UInt arg2 )
624 {
625   ULong tem, flag;
626   __asm__ __volatile__(
627     "movabsq $0x5555555555555555, %0" "\n\t"
628     "bextr %2, %3, %k0"       "\n\t"
629     "pushfq"		      "\n\t"
630     "popq %1"                 "\n"
631     : "=&r" (tem), "=&r" (flag) : "r" (arg1), "r" (arg2) : "cc"
632   );
633   *res = tem;
634   *flags = flag & 0x8d5;
635   __asm__ __volatile__(
636     "movabsq $0x5555555555555555, %0" "\n\t"
637     "bextr %2, %3, %k0"       "\n\t"
638     "pushfq"		      "\n\t"
639     "popq %1"                 "\n"
640     : "=&r" (tem), "=&r" (flag) : "r" (arg1), "m" (arg2) : "cc"
641   );
642   if (*res != tem || *flags != (flag & 0x8d5))
643      printf ("Difference between r and m variants\n");
644 }
645 
646 
647 __attribute__((noinline))
do_bzhi64(UInt * flags,ULong * res,ULong arg1,ULong arg2)648 void do_bzhi64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res,
649                  ULong arg1, ULong arg2 )
650 {
651   ULong tem, flag;
652   __asm__ __volatile__(
653     "movabsq $0x5555555555555555, %0" "\n\t"
654     "bzhi %2, %3, %0"         "\n\t"
655     "pushfq"		      "\n\t"
656     "popq %1"                 "\n"
657     : "=&r" (tem), "=&r" (flag) : "r" (arg1), "r" (arg2) : "cc"
658   );
659   *res = tem;
660   *flags = flag & 0x8d5;
661   __asm__ __volatile__(
662     "movabsq $0x5555555555555555, %0" "\n\t"
663     "bzhi %2, %3, %0"         "\n\t"
664     "pushfq"		      "\n\t"
665     "popq %1"                 "\n"
666     : "=&r" (tem), "=&r" (flag) : "r" (arg1), "m" (arg2) : "cc"
667   );
668   if (*res != tem || *flags != (flag & 0x8d5))
669      printf ("Difference between r and m variants\n");
670 }
671 
672 __attribute__((noinline))
do_bzhi32(UInt * flags,ULong * res,UInt arg1,UInt arg2)673 void do_bzhi32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res,
674                  UInt arg1, UInt arg2 )
675 {
676   ULong tem, flag;
677   __asm__ __volatile__(
678     "movabsq $0x5555555555555555, %0" "\n\t"
679     "bzhi %2, %3, %k0"        "\n\t"
680     "pushfq"		      "\n\t"
681     "popq %1"                 "\n"
682     : "=&r" (tem), "=&r" (flag) : "r" (arg1), "r" (arg2) : "cc"
683   );
684   *res = tem;
685   *flags = flag & 0x8d5;
686   __asm__ __volatile__(
687     "movabsq $0x5555555555555555, %0" "\n\t"
688     "bzhi %2, %3, %k0"        "\n\t"
689     "pushfq"		      "\n\t"
690     "popq %1"                 "\n"
691     : "=&r" (tem), "=&r" (flag) : "r" (arg1), "m" (arg2) : "cc"
692   );
693   if (*res != tem || *flags != (flag & 0x8d5))
694      printf ("Difference between r and m variants\n");
695 }
696 
697 
698 __attribute__((noinline))
do_pdep64(ULong * res,ULong arg1,ULong arg2)699 void do_pdep64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
700 {
701   ULong tem, flag1, flag2, flag3, flag4;
702   __asm__ __volatile__(
703     "movabsq $0x5555555555555555, %0" "\n\t"
704     "pushfq"                  "\n\t"
705     "xorq $0x8d5, (%%rsp)"    "\n\t"
706     "movq (%%rsp), %1"        "\n\t"
707     "popfq"                   "\n\t"
708     "pdep %3, %4, %0"         "\n\t"
709     "pushfq"                  "\n\t"
710     "movq (%%rsp), %2"        "\n\t"
711     "xorq $0x8d5, (%%rsp)"    "\n\t"
712     "popfq"                   "\n"
713     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
714     : "r" (arg1), "r" (arg2) : "cc"
715   );
716   *res = tem;
717   __asm__ __volatile__(
718     "movabsq $0x5555555555555555, %0" "\n\t"
719     "pushfq"                  "\n\t"
720     "xorq $0x8d5, (%%rsp)"    "\n\t"
721     "movq (%%rsp), %1"        "\n\t"
722     "popfq"                   "\n\t"
723     "pdep %3, %4, %0"         "\n\t"
724     "pushfq"                  "\n\t"
725     "movq (%%rsp), %2"        "\n\t"
726     "xorq $0x8d5, (%%rsp)"    "\n\t"
727     "popfq"                   "\n"
728     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
729     : "m" (arg1), "r" (arg2) : "cc"
730   );
731   if (*res != tem)
732      printf ("Difference between r and m variants\n");
733   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
734      printf ("Flags changed\n");
735 }
736 
737 __attribute__((noinline))
do_pdep32(ULong * res,UInt arg1,UInt arg2)738 void do_pdep32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
739 {
740   ULong tem, flag1, flag2, flag3, flag4;
741   __asm__ __volatile__(
742     "movabsq $0x5555555555555555, %0" "\n\t"
743     "pushfq"                  "\n\t"
744     "xorq $0x8d5, (%%rsp)"    "\n\t"
745     "movq (%%rsp), %1"        "\n\t"
746     "popfq"                   "\n\t"
747     "pdep %3, %4, %k0"        "\n\t"
748     "pushfq"                  "\n\t"
749     "movq (%%rsp), %2"        "\n\t"
750     "xorq $0x8d5, (%%rsp)"    "\n\t"
751     "popfq"                   "\n"
752     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
753     : "r" (arg1), "r" (arg2) : "cc"
754   );
755   *res = tem;
756   __asm__ __volatile__(
757     "movabsq $0x5555555555555555, %0" "\n\t"
758     "pushfq"                  "\n\t"
759     "xorq $0x8d5, (%%rsp)"    "\n\t"
760     "movq (%%rsp), %1"        "\n\t"
761     "popfq"                   "\n\t"
762     "pdep %3, %4, %k0"        "\n\t"
763     "pushfq"                  "\n\t"
764     "movq (%%rsp), %2"        "\n\t"
765     "xorq $0x8d5, (%%rsp)"    "\n\t"
766     "popfq"                   "\n"
767     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
768     : "m" (arg1), "r" (arg2) : "cc"
769   );
770   if (*res != tem)
771      printf ("Difference between r and m variants\n");
772   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
773      printf ("Flags changed\n");
774 }
775 
776 
777 __attribute__((noinline))
do_pext64(ULong * res,ULong arg1,ULong arg2)778 void do_pext64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
779 {
780   ULong tem, flag1, flag2, flag3, flag4;
781   __asm__ __volatile__(
782     "movabsq $0x5555555555555555, %0" "\n\t"
783     "pushfq"                  "\n\t"
784     "xorq $0x8d5, (%%rsp)"    "\n\t"
785     "movq (%%rsp), %1"        "\n\t"
786     "popfq"                   "\n\t"
787     "pext %3, %4, %0"         "\n\t"
788     "pushfq"                  "\n\t"
789     "movq (%%rsp), %2"        "\n\t"
790     "xorq $0x8d5, (%%rsp)"    "\n\t"
791     "popfq"                   "\n"
792     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
793     : "r" (arg1), "r" (arg2) : "cc"
794   );
795   *res = tem;
796   __asm__ __volatile__(
797     "movabsq $0x5555555555555555, %0" "\n\t"
798     "pushfq"                  "\n\t"
799     "xorq $0x8d5, (%%rsp)"    "\n\t"
800     "movq (%%rsp), %1"        "\n\t"
801     "popfq"                   "\n\t"
802     "pext %3, %4, %0"         "\n\t"
803     "pushfq"                  "\n\t"
804     "movq (%%rsp), %2"        "\n\t"
805     "xorq $0x8d5, (%%rsp)"    "\n\t"
806     "popfq"                   "\n"
807     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
808     : "m" (arg1), "r" (arg2) : "cc"
809   );
810   if (*res != tem)
811      printf ("Difference between r and m variants\n");
812   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
813      printf ("Flags changed\n");
814 }
815 
816 __attribute__((noinline))
do_pext32(ULong * res,UInt arg1,UInt arg2)817 void do_pext32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
818 {
819   ULong tem, flag1, flag2, flag3, flag4;
820   __asm__ __volatile__(
821     "movabsq $0x5555555555555555, %0" "\n\t"
822     "pushfq"                  "\n\t"
823     "xorq $0x8d5, (%%rsp)"    "\n\t"
824     "movq (%%rsp), %1"        "\n\t"
825     "popfq"                   "\n\t"
826     "pext %3, %4, %k0"        "\n\t"
827     "pushfq"                  "\n\t"
828     "movq (%%rsp), %2"        "\n\t"
829     "xorq $0x8d5, (%%rsp)"    "\n\t"
830     "popfq"                   "\n"
831     : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
832     : "r" (arg1), "r" (arg2) : "cc"
833   );
834   *res = tem;
835   __asm__ __volatile__(
836     "movabsq $0x5555555555555555, %0" "\n\t"
837     "pushfq"                  "\n\t"
838     "xorq $0x8d5, (%%rsp)"    "\n\t"
839     "movq (%%rsp), %1"        "\n\t"
840     "popfq"                   "\n\t"
841     "pext %3, %4, %k0"        "\n\t"
842     "pushfq"                  "\n\t"
843     "movq (%%rsp), %2"        "\n\t"
844     "xorq $0x8d5, (%%rsp)"    "\n\t"
845     "popfq"                   "\n"
846     : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
847     : "m" (arg1), "r" (arg2) : "cc"
848   );
849   if (*res != tem)
850      printf ("Difference between r and m variants\n");
851   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5)
852      printf ("Flags changed\n");
853 }
854 
855 
main(void)856 int main ( void )
857 {
858    ULong w1, w2;
859 
860    w1 = 0xFEDC192837475675ULL;
861    w2 = 0x57657438291CDEF0ULL;
862    while (1) {
863       ULong res;
864       UInt  flags;
865       do_andn64(&flags, &res, w1, w2);
866       printf("andn64 %016llx %016llx -> %016llx %04x\n", w1, w2, res, flags);
867       if (w1 == 0) break;
868       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
869       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
870    }
871 
872    w1 = 0xFEDC192837475675ULL;
873    w2 = 0x57657438291CDEF0ULL;
874    while (1) {
875       ULong res;
876       UInt  flags;
877       do_andn32(&flags, &res, w1, w2);
878       printf("andn32 %016llx %016llx -> %016llx %04x\n", w1, w2, res, flags);
879       if (w1 == 0) break;
880       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
881       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
882    }
883 
884    w1 = 0xFEDC192837475675ULL;
885    w2 = 0x57657438291CDEF0ULL;
886    while (1) {
887       ULong res1, res2;
888       do_mulx64(&res1, &res2, w1, w2);
889       printf("mulx64 %016llx %016llx -> %016llx %016llx\n", w1, w2, res1, res2);
890       if (w1 == 0) break;
891       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
892       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
893    }
894 
895    w1 = 0xFEDC192837475675ULL;
896    w2 = 0x57657438291CDEF0ULL;
897    while (1) {
898       ULong res1, res2;
899       do_mulx32(&res1, &res2, w1, w2);
900       printf("mulx32 %016llx %016llx -> %016llx %016llx\n", w1, w2, res1, res2);
901       if (w1 == 0) break;
902       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
903       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
904    }
905 
906    w1 = 0xFEDC192837475675ULL;
907    w2 = 0x57657438291CDEF0ULL;
908    while (1) {
909       ULong res;
910       do_sarx64(&res, w1, w2);
911       printf("sarx64 %016llx %016llx -> %016llx\n", w1, w2, res);
912       if (w1 == 0) break;
913       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
914       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
915    }
916 
917    w1 = 0xFEDC192837475675ULL;
918    w2 = 0x57657438291CDEF0ULL;
919    while (1) {
920       ULong res;
921       do_sarx32(&res, w1, w2);
922       printf("sarx32 %016llx %016llx -> %016llx\n", w1, w2, res);
923       if (w1 == 0) break;
924       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
925       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
926    }
927 
928    w1 = 0xFEDC192837475675ULL;
929    w2 = 0x57657438291CDEF0ULL;
930    while (1) {
931       ULong res;
932       do_shlx64(&res, w1, w2);
933       printf("shlx64 %016llx %016llx -> %016llx\n", w1, w2, res);
934       if (w1 == 0) break;
935       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
936       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
937    }
938 
939    w1 = 0xFEDC192837475675ULL;
940    w2 = 0x57657438291CDEF0ULL;
941    while (1) {
942       ULong res;
943       do_shlx32(&res, w1, w2);
944       printf("shlx32 %016llx %016llx -> %016llx\n", w1, w2, res);
945       if (w1 == 0) break;
946       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
947       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
948    }
949 
950    w1 = 0xFEDC192837475675ULL;
951    w2 = 0x57657438291CDEF0ULL;
952    while (1) {
953       ULong res;
954       do_shrx64(&res, w1, w2);
955       printf("shrx64 %016llx %016llx -> %016llx\n", w1, w2, res);
956       if (w1 == 0) break;
957       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
958       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
959    }
960 
961    w1 = 0xFEDC192837475675ULL;
962    w2 = 0x57657438291CDEF0ULL;
963    while (1) {
964       ULong res;
965       do_shrx32(&res, w1, w2);
966       printf("shrx32 %016llx %016llx -> %016llx\n", w1, w2, res);
967       if (w1 == 0) break;
968       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
969       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
970    }
971 
972    w1 = 0xFEDC192837475675ULL;
973    while (1) {
974       ULong res1, res2;
975       do_rorx64(&res1, &res2, w1);
976       printf("rorx64 %016llx -> %016llx %016llx\n", w1, res1, res2);
977       if (w1 == 0) break;
978       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
979    }
980 
981    w1 = 0xFEDC192837475675ULL;
982    while (1) {
983       ULong res1, res2;
984       do_rorx32(&res1, &res2, w1);
985       printf("rorx32 %016llx -> %016llx %016llx\n", w1, res1, res2);
986       if (w1 == 0) break;
987       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
988    }
989 
990    w1 = 0xFEDC192837475675ULL;
991    while (1) {
992       ULong res;
993       UInt  flags;
994       do_blsi64(&flags, &res, w1);
995       printf("blsi64 %016llx -> %016llx %04x\n", w1, res, flags);
996       if (w1 == 0) break;
997       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
998    }
999 
1000    w1 = 0xFEDC192837475675ULL;
1001    while (1) {
1002       ULong res;
1003       UInt  flags;
1004       do_blsi32(&flags, &res, w1);
1005       printf("blsi32 %016llx -> %016llx %04x\n", w1, res, flags);
1006       if (w1 == 0) break;
1007       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
1008    }
1009 
1010    w1 = 0xFEDC192837475675ULL;
1011    while (1) {
1012       ULong res;
1013       UInt  flags;
1014       do_blsmsk64(&flags, &res, w1);
1015       printf("blsmsk64 %016llx -> %016llx %04x\n", w1, res, flags);
1016       if (w1 == 0) break;
1017       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
1018    }
1019 
1020    w1 = 0xFEDC192837475675ULL;
1021    while (1) {
1022       ULong res;
1023       UInt  flags;
1024       do_blsmsk32(&flags, &res, w1);
1025       printf("blsmsk32 %016llx -> %016llx %04x\n", w1, res, flags);
1026       if (w1 == 0) break;
1027       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
1028    }
1029 
1030    w1 = 0xFEDC192837475675ULL;
1031    while (1) {
1032       ULong res;
1033       UInt  flags;
1034       do_blsr64(&flags, &res, w1);
1035       printf("blsr64 %016llx -> %016llx %04x\n", w1, res, flags);
1036       if (w1 == 0) break;
1037       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
1038    }
1039 
1040    w1 = 0xFEDC192837475675ULL;
1041    while (1) {
1042       ULong res;
1043       UInt  flags;
1044       do_blsr32(&flags, &res, w1);
1045       printf("blsr32 %016llx -> %016llx %04x\n", w1, res, flags);
1046       if (w1 == 0) break;
1047       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
1048    }
1049 
1050    w1 = 0xFEDC192837475675ULL;
1051    w2 = 0x57657438291CDEF0ULL;
1052    while (1) {
1053       ULong res;
1054       UInt  flags;
1055       do_bextr64(&flags, &res, w1, w2);
1056       printf("bextr64 %016llx %016llx -> %016llx %04x\n", w1, w2, res, flags);
1057       if (w1 == 0) break;
1058       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
1059       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
1060    }
1061 
1062    w1 = 0xFEDC192837475675ULL;
1063    w2 = 0x57657438291CDEF0ULL;
1064    while (1) {
1065       ULong res;
1066       UInt  flags;
1067       do_bextr32(&flags, &res, w1, w2);
1068       printf("bextr32 %016llx %016llx -> %016llx %04x\n", w1, w2, res, flags);
1069       if (w1 == 0) break;
1070       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
1071       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
1072    }
1073 
1074    w1 = 0xFEDC192837475675ULL;
1075    w2 = 0x57657438291CDEF0ULL;
1076    while (1) {
1077       ULong res;
1078       UInt  flags;
1079       do_bzhi64(&flags, &res, w1, w2);
1080       printf("bzhi64 %016llx %016llx -> %016llx %04x\n", w1, w2, res, flags);
1081       if (w1 == 0) break;
1082       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
1083       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
1084    }
1085 
1086    w1 = 0xFEDC192837475675ULL;
1087    w2 = 0x57657438291CDEF0ULL;
1088    while (1) {
1089       ULong res;
1090       UInt  flags;
1091       do_bzhi32(&flags, &res, w1, w2);
1092       printf("bzhi32 %016llx %016llx -> %016llx %04x\n", w1, w2, res, flags);
1093       if (w1 == 0) break;
1094       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
1095       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
1096    }
1097 
1098    w1 = 0xFEDC192837475675ULL;
1099    w2 = 0x57657438291CDEF0ULL;
1100    while (1) {
1101       ULong res;
1102       do_pdep64(&res, w1, w2);
1103       printf("pdep64 %016llx %016llx -> %016llx\n", w1, w2, res);
1104       if (w1 == 0) break;
1105       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
1106       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
1107    }
1108 
1109    w1 = 0xFEDC192837475675ULL;
1110    w2 = 0x57657438291CDEF0ULL;
1111    while (1) {
1112       ULong res;
1113       do_pdep32(&res, w1, w2);
1114       printf("pdep32 %016llx %016llx -> %016llx\n", w1, w2, res);
1115       if (w1 == 0) break;
1116       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
1117       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
1118    }
1119 
1120    w1 = 0xFEDC192837475675ULL;
1121    w2 = 0x57657438291CDEF0ULL;
1122    while (1) {
1123       ULong res;
1124       do_pext64(&res, w1, w2);
1125       printf("pext64 %016llx %016llx -> %016llx\n", w1, w2, res);
1126       if (w1 == 0) break;
1127       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
1128       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
1129    }
1130 
1131    w1 = 0xFEDC192837475675ULL;
1132    w2 = 0x57657438291CDEF0ULL;
1133    while (1) {
1134       ULong res;
1135       do_pext32(&res, w1, w2);
1136       printf("pext32 %016llx %016llx -> %016llx\n", w1, w2, res);
1137       if (w1 == 0) break;
1138       w1 = ((w1 >> 2) | (w1 >> 1)) + (w1 / 17ULL);
1139       w2 = ((w2 >> 2) | (w2 >> 1)) + (w2 / 17ULL);
1140    }
1141 
1142    return 0;
1143 }
1144