
#include <stdio.h>

/* Shorthand integer types used by every test routine in this file:
   ULong carries the 64-bit operands and results, UInt the 32-bit
   operands and the masked flag words (sizes as on x86-64). */
typedef unsigned long long int ULong;
typedef unsigned int UInt;

/* Run the 64-bit ANDN instruction: the result is arg1 AND (NOT arg2).
 *
 * flags: receives the flags register as read right after the
 *        instruction, masked with 0x8d5 (OF|SF|ZF|AF|PF|CF).
 * res:   receives the destination register.  The register is preloaded
 *        with 0x5555555555555555, presumably so that a destination the
 *        instruction fails to write is noticeable.
 *
 * The instruction is executed twice: first with arg1 in a register (this
 * run provides *flags and *res), then with arg1 as a memory operand.  A
 * diagnostic is printed if the two runs disagree on the result or on the
 * masked flags.
 */
__attribute__((noinline))
void do_andn64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg1, ULong arg2 )
{
   ULong tem, flag;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "andn %2, %3, %0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=r" (flag) : "r" (arg1), "r" (arg2) : "cc"
   );
   *res = tem;
   *flags = flag & 0x8d5;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "andn %2, %3, %0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=r" (flag) : "m" (arg1), "r" (arg2) : "cc"
   );
   if (*res != tem || *flags != (flag & 0x8d5)) {
      printf ("Difference between r and m variants\n");
   }
}

/* Run the 32-bit ANDN instruction: the result is arg1 AND (NOT arg2).
 *
 * flags: receives the flags register as read right after the
 *        instruction, masked with 0x8d5 (OF|SF|ZF|AF|PF|CF).
 * res:   receives the full 64-bit destination register.  It is preloaded
 *        with 0x5555555555555555 and then written through its 32-bit
 *        view (%k0), so the upper half of *res reflects what the 32-bit
 *        write did to the upper register bits.
 *
 * The instruction is executed twice: first with arg1 in a register (this
 * run provides *flags and *res), then with arg1 as a memory operand.  A
 * diagnostic is printed if the two runs disagree on the result or on the
 * masked flags.
 */
__attribute__((noinline))
void do_andn32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, UInt arg1, UInt arg2 )
{
   ULong tem, flag;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "andn %2, %3, %k0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=r" (flag) : "r" (arg1), "r" (arg2) : "cc"
   );
   *res = tem;
   *flags = flag & 0x8d5;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "andn %2, %3, %k0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=r" (flag) : "m" (arg1), "r" (arg2) : "cc"
   );
   if (*res != tem || *flags != (flag & 0x8d5)) {
      printf ("Difference between r and m variants\n");
   }
}


/* Run the 64-bit MULX instruction: unsigned 64x64 -> 128 bit multiply of
 * arg1 (loaded into RDX, the implicit operand) by arg2.
 *
 * res1: receives the high 64 bits of the product.
 * res2: receives the low 64 bits of the product.
 *
 * Both destination registers are preloaded with 0x5555555555555555.
 * The instruction is executed twice:
 *   - with arg2 in a register; before the multiply the flag bits in
 *     0x8d5 (OF|SF|ZF|AF|PF|CF) are inverted, and afterwards they are
 *     re-read and inverted back.  This run provides *res1 and *res2.
 *   - with arg2 as a memory operand, sampling the flags before and after
 *     without inverting them.
 * "Difference between r and m variants" is printed if the two runs give
 * different products; "Flags changed" is printed if, in either run, any
 * of the 0x8d5 flag bits differs across the multiply.
 */
__attribute__((noinline))
void do_mulx64 ( /*OUT*/ULong* res1, /*OUT*/ULong* res2,
                 ULong arg1, ULong arg2 )
{
   ULong tem1, tem2, flag1, flag2, flag3, flag4;

   /* Register-operand form, run with inverted flags. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "movabsq $0x5555555555555555, %1" "\n\t"
      "movq %4, %%rdx" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "popfq" "\n\t"
      "mulx %5, %1, %0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %3" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem1), "=&r" (tem2), "=&r" (flag1), "=r" (flag2)
      : "g" (arg1), "r" (arg2) : "cc", "rdx"
   );
   *res1 = tem1;
   *res2 = tem2;

   /* Memory-operand form, run with the flags as they are. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "movabsq $0x5555555555555555, %1" "\n\t"
      "movq %4, %%rdx" "\n\t"
      "pushfq" "\n\t"
      "popq %2" "\n\t"
      "mulx %5, %1, %0" "\n\t"
      "pushfq" "\n\t"
      "popq %3" "\n"
      : "=&r" (tem1), "=&r" (tem2), "=&r" (flag3), "=r" (flag4)
      : "g" (arg1), "m" (arg2) : "cc", "rdx"
   );
   if (*res1 != tem1 || *res2 != tem2) {
      printf ("Difference between r and m variants\n");
   }
   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5) {
      printf ("Flags changed\n");
   }
}

/* Run the 32-bit MULX instruction: unsigned 32x32 -> 64 bit multiply of
 * arg1 (loaded into EDX, the implicit operand) by arg2.
 *
 * res1: receives the register holding the high 32 bits of the product.
 * res2: receives the register holding the low 32 bits of the product.
 *
 * Both destination registers are preloaded with 0x5555555555555555 and
 * written through their 32-bit views (%k0, %k1); the full 64-bit
 * registers are stored, so the upper halves reflect what the 32-bit
 * writes did to the upper register bits.
 * The instruction is executed twice:
 *   - with arg2 in a register; before the multiply the flag bits in
 *     0x8d5 (OF|SF|ZF|AF|PF|CF) are inverted, and afterwards they are
 *     re-read and inverted back.  This run provides *res1 and *res2.
 *   - with arg2 as a memory operand, sampling the flags before and after
 *     without inverting them.
 * "Difference between r and m variants" is printed if the two runs give
 * different products; "Flags changed" is printed if, in either run, any
 * of the 0x8d5 flag bits differs across the multiply.
 */
__attribute__((noinline))
void do_mulx32 ( /*OUT*/ULong* res1, /*OUT*/ULong* res2,
                 UInt arg1, UInt arg2 )
{
   ULong tem1, tem2, flag1, flag2, flag3, flag4;

   /* Register-operand form, run with inverted flags. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "movabsq $0x5555555555555555, %1" "\n\t"
      "movl %4, %%edx" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "popfq" "\n\t"
      "mulx %5, %k1, %k0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %3" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem1), "=&r" (tem2), "=&r" (flag1), "=r" (flag2)
      : "g" (arg1), "r" (arg2) : "cc", "rdx"
   );
   *res1 = tem1;
   *res2 = tem2;

   /* Memory-operand form, run with the flags as they are. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "movabsq $0x5555555555555555, %1" "\n\t"
      "movl %4, %%edx" "\n\t"
      "pushfq" "\n\t"
      "popq %2" "\n\t"
      "mulx %5, %k1, %k0" "\n\t"
      "pushfq" "\n\t"
      "popq %3" "\n"
      : "=&r" (tem1), "=&r" (tem2), "=&r" (flag3), "=r" (flag4)
      : "g" (arg1), "m" (arg2) : "cc", "rdx"
   );
   if (*res1 != tem1 || *res2 != tem2) {
      printf ("Difference between r and m variants\n");
   }
   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5) {
      printf ("Flags changed\n");
   }
}


/* Run the 64-bit SARX instruction: arg2 shifted right arithmetically by
 * the count held in arg1.
 *
 * res: receives the destination register (preloaded with
 *      0x5555555555555555) of the register-operand run.
 *
 * The instruction is executed twice, with arg2 first in a register and
 * then as a memory operand.  Around each execution the flag bits in
 * 0x8d5 (OF|SF|ZF|AF|PF|CF) are inverted, sampled before and after the
 * instruction, and inverted back.  "Difference between r and m variants"
 * is printed if the two results differ; "Flags changed" is printed if
 * any of the 0x8d5 flag bits differs across the instruction.
 */
__attribute__((noinline))
void do_sarx64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
{
   ULong tem, flag1, flag2, flag3, flag4;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "sarx %3, %4, %0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
      : "r" (arg1), "r" (arg2) : "cc"
   );
   *res = tem;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "sarx %3, %4, %0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
      : "r" (arg1), "m" (arg2) : "cc"
   );
   if (*res != tem) {
      printf ("Difference between r and m variants\n");
   }
   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5) {
      printf ("Flags changed\n");
   }
}

/* Run the 32-bit SARX instruction: arg2 shifted right arithmetically by
 * the count held in arg1.
 *
 * res: receives the full 64-bit destination register of the
 *      register-operand run.  It is preloaded with 0x5555555555555555
 *      and written through its 32-bit view (%k0).
 *
 * The instruction is executed twice, with arg2 first in a register and
 * then as a memory operand.  Around each execution the flag bits in
 * 0x8d5 (OF|SF|ZF|AF|PF|CF) are inverted, sampled before and after the
 * instruction, and inverted back.  "Difference between r and m variants"
 * is printed if the two results differ; "Flags changed" is printed if
 * any of the 0x8d5 flag bits differs across the instruction.
 */
__attribute__((noinline))
void do_sarx32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
{
   ULong tem, flag1, flag2, flag3, flag4;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "sarx %3, %4, %k0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
      : "r" (arg1), "r" (arg2) : "cc"
   );
   *res = tem;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "sarx %3, %4, %k0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
      : "r" (arg1), "m" (arg2) : "cc"
   );
   if (*res != tem) {
      printf ("Difference between r and m variants\n");
   }
   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5) {
      printf ("Flags changed\n");
   }
}


/* Run the 64-bit SHLX instruction: arg2 shifted left by the count held
 * in arg1.
 *
 * res: receives the destination register (preloaded with
 *      0x5555555555555555) of the register-operand run.
 *
 * The instruction is executed twice, with arg2 first in a register and
 * then as a memory operand.  Around each execution the flag bits in
 * 0x8d5 (OF|SF|ZF|AF|PF|CF) are inverted, sampled before and after the
 * instruction, and inverted back.  "Difference between r and m variants"
 * is printed if the two results differ; "Flags changed" is printed if
 * any of the 0x8d5 flag bits differs across the instruction.
 */
__attribute__((noinline))
void do_shlx64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
{
   ULong tem, flag1, flag2, flag3, flag4;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "shlx %3, %4, %0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
      : "r" (arg1), "r" (arg2) : "cc"
   );
   *res = tem;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "shlx %3, %4, %0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
      : "r" (arg1), "m" (arg2) : "cc"
   );
   if (*res != tem) {
      printf ("Difference between r and m variants\n");
   }
   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5) {
      printf ("Flags changed\n");
   }
}

/* Run the 32-bit SHLX instruction: arg2 shifted left by the count held
 * in arg1.
 *
 * res: receives the full 64-bit destination register of the
 *      register-operand run.  It is preloaded with 0x5555555555555555
 *      and written through its 32-bit view (%k0).
 *
 * The instruction is executed twice, with arg2 first in a register and
 * then as a memory operand.  Around each execution the flag bits in
 * 0x8d5 (OF|SF|ZF|AF|PF|CF) are inverted, sampled before and after the
 * instruction, and inverted back.  "Difference between r and m variants"
 * is printed if the two results differ; "Flags changed" is printed if
 * any of the 0x8d5 flag bits differs across the instruction.
 */
__attribute__((noinline))
void do_shlx32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
{
   ULong tem, flag1, flag2, flag3, flag4;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "shlx %3, %4, %k0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
      : "r" (arg1), "r" (arg2) : "cc"
   );
   *res = tem;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "shlx %3, %4, %k0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
      : "r" (arg1), "m" (arg2) : "cc"
   );
   if (*res != tem) {
      printf ("Difference between r and m variants\n");
   }
   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5) {
      printf ("Flags changed\n");
   }
}


/* Run the 64-bit SHRX instruction: arg2 shifted right logically by the
 * count held in arg1.
 *
 * res: receives the destination register (preloaded with
 *      0x5555555555555555) of the register-operand run.
 *
 * The instruction is executed twice, with arg2 first in a register and
 * then as a memory operand.  Around each execution the flag bits in
 * 0x8d5 (OF|SF|ZF|AF|PF|CF) are inverted, sampled before and after the
 * instruction, and inverted back.  "Difference between r and m variants"
 * is printed if the two results differ; "Flags changed" is printed if
 * any of the 0x8d5 flag bits differs across the instruction.
 */
__attribute__((noinline))
void do_shrx64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
{
   ULong tem, flag1, flag2, flag3, flag4;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "shrx %3, %4, %0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
      : "r" (arg1), "r" (arg2) : "cc"
   );
   *res = tem;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "shrx %3, %4, %0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
      : "r" (arg1), "m" (arg2) : "cc"
   );
   if (*res != tem) {
      printf ("Difference between r and m variants\n");
   }
   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5) {
      printf ("Flags changed\n");
   }
}

/* Run the 32-bit SHRX instruction: arg2 shifted right logically by the
 * count held in arg1.
 *
 * res: receives the full 64-bit destination register of the
 *      register-operand run.  It is preloaded with 0x5555555555555555
 *      and written through its 32-bit view (%k0).
 *
 * The instruction is executed twice, with arg2 first in a register and
 * then as a memory operand.  Around each execution the flag bits in
 * 0x8d5 (OF|SF|ZF|AF|PF|CF) are inverted, sampled before and after the
 * instruction, and inverted back.  "Difference between r and m variants"
 * is printed if the two results differ; "Flags changed" is printed if
 * any of the 0x8d5 flag bits differs across the instruction.
 */
__attribute__((noinline))
void do_shrx32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
{
   ULong tem, flag1, flag2, flag3, flag4;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "shrx %3, %4, %k0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
      : "r" (arg1), "r" (arg2) : "cc"
   );
   *res = tem;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "shrx %3, %4, %k0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
      : "r" (arg1), "m" (arg2) : "cc"
   );
   if (*res != tem) {
      printf ("Difference between r and m variants\n");
   }
   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5) {
      printf ("Flags changed\n");
   }
}


/* Run the 64-bit RORX instruction (rotate right by an immediate) on arg.
 *
 * res1: receives arg rotated by the immediate 12, with arg supplied in a
 *       register.
 * res2: receives the result for the immediate 67, with arg supplied as a
 *       memory operand.  67 exceeds the operand width; per the
 *       instruction definition the count is taken modulo 64, i.e. a
 *       rotate by 3.
 *
 * Unlike most routines in this file the two runs use different counts,
 * so their results are not compared with each other.  Around each
 * execution the flag bits in 0x8d5 (OF|SF|ZF|AF|PF|CF) are inverted,
 * sampled before and after the instruction, and inverted back; "Flags
 * changed" is printed if any of those bits differs across the
 * instruction.
 */
__attribute__((noinline))
void do_rorx64 ( /*OUT*/ULong* res1, /*OUT*/ULong* res2, ULong arg )
{
   ULong tem, flag1, flag2, flag3, flag4;

   /* Register operand, in-range count. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "rorx $12, %3, %0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag1), "=r" (flag2) : "r" (arg) : "cc"
   );
   *res1 = tem;

   /* Memory operand, out-of-range count. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "rorx $67, %3, %0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag3), "=r" (flag4) : "m" (arg) : "cc"
   );
   *res2 = tem;
   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5) {
      printf ("Flags changed\n");
   }
}

/* Run the 32-bit RORX instruction (rotate right by an immediate) on arg.
 *
 * res1: receives the full 64-bit destination register after rotating
 *       arg by the immediate 12, with arg supplied in a register.
 * res2: receives the full 64-bit destination register for the immediate
 *       67, with arg supplied as a memory operand.  67 exceeds the
 *       operand width; per the instruction definition the count is
 *       taken modulo 32, i.e. a rotate by 3.
 *
 * The destination is preloaded with 0x5555555555555555 and written
 * through its 32-bit view (%k0).  The two runs use different counts, so
 * their results are not compared with each other.  Around each
 * execution the flag bits in 0x8d5 (OF|SF|ZF|AF|PF|CF) are inverted,
 * sampled before and after the instruction, and inverted back; "Flags
 * changed" is printed if any of those bits differs across the
 * instruction.
 */
__attribute__((noinline))
void do_rorx32 ( /*OUT*/ULong* res1, /*OUT*/ULong* res2, UInt arg )
{
   ULong tem, flag1, flag2, flag3, flag4;

   /* Register operand, in-range count. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "rorx $12, %3, %k0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag1), "=r" (flag2) : "r" (arg) : "cc"
   );
   *res1 = tem;

   /* Memory operand, out-of-range count. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "rorx $67, %3, %k0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag3), "=r" (flag4) : "m" (arg) : "cc"
   );
   *res2 = tem;
   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5) {
      printf ("Flags changed\n");
   }
}


/* Run the 64-bit BLSI instruction, which isolates the lowest set bit of
 * arg (the result is arg AND -arg).
 *
 * flags: receives the flags register as read right after the
 *        instruction, masked with 0x8d5 (OF|SF|ZF|AF|PF|CF).
 * res:   receives the destination register, which is preloaded with
 *        0x5555555555555555 before the instruction runs.
 *
 * The instruction is executed twice: first with arg in a register (this
 * run provides *flags and *res), then with arg as a memory operand.  A
 * diagnostic is printed if the two runs disagree on the result or on the
 * masked flags.
 */
__attribute__((noinline))
void do_blsi64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg )
{
   ULong tem, flag;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "blsi %2, %0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "r" (arg) : "cc"
   );
   *res = tem;
   *flags = flag & 0x8d5;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "blsi %2, %0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "m" (arg) : "cc"
   );
   if (*res != tem || *flags != (flag & 0x8d5)) {
      printf ("Difference between r and m variants\n");
   }
}

/* Run the 32-bit BLSI instruction, which isolates the lowest set bit of
 * arg (the result is arg AND -arg).
 *
 * flags: receives the flags register as read right after the
 *        instruction, masked with 0x8d5 (OF|SF|ZF|AF|PF|CF).
 * res:   receives the full 64-bit destination register.  It is preloaded
 *        with 0x5555555555555555 and written through its 32-bit view
 *        (%k0).
 *
 * The instruction is executed twice: first with arg in a register (this
 * run provides *flags and *res), then with arg as a memory operand.  A
 * diagnostic is printed if the two runs disagree on the result or on the
 * masked flags.
 */
__attribute__((noinline))
void do_blsi32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, UInt arg )
{
   ULong tem, flag;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "blsi %2, %k0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "r" (arg) : "cc"
   );
   *res = tem;
   *flags = flag & 0x8d5;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "blsi %2, %k0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "m" (arg) : "cc"
   );
   if (*res != tem || *flags != (flag & 0x8d5)) {
      printf ("Difference between r and m variants\n");
   }
}


/* Run the 64-bit BLSMSK instruction, which builds a mask covering all
 * bits up to and including the lowest set bit of arg (the result is
 * arg XOR (arg - 1)).
 *
 * flags: receives the flags register as read right after the
 *        instruction, masked with 0x8d5 (OF|SF|ZF|AF|PF|CF).
 * res:   receives the destination register, which is preloaded with
 *        0x5555555555555555 before the instruction runs.
 *
 * The instruction is executed twice: first with arg in a register (this
 * run provides *flags and *res), then with arg as a memory operand.  A
 * diagnostic is printed if the two runs disagree on the result or on the
 * masked flags.
 */
__attribute__((noinline))
void do_blsmsk64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg )
{
   ULong tem, flag;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "blsmsk %2, %0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "r" (arg) : "cc"
   );
   *res = tem;
   *flags = flag & 0x8d5;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "blsmsk %2, %0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "m" (arg) : "cc"
   );
   if (*res != tem || *flags != (flag & 0x8d5)) {
      printf ("Difference between r and m variants\n");
   }
}

/* Run the 32-bit BLSMSK instruction, which builds a mask covering all
 * bits up to and including the lowest set bit of arg (the result is
 * arg XOR (arg - 1)).
 *
 * flags: receives the flags register as read right after the
 *        instruction, masked with 0x8d5 (OF|SF|ZF|AF|PF|CF).
 * res:   receives the full 64-bit destination register.  It is preloaded
 *        with 0x5555555555555555 and written through its 32-bit view
 *        (%k0).
 *
 * The instruction is executed twice: first with arg in a register (this
 * run provides *flags and *res), then with arg as a memory operand.  A
 * diagnostic is printed if the two runs disagree on the result or on the
 * masked flags.
 */
__attribute__((noinline))
void do_blsmsk32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, UInt arg )
{
   ULong tem, flag;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "blsmsk %2, %k0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "r" (arg) : "cc"
   );
   *res = tem;
   *flags = flag & 0x8d5;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "blsmsk %2, %k0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "m" (arg) : "cc"
   );
   if (*res != tem || *flags != (flag & 0x8d5)) {
      printf ("Difference between r and m variants\n");
   }
}


/* Run the 64-bit BLSR instruction, which clears the lowest set bit of
 * arg (the result is arg AND (arg - 1)).
 *
 * flags: receives the flags register as read right after the
 *        instruction, masked with 0x8d5 (OF|SF|ZF|AF|PF|CF).
 * res:   receives the destination register, which is preloaded with
 *        0x5555555555555555 before the instruction runs.
 *
 * The instruction is executed twice: first with arg in a register (this
 * run provides *flags and *res), then with arg as a memory operand.  A
 * diagnostic is printed if the two runs disagree on the result or on the
 * masked flags.
 */
__attribute__((noinline))
void do_blsr64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg )
{
   ULong tem, flag;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "blsr %2, %0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "r" (arg) : "cc"
   );
   *res = tem;
   *flags = flag & 0x8d5;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "blsr %2, %0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "m" (arg) : "cc"
   );
   if (*res != tem || *flags != (flag & 0x8d5)) {
      printf ("Difference between r and m variants\n");
   }
}

/* Run the 32-bit BLSR instruction, which clears the lowest set bit of
 * arg (the result is arg AND (arg - 1)).
 *
 * flags: receives the flags register as read right after the
 *        instruction, masked with 0x8d5 (OF|SF|ZF|AF|PF|CF).
 * res:   receives the full 64-bit destination register.  It is preloaded
 *        with 0x5555555555555555 and written through its 32-bit view
 *        (%k0).
 *
 * The instruction is executed twice: first with arg in a register (this
 * run provides *flags and *res), then with arg as a memory operand.  A
 * diagnostic is printed if the two runs disagree on the result or on the
 * masked flags.
 */
__attribute__((noinline))
void do_blsr32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, UInt arg )
{
   ULong tem, flag;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "blsr %2, %k0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "r" (arg) : "cc"
   );
   *res = tem;
   *flags = flag & 0x8d5;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "blsr %2, %k0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "m" (arg) : "cc"
   );
   if (*res != tem || *flags != (flag & 0x8d5)) {
      printf ("Difference between r and m variants\n");
   }
}


/* Run the 64-bit BEXTR instruction, which extracts a bit field from
 * arg2 under the control word arg1 (bits 7:0 of arg1 give the start
 * position, bits 15:8 the field length).
 *
 * flags: receives the flags register as read right after the
 *        instruction, masked with 0x8d5 (OF|SF|ZF|AF|PF|CF).
 * res:   receives the destination register, which is preloaded with
 *        0x5555555555555555 before the instruction runs.
 *
 * The instruction is executed twice: first with arg2 in a register (this
 * run provides *flags and *res), then with arg2 as a memory operand.  A
 * diagnostic is printed if the two runs disagree on the result or on the
 * masked flags.
 */
__attribute__((noinline))
void do_bextr64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res,
                  ULong arg1, ULong arg2 )
{
   ULong tem, flag;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "bextr %2, %3, %0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "r" (arg1), "r" (arg2) : "cc"
   );
   *res = tem;
   *flags = flag & 0x8d5;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "bextr %2, %3, %0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "r" (arg1), "m" (arg2) : "cc"
   );
   if (*res != tem || *flags != (flag & 0x8d5)) {
      printf ("Difference between r and m variants\n");
   }
}

/* Run the 32-bit BEXTR instruction, which extracts a bit field from
 * arg2 under the control word arg1 (bits 7:0 of arg1 give the start
 * position, bits 15:8 the field length).
 *
 * flags: receives the flags register as read right after the
 *        instruction, masked with 0x8d5 (OF|SF|ZF|AF|PF|CF).
 * res:   receives the full 64-bit destination register.  It is preloaded
 *        with 0x5555555555555555 and written through its 32-bit view
 *        (%k0).
 *
 * The instruction is executed twice: first with arg2 in a register (this
 * run provides *flags and *res), then with arg2 as a memory operand.  A
 * diagnostic is printed if the two runs disagree on the result or on the
 * masked flags.
 */
__attribute__((noinline))
void do_bextr32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res,
                  UInt arg1, UInt arg2 )
{
   ULong tem, flag;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "bextr %2, %3, %k0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "r" (arg1), "r" (arg2) : "cc"
   );
   *res = tem;
   *flags = flag & 0x8d5;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "bextr %2, %3, %k0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "r" (arg1), "m" (arg2) : "cc"
   );
   if (*res != tem || *flags != (flag & 0x8d5)) {
      printf ("Difference between r and m variants\n");
   }
}


/* Run the 64-bit BZHI instruction, which copies arg2 with all bits from
 * the position given in bits 7:0 of arg1 upwards cleared.
 *
 * flags: receives the flags register as read right after the
 *        instruction, masked with 0x8d5 (OF|SF|ZF|AF|PF|CF).
 * res:   receives the destination register, which is preloaded with
 *        0x5555555555555555 before the instruction runs.
 *
 * The instruction is executed twice: first with arg2 in a register (this
 * run provides *flags and *res), then with arg2 as a memory operand.  A
 * diagnostic is printed if the two runs disagree on the result or on the
 * masked flags.
 */
__attribute__((noinline))
void do_bzhi64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res,
                 ULong arg1, ULong arg2 )
{
   ULong tem, flag;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "bzhi %2, %3, %0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "r" (arg1), "r" (arg2) : "cc"
   );
   *res = tem;
   *flags = flag & 0x8d5;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "bzhi %2, %3, %0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "r" (arg1), "m" (arg2) : "cc"
   );
   if (*res != tem || *flags != (flag & 0x8d5)) {
      printf ("Difference between r and m variants\n");
   }
}

/* Run the 32-bit BZHI instruction, which copies arg2 with all bits from
 * the position given in bits 7:0 of arg1 upwards cleared.
 *
 * flags: receives the flags register as read right after the
 *        instruction, masked with 0x8d5 (OF|SF|ZF|AF|PF|CF).
 * res:   receives the full 64-bit destination register.  It is preloaded
 *        with 0x5555555555555555 and written through its 32-bit view
 *        (%k0).
 *
 * The instruction is executed twice: first with arg2 in a register (this
 * run provides *flags and *res), then with arg2 as a memory operand.  A
 * diagnostic is printed if the two runs disagree on the result or on the
 * masked flags.
 */
__attribute__((noinline))
void do_bzhi32 ( /*OUT*/UInt* flags, /*OUT*/ULong* res,
                 UInt arg1, UInt arg2 )
{
   ULong tem, flag;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "bzhi %2, %3, %k0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "r" (arg1), "r" (arg2) : "cc"
   );
   *res = tem;
   *flags = flag & 0x8d5;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "bzhi %2, %3, %k0" "\n\t"
      "pushfq" "\n\t"
      "popq %1" "\n"
      : "=&r" (tem), "=&r" (flag) : "r" (arg1), "m" (arg2) : "cc"
   );
   if (*res != tem || *flags != (flag & 0x8d5)) {
      printf ("Difference between r and m variants\n");
   }
}


/* Run the 64-bit PDEP instruction (parallel bit deposit): the low-order
 * bits of arg2 are scattered to the bit positions that are set in the
 * mask arg1; all other result bits are zero.
 *
 * res: receives the destination register (preloaded with
 *      0x5555555555555555) of the register-operand run.
 *
 * The instruction is executed twice, with the mask arg1 first in a
 * register and then as a memory operand.  Around each execution the flag
 * bits in 0x8d5 (OF|SF|ZF|AF|PF|CF) are inverted, sampled before and
 * after the instruction, and inverted back.  "Difference between r and m
 * variants" is printed if the two results differ; "Flags changed" is
 * printed if any of the 0x8d5 flag bits differs across the instruction.
 */
__attribute__((noinline))
void do_pdep64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
{
   ULong tem, flag1, flag2, flag3, flag4;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "pdep %3, %4, %0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
      : "r" (arg1), "r" (arg2) : "cc"
   );
   *res = tem;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "pdep %3, %4, %0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
      : "m" (arg1), "r" (arg2) : "cc"
   );
   if (*res != tem) {
      printf ("Difference between r and m variants\n");
   }
   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5) {
      printf ("Flags changed\n");
   }
}

/* Run the 32-bit PDEP instruction (parallel bit deposit): the low-order
 * bits of arg2 are scattered to the bit positions that are set in the
 * mask arg1; all other result bits are zero.
 *
 * res: receives the full 64-bit destination register of the
 *      register-operand run.  It is preloaded with 0x5555555555555555
 *      and written through its 32-bit view (%k0).
 *
 * The instruction is executed twice, with the mask arg1 first in a
 * register and then as a memory operand.  Around each execution the flag
 * bits in 0x8d5 (OF|SF|ZF|AF|PF|CF) are inverted, sampled before and
 * after the instruction, and inverted back.  "Difference between r and m
 * variants" is printed if the two results differ; "Flags changed" is
 * printed if any of the 0x8d5 flag bits differs across the instruction.
 */
__attribute__((noinline))
void do_pdep32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
{
   ULong tem, flag1, flag2, flag3, flag4;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "pdep %3, %4, %k0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
      : "r" (arg1), "r" (arg2) : "cc"
   );
   *res = tem;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "pdep %3, %4, %k0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
      : "m" (arg1), "r" (arg2) : "cc"
   );
   if (*res != tem) {
      printf ("Difference between r and m variants\n");
   }
   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5) {
      printf ("Flags changed\n");
   }
}


/* Run the 64-bit PEXT instruction (parallel bit extract): the bits of
 * arg2 at the positions that are set in the mask arg1 are gathered into
 * the low-order bits of the result; the remaining result bits are zero.
 *
 * res: receives the destination register (preloaded with
 *      0x5555555555555555) of the register-operand run.
 *
 * The instruction is executed twice, with the mask arg1 first in a
 * register and then as a memory operand.  Around each execution the flag
 * bits in 0x8d5 (OF|SF|ZF|AF|PF|CF) are inverted, sampled before and
 * after the instruction, and inverted back.  "Difference between r and m
 * variants" is printed if the two results differ; "Flags changed" is
 * printed if any of the 0x8d5 flag bits differs across the instruction.
 */
__attribute__((noinline))
void do_pext64 ( /*OUT*/ULong* res, ULong arg1, ULong arg2 )
{
   ULong tem, flag1, flag2, flag3, flag4;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "pext %3, %4, %0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
      : "r" (arg1), "r" (arg2) : "cc"
   );
   *res = tem;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "pext %3, %4, %0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
      : "m" (arg1), "r" (arg2) : "cc"
   );
   if (*res != tem) {
      printf ("Difference between r and m variants\n");
   }
   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5) {
      printf ("Flags changed\n");
   }
}

/* Run the 32-bit PEXT instruction (parallel bit extract): the bits of
 * arg2 at the positions that are set in the mask arg1 are gathered into
 * the low-order bits of the result; the remaining result bits are zero.
 *
 * res: receives the full 64-bit destination register of the
 *      register-operand run.  It is preloaded with 0x5555555555555555
 *      and written through its 32-bit view (%k0).
 *
 * The instruction is executed twice, with the mask arg1 first in a
 * register and then as a memory operand.  Around each execution the flag
 * bits in 0x8d5 (OF|SF|ZF|AF|PF|CF) are inverted, sampled before and
 * after the instruction, and inverted back.  "Difference between r and m
 * variants" is printed if the two results differ; "Flags changed" is
 * printed if any of the 0x8d5 flag bits differs across the instruction.
 */
__attribute__((noinline))
void do_pext32 ( /*OUT*/ULong* res, UInt arg1, UInt arg2 )
{
   ULong tem, flag1, flag2, flag3, flag4;

   /* Register-operand form. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "pext %3, %4, %k0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag1), "=r" (flag2)
      : "r" (arg1), "r" (arg2) : "cc"
   );
   *res = tem;

   /* Memory-operand form, used only as a cross-check. */
   __asm__ __volatile__(
      "movabsq $0x5555555555555555, %0" "\n\t"
      "pushfq" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "movq (%%rsp), %1" "\n\t"
      "popfq" "\n\t"
      "pext %3, %4, %k0" "\n\t"
      "pushfq" "\n\t"
      "movq (%%rsp), %2" "\n\t"
      "xorq $0x8d5, (%%rsp)" "\n\t"
      "popfq" "\n"
      : "=&r" (tem), "=&r" (flag3), "=r" (flag4)
      : "m" (arg1), "r" (arg2) : "cc"
   );
   if (*res != tem) {
      printf ("Difference between r and m variants\n");
   }
   if (((flag1 ^ flag2) | (flag3 ^ flag4)) & 0x8d5) {
      printf ("Flags changed\n");
   }
}


/* Starting operands shared by every test loop in main(). */
#define BMI_SEED1 0xFEDC192837475675ULL
#define BMI_SEED2 0x57657438291CDEF0ULL

/* Derive the next test operand from the current one.  The value gets
   smaller on every step and ends up at zero, which is what terminates
   the loops below. */
static ULong next_operand ( ULong w )
{
   return ((w >> 2) | (w >> 1)) + (w / 17ULL);
}

/* Each macro below expands to one complete test loop for the routine
   do_<name>: start from the seed operand(s), call the routine, print one
   line labelled <name>, stop once the first operand has reached zero,
   otherwise step the operand(s) and repeat.  The macros differ only in
   the shape of the routine's signature and of the printed line.
   Operands are kept as ULong and are narrowed implicitly when the
   routine takes UInt arguments, but are always printed in full. */

/* Two operands in; result and flags out. */
#define BMI_TEST_BINARY_FLAGS(name)                                      \
   do {                                                                  \
      ULong w1 = BMI_SEED1;                                              \
      ULong w2 = BMI_SEED2;                                              \
      for (;;) {                                                         \
         ULong res;                                                      \
         UInt  flags;                                                    \
         do_##name(&flags, &res, w1, w2);                                \
         printf(#name " %016llx %016llx -> %016llx %04x\n",              \
                w1, w2, res, flags);                                     \
         if (w1 == 0) break;                                             \
         w1 = next_operand(w1);                                          \
         w2 = next_operand(w2);                                          \
      }                                                                  \
   } while (0)

/* Two operands in; two results out. */
#define BMI_TEST_BINARY_PAIR(name)                                       \
   do {                                                                  \
      ULong w1 = BMI_SEED1;                                              \
      ULong w2 = BMI_SEED2;                                              \
      for (;;) {                                                         \
         ULong res1, res2;                                               \
         do_##name(&res1, &res2, w1, w2);                                \
         printf(#name " %016llx %016llx -> %016llx %016llx\n",           \
                w1, w2, res1, res2);                                     \
         if (w1 == 0) break;                                             \
         w1 = next_operand(w1);                                          \
         w2 = next_operand(w2);                                          \
      }                                                                  \
   } while (0)

/* Two operands in; one result out. */
#define BMI_TEST_BINARY(name)                                            \
   do {                                                                  \
      ULong w1 = BMI_SEED1;                                              \
      ULong w2 = BMI_SEED2;                                              \
      for (;;) {                                                         \
         ULong res;                                                      \
         do_##name(&res, w1, w2);                                        \
         printf(#name " %016llx %016llx -> %016llx\n", w1, w2, res);     \
         if (w1 == 0) break;                                             \
         w1 = next_operand(w1);                                          \
         w2 = next_operand(w2);                                          \
      }                                                                  \
   } while (0)

/* One operand in; two results out. */
#define BMI_TEST_UNARY_PAIR(name)                                        \
   do {                                                                  \
      ULong w1 = BMI_SEED1;                                              \
      for (;;) {                                                         \
         ULong res1, res2;                                               \
         do_##name(&res1, &res2, w1);                                    \
         printf(#name " %016llx -> %016llx %016llx\n", w1, res1, res2);  \
         if (w1 == 0) break;                                             \
         w1 = next_operand(w1);                                          \
      }                                                                  \
   } while (0)

/* One operand in; result and flags out. */
#define BMI_TEST_UNARY_FLAGS(name)                                       \
   do {                                                                  \
      ULong w1 = BMI_SEED1;                                              \
      for (;;) {                                                         \
         ULong res;                                                      \
         UInt  flags;                                                    \
         do_##name(&flags, &res, w1);                                    \
         printf(#name " %016llx -> %016llx %04x\n", w1, res, flags);     \
         if (w1 == 0) break;                                             \
         w1 = next_operand(w1);                                          \
      }                                                                  \
   } while (0)

/* Drive every test routine over the operand sequence and print one line
   per call.  The order of the loops determines the order of the output,
   so it must not be changed casually.  Always returns 0. */
int main ( void )
{
   BMI_TEST_BINARY_FLAGS(andn64);
   BMI_TEST_BINARY_FLAGS(andn32);

   BMI_TEST_BINARY_PAIR(mulx64);
   BMI_TEST_BINARY_PAIR(mulx32);

   BMI_TEST_BINARY(sarx64);
   BMI_TEST_BINARY(sarx32);
   BMI_TEST_BINARY(shlx64);
   BMI_TEST_BINARY(shlx32);
   BMI_TEST_BINARY(shrx64);
   BMI_TEST_BINARY(shrx32);

   BMI_TEST_UNARY_PAIR(rorx64);
   BMI_TEST_UNARY_PAIR(rorx32);

   BMI_TEST_UNARY_FLAGS(blsi64);
   BMI_TEST_UNARY_FLAGS(blsi32);
   BMI_TEST_UNARY_FLAGS(blsmsk64);
   BMI_TEST_UNARY_FLAGS(blsmsk32);
   BMI_TEST_UNARY_FLAGS(blsr64);
   BMI_TEST_UNARY_FLAGS(blsr32);

   BMI_TEST_BINARY_FLAGS(bextr64);
   BMI_TEST_BINARY_FLAGS(bextr32);
   BMI_TEST_BINARY_FLAGS(bzhi64);
   BMI_TEST_BINARY_FLAGS(bzhi32);

   BMI_TEST_BINARY(pdep64);
   BMI_TEST_BINARY(pdep32);
   BMI_TEST_BINARY(pext64);
   BMI_TEST_BINARY(pext32);

   return 0;
}