1 
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <assert.h>
5 
/* Set to 1 to make send() echo every line it folds into the CRC. */
#define VERBOSE 0

/* Short aliases for the basic integer types used throughout this file. */
typedef  unsigned char       UChar;
typedef  unsigned short      UShort;
typedef  unsigned            UInt;
typedef  int                 Int;
typedef  unsigned long       UWord;
typedef  unsigned long long  ULong;
typedef  long long           Long;
typedef  char                HChar;
16 
/* Simple multiply-with-carry pseudo-random generator.  The state lives
   in two function-local statics with fixed seeds, so every run of the
   program sees the same sequence of values. */
unsigned myrandom(void)
{
   static unsigned hi_state = 13;   /* stepped with multiplier 36969 */
   static unsigned lo_state = 11;   /* stepped with multiplier 18000 */

   const unsigned hi_carry = hi_state >> 16;
   const unsigned lo_carry = lo_state >> 16;

   hi_state = (hi_state & 0xFFFF) * 36969 + hi_carry;
   lo_state = (lo_state & 0xFFFF) * 18000 + lo_carry;

   /* hi_state supplies the upper half of the result, lo_state is
      added in whole. */
   return lo_state + (hi_state << 16);
}
28 
29 /////////////////////////////////////////////////////////////////
30 // BEGIN crc32 stuff                                           //
31 /////////////////////////////////////////////////////////////////
32 
/* 256-entry lookup table used by UPDATE_CRC: the table is indexed by
   the top byte of the running CRC XORed with the next input byte.
   NOTE(review): entry 1 is 0x04c11db7, which looks like the usual
   CRC-32 generator polynomial in MSB-first form -- confirm before
   relying on that. */
static const UInt crc32Table[256] = {

   /*-- Ugly, innit? --*/

   0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
   0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
   0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
   0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
   0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
   0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
   0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
   0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
   0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
   0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
   0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
   0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
   0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
   0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
   0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
   0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
   0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
   0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
   0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
   0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
   0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
   0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
   0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
   0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
   0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
   0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
   0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
   0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
   0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
   0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
   0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
   0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
   0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
   0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
   0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
   0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
   0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
   0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
   0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
   0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
   0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
   0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
   0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
   0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
   0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
   0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
   0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
   0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
   0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
   0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
   0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
   0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
   0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
   0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
   0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
   0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
   0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
   0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
   0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
   0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
   0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
   0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
   0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
   0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
};
102 
/* Fold one byte into a running CRC.
     crcVar: UInt lvalue holding the CRC; it is updated in place.  It is
             evaluated more than once, so it must have no side effects.
     cha:    the byte to add; the whole argument expression is converted
             to UChar before it is used as part of the table index.
   The body is wrapped in do { } while (0) so the macro behaves as one
   statement (safe in an unbraced if/else), and both arguments are
   parenthesised so compound expressions are handled as a unit. */
#define UPDATE_CRC(crcVar,cha)                         \
   do {                                                \
      (crcVar) = ((crcVar) << 8) ^                     \
                 crc32Table[((crcVar) >> 24) ^         \
                            ((UChar)(cha))];           \
   } while (0)
109 
/* Feed nBytes bytes, starting at 'bytes', through the CRC.
     crcIn:   CRC value to start from.
     returns: the updated running CRC (not finalised). */
static UInt crcBytes ( UChar* bytes, UWord nBytes, UInt crcIn )
{
   UInt  running = crcIn;
   UWord i;
   for (i = 0; i < nBytes; i++) {
      UPDATE_CRC(running, bytes[i]);
   }
   return running;
}
128 
/* Last step of the CRC computation: return the bitwise complement of
   the running value. */
static UInt crcFinalise ( UInt crc )
{
   const UInt allOnes = ~(UInt)0;
   return crc ^ allOnes;
}
132 
133 ////////
134 
135 static UInt theCRC = 0xFFFFFFFF;
136 
137 static HChar outBuf[1024];
138 // take output that's in outBuf, length as specified, and
139 // update the running crc.
send(int nbytes)140 static void send ( int nbytes )
141 {
142    assert( ((unsigned int)nbytes) < sizeof(outBuf)-1);
143    assert(outBuf[nbytes] == 0);
144    theCRC = crcBytes( (UChar*)&outBuf[0], nbytes, theCRC );
145    if (VERBOSE) printf("SEND %08x %s", theCRC, outBuf);
146 }
147 
148 
149 /////////////////////////////////////////////////////////////////
150 // END crc32 stuff                                             //
151 /////////////////////////////////////////////////////////////////
152 
/* Operand values used by the locked-instruction tests.  Each table is
   the same pattern of low-byte values repeated with the upper bits
   clear, with 0xFF00 set, and with 0xFFFFFF00 set.  Switch the #if to
   select the longer table. */
#if 0

// full version
#define NVALS 57

static unsigned int val[NVALS] = {
   /* upper bits clear */
   0x00, 0x01, 0x02, 0x03, 0x3F, 0x40, 0x41,
   0x7E, 0x7F, 0x80, 0x81, 0x82, 0xBF, 0xC0, 0xC1,
   0xFC, 0xFD, 0xFE, 0xFF,

   /* 0xFF00 set */
   0xFF00, 0xFF01, 0xFF02, 0xFF03, 0xFF3F, 0xFF40, 0xFF41,
   0xFF7E, 0xFF7F, 0xFF80, 0xFF81, 0xFF82, 0xFFBF, 0xFFC0, 0xFFC1,
   0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF,

   /* 0xFFFFFF00 set */
   0xFFFFFF00, 0xFFFFFF01, 0xFFFFFF02, 0xFFFFFF03,
   0xFFFFFF3F, 0xFFFFFF40, 0xFFFFFF41,
   0xFFFFFF7E, 0xFFFFFF7F, 0xFFFFFF80, 0xFFFFFF81, 0xFFFFFF82,
   0xFFFFFFBF, 0xFFFFFFC0, 0xFFFFFFC1,
   0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, 0xFFFFFFFF
};

#else

// shortened version, for use as valgrind regtest
#define NVALS 27

static unsigned int val[NVALS] = {
   /* upper bits clear */
   0x00, 0x01, 0x3F, 0x40, 0x7F, 0x80, 0xBF, 0xC0, 0xFF,

   /* 0xFF00 set */
   0xFF00, 0xFF01, 0xFF3F, 0xFF40, 0xFF7F, 0xFF80, 0xFFBF, 0xFFC0, 0xFFFF,

   /* 0xFFFFFF00 set */
   0xFFFFFF00, 0xFFFFFF01, 0xFFFFFF3F, 0xFFFFFF40, 0xFFFFFF7F,
   0xFFFFFF80, 0xFFFFFFBF, 0xFFFFFFC0, 0xFFFFFFFF
};

#endif
204 
205 /////////////////////////////////////
206 
/* Condition-code bits of the x86 flags register that the tests set on
   input and report on output. */
#define CC_C    (1 << 0)     /* carry           */
#define CC_P    (1 << 2)     /* parity          */
#define CC_A    (1 << 4)     /* auxiliary carry */
#define CC_Z    (1 << 6)     /* zero            */
#define CC_S    (1 << 7)     /* sign            */
#define CC_O    (1 << 11)    /* overflow        */

/* All of the above; used to strip every other flag bit from results. */
#define CC_MASK (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C)
215 
/* Generate do_locked_G_E_<_name>(): a test of the lock-prefixed
   instruction <_name> with a register source and a memory destination.
     _name: instruction mnemonic including its size suffix (e.g. addb).
     _eax:  name of the source register to use (al, ax or eax).
   The generated function walks every pair (val[v1], val[v2]) and, for
   each pair, all 64 combinations of the O/S/Z/A/C/P input flags.  For
   each case the asm loads flags_in into the flags register, puts g_val
   in %eax and the address of e_val in %ebx, runs
   "lock; <_name> %<_eax>,(%ebx)", and stores the resulting flags in
   block[3].  One line describing the inputs and outputs is formatted
   into outBuf and folded into the CRC with send().
   The address of e_val is stored in an int and the asm uses
   pushl/popfl/pushfl, so this is 32-bit x86 code only. */
#define GEN_do_locked_G_E(_name,_eax)   \
  \
  __attribute__((noinline)) void do_locked_G_E_##_name ( void )  \
  {   \
    volatile int e_val, g_val, e_val_before;   \
    int o, s, z, a, c, p, v1, v2, flags_in;   \
    int block[4];   \
    \
    for (v1 = 0; v1 < NVALS; v1++) {   \
    for (v2 = 0; v2 < NVALS; v2++) {   \
    \
    for (o = 0; o < 2; o++) {   \
    for (s = 0; s < 2; s++) {   \
    for (z = 0; z < 2; z++) {   \
    for (a = 0; a < 2; a++) {   \
    for (c = 0; c < 2; c++) {   \
    for (p = 0; p < 2; p++) {   \
      \
      flags_in = (o ? CC_O : 0)   \
               | (s ? CC_S : 0)   \
               | (z ? CC_Z : 0)   \
               | (a ? CC_A : 0)   \
               | (c ? CC_C : 0)   \
               | (p ? CC_P : 0);   \
      \
      g_val = val[v1];   \
      e_val = val[v2];   \
      e_val_before = e_val;   \
      \
      /* block: [0] flags in, [1] source value, */   \
      /*        [2] address of e_val, [3] flags out */   \
      block[0] = flags_in;   \
      block[1] = g_val;   \
      block[2] = (int)(long)&e_val;   \
      block[3] = 0;   \
      __asm__ __volatile__(   \
          "movl 0(%0), %%eax\n\t"   \
          "pushl %%eax\n\t"   \
          "popfl\n\t"   \
          "movl 4(%0), %%eax\n\t"   \
          "movl 8(%0), %%ebx\n\t"   \
          "lock; " #_name " %%" #_eax ",(%%ebx)\n\t"   \
          "pushfl\n\t"   \
          "popl %%eax\n\t"   \
          "movl %%eax, 12(%0)\n\t"   \
          : : "r"(&block[0]) : "eax","ebx","cc","memory"   \
      );   \
      \
      send( \
         sprintf(outBuf,                                        \
                 "%s G=%08x E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n", \
                 #_name, g_val, e_val_before, flags_in,   \
                 e_val, block[3] & CC_MASK) );            \
      \
    }}}}}}   \
    \
    }}   \
  }
272 
/* Register-source tests: for each of add, or, adc, sbb, and, sub and
   xor, one function per operand size (byte via al, word via ax, long
   via eax) -- 21 functions in total. */
GEN_do_locked_G_E(addb,al)
GEN_do_locked_G_E(addw,ax)
GEN_do_locked_G_E(addl,eax)

GEN_do_locked_G_E(orb, al)
GEN_do_locked_G_E(orw, ax)
GEN_do_locked_G_E(orl, eax)

GEN_do_locked_G_E(adcb,al)
GEN_do_locked_G_E(adcw,ax)
GEN_do_locked_G_E(adcl,eax)

GEN_do_locked_G_E(sbbb,al)
GEN_do_locked_G_E(sbbw,ax)
GEN_do_locked_G_E(sbbl,eax)

GEN_do_locked_G_E(andb,al)
GEN_do_locked_G_E(andw,ax)
GEN_do_locked_G_E(andl,eax)

GEN_do_locked_G_E(subb,al)
GEN_do_locked_G_E(subw,ax)
GEN_do_locked_G_E(subl,eax)

GEN_do_locked_G_E(xorb,al)
GEN_do_locked_G_E(xorw,ax)
GEN_do_locked_G_E(xorl,eax)
300 
301 
302 
303 
/* Generate do_locked_imm_E_<_name>_<_imm>(): a test of the lock-prefixed
   instruction <_name> with an immediate source and a memory destination.
     _name: instruction mnemonic including its size suffix (e.g. addb).
     _eax:  accepted for symmetry with GEN_do_locked_G_E; the macro body
            does not use it.
     _imm:  the immediate operand, pasted into both the function name
            and the instruction text.
   The generated function walks every val[v2] and, for each value, all
   64 combinations of the O/S/Z/A/C/P input flags.  For each case the
   asm loads flags_in into the flags register, puts the address of e_val
   in %ebx, runs "lock; <_name> $<_imm>,(%ebx)", and stores the resulting
   flags in block[2].  One line per case is formatted into outBuf and
   folded into the CRC with send().
   The address of e_val is stored in an int and the asm uses
   pushl/popfl/pushfl, so this is 32-bit x86 code only. */
#define GEN_do_locked_imm_E(_name,_eax,_imm)        \
  \
  __attribute__((noinline)) void do_locked_imm_E_##_name##_##_imm ( void )  \
  {   \
    volatile int e_val, e_val_before;   \
    int o, s, z, a, c, p, v2, flags_in;   \
    int block[3];   \
    \
    for (v2 = 0; v2 < NVALS; v2++) {   \
    \
    for (o = 0; o < 2; o++) {   \
    for (s = 0; s < 2; s++) {   \
    for (z = 0; z < 2; z++) {   \
    for (a = 0; a < 2; a++) {   \
    for (c = 0; c < 2; c++) {   \
    for (p = 0; p < 2; p++) {   \
      \
      flags_in = (o ? CC_O : 0)   \
               | (s ? CC_S : 0)   \
               | (z ? CC_Z : 0)   \
               | (a ? CC_A : 0)   \
               | (c ? CC_C : 0)   \
               | (p ? CC_P : 0);   \
      \
      e_val = val[v2];   \
      e_val_before = e_val;   \
      \
      /* block: [0] flags in, [1] address of e_val, [2] flags out */   \
      block[0] = flags_in;   \
      block[1] = (int)(long)&e_val;   \
      block[2] = 0;   \
      __asm__ __volatile__(   \
          "movl 0(%0), %%eax\n\t"   \
          "pushl %%eax\n\t"   \
          "popfl\n\t"   \
          "movl 4(%0), %%ebx\n\t"   \
          "lock; " #_name " $" #_imm ",(%%ebx)\n\t"   \
          "pushfl\n\t"   \
          "popl %%eax\n\t"   \
          "movl %%eax, 8(%0)\n\t"   \
          : : "r"(&block[0]) : "eax","ebx","cc","memory"   \
      );   \
      \
      send( \
        sprintf(outBuf, \
             "%s I=%s E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n",       \
             #_name, #_imm, e_val_before, flags_in,         \
                e_val, block[2] & CC_MASK) );               \
      \
    }}}}}}   \
    \
    }   \
  }
356 
/* Immediate-source tests: for each of add, or, adc, sbb, and, sub and
   xor, two immediates per operand size (byte, word, long) -- 42
   functions in total. */
GEN_do_locked_imm_E(addb,al,0x7F)
GEN_do_locked_imm_E(addb,al,0xF1)
GEN_do_locked_imm_E(addw,ax,0x7E)
GEN_do_locked_imm_E(addw,ax,0x9325)
GEN_do_locked_imm_E(addl,eax,0x7D)
GEN_do_locked_imm_E(addl,eax,0x31415927)

GEN_do_locked_imm_E(orb,al,0x7F)
GEN_do_locked_imm_E(orb,al,0xF1)
GEN_do_locked_imm_E(orw,ax,0x7E)
GEN_do_locked_imm_E(orw,ax,0x9325)
GEN_do_locked_imm_E(orl,eax,0x7D)
GEN_do_locked_imm_E(orl,eax,0x31415927)

GEN_do_locked_imm_E(adcb,al,0x7F)
GEN_do_locked_imm_E(adcb,al,0xF1)
GEN_do_locked_imm_E(adcw,ax,0x7E)
GEN_do_locked_imm_E(adcw,ax,0x9325)
GEN_do_locked_imm_E(adcl,eax,0x7D)
GEN_do_locked_imm_E(adcl,eax,0x31415927)

GEN_do_locked_imm_E(sbbb,al,0x7F)
GEN_do_locked_imm_E(sbbb,al,0xF1)
GEN_do_locked_imm_E(sbbw,ax,0x7E)
GEN_do_locked_imm_E(sbbw,ax,0x9325)
GEN_do_locked_imm_E(sbbl,eax,0x7D)
GEN_do_locked_imm_E(sbbl,eax,0x31415927)

GEN_do_locked_imm_E(andb,al,0x7F)
GEN_do_locked_imm_E(andb,al,0xF1)
GEN_do_locked_imm_E(andw,ax,0x7E)
GEN_do_locked_imm_E(andw,ax,0x9325)
GEN_do_locked_imm_E(andl,eax,0x7D)
GEN_do_locked_imm_E(andl,eax,0x31415927)

GEN_do_locked_imm_E(subb,al,0x7F)
GEN_do_locked_imm_E(subb,al,0xF1)
GEN_do_locked_imm_E(subw,ax,0x7E)
GEN_do_locked_imm_E(subw,ax,0x9325)
GEN_do_locked_imm_E(subl,eax,0x7D)
GEN_do_locked_imm_E(subl,eax,0x31415927)

GEN_do_locked_imm_E(xorb,al,0x7F)
GEN_do_locked_imm_E(xorb,al,0xF1)
GEN_do_locked_imm_E(xorw,ax,0x7E)
GEN_do_locked_imm_E(xorw,ax,0x9325)
GEN_do_locked_imm_E(xorl,eax,0x7D)
GEN_do_locked_imm_E(xorl,eax,0x31415927)
405 
/* Generate do_locked_unary_E_<_name>(): a test of the lock-prefixed
   single-operand instruction <_name> applied to memory.
     _name: instruction mnemonic including its size suffix (e.g. decb).
     _eax:  accepted for symmetry with GEN_do_locked_G_E; the macro body
            does not use it.
   The generated function walks every val[v2] and, for each value, all
   64 combinations of the O/S/Z/A/C/P input flags.  For each case the
   asm loads flags_in into the flags register, puts the address of e_val
   in %ebx, runs "lock; <_name> (%ebx)", and stores the resulting flags
   in block[2].  One line per case is formatted into outBuf and folded
   into the CRC with send().
   The address of e_val is stored in an int and the asm uses
   pushl/popfl/pushfl, so this is 32-bit x86 code only. */
#define GEN_do_locked_unary_E(_name,_eax)        \
  \
  __attribute__((noinline)) void do_locked_unary_E_##_name ( void )  \
  {   \
    volatile int e_val, e_val_before;   \
    int o, s, z, a, c, p, v2, flags_in;   \
    int block[3];   \
    \
    for (v2 = 0; v2 < NVALS; v2++) {   \
    \
    for (o = 0; o < 2; o++) {   \
    for (s = 0; s < 2; s++) {   \
    for (z = 0; z < 2; z++) {   \
    for (a = 0; a < 2; a++) {   \
    for (c = 0; c < 2; c++) {   \
    for (p = 0; p < 2; p++) {   \
      \
      flags_in = (o ? CC_O : 0)   \
               | (s ? CC_S : 0)   \
               | (z ? CC_Z : 0)   \
               | (a ? CC_A : 0)   \
               | (c ? CC_C : 0)   \
               | (p ? CC_P : 0);   \
      \
      e_val = val[v2];   \
      e_val_before = e_val;   \
      \
      /* block: [0] flags in, [1] address of e_val, [2] flags out */   \
      block[0] = flags_in;   \
      block[1] = (int)(long)&e_val;   \
      block[2] = 0;   \
      __asm__ __volatile__(   \
          "movl 0(%0), %%eax\n\t"   \
          "pushl %%eax\n\t"   \
          "popfl\n\t"   \
          "movl 4(%0), %%ebx\n\t"   \
          "lock; " #_name " (%%ebx)\n\t"   \
          "pushfl\n\t"   \
          "popl %%eax\n\t"   \
          "movl %%eax, 8(%0)\n\t"   \
          : : "r"(&block[0]) : "eax","ebx","cc","memory"   \
      );   \
      \
      send( \
         sprintf(outBuf, \
                "%s E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n",   \
             #_name, e_val_before, flags_in,         \
                e_val, block[2] & CC_MASK));         \
      \
    }}}}}}   \
    \
    }   \
  }
458 
/* Single-operand tests: dec, inc, neg and not, one function per operand
   size (byte, word, long) -- 12 functions in total. */
GEN_do_locked_unary_E(decb,al)
GEN_do_locked_unary_E(decw,ax)
GEN_do_locked_unary_E(decl,eax)

GEN_do_locked_unary_E(incb,al)
GEN_do_locked_unary_E(incw,ax)
GEN_do_locked_unary_E(incl,eax)

GEN_do_locked_unary_E(negb,al)
GEN_do_locked_unary_E(negw,ax)
GEN_do_locked_unary_E(negl,eax)

GEN_do_locked_unary_E(notb,al)
GEN_do_locked_unary_E(notw,ax)
GEN_do_locked_unary_E(notl,eax)
474 
475 
476 /////////////////////////////////////////////////////////////////
477 
478 unsigned int btsl_mem ( UChar* base, int bitno )
479 {
480    unsigned char res;
481    __asm__
482    __volatile__("lock; btsl\t%2, %0\n\t"
483                 "setc\t%1"
484                 : "=m" (*base), "=q" (res)
485                 : "r" (bitno));
486    /* Pretty meaningless to dereference base here, but that's what you
487       have to do to get a btsl insn which refers to memory starting at
488       base. */
489    return res;
490 }
/* Run "lock; btsw" (bit test and set, 16-bit form) on memory.
     base:  byte address that the bit offset is relative to.
     bitno: signed bit offset; its low 16 bits are passed in a register,
            so the instruction can address bits outside *base itself.
   Returns the carry flag after the instruction, as 0 or 1. */
unsigned int btsw_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("lock; btsw\t%w2, %0\n\t"
                "setc\t%1"
                /* "+m": the instruction reads and then writes memory. */
                : "+m" (*base), "=q" (res)
                : "r" (bitno)
                /* "memory": the access may land outside *base. */
                : "cc", "memory");
   return res;
}
501 
/* Run "lock; btrl" (bit test and reset, 32-bit form) on memory.
     base:  byte address that the bit offset is relative to.
     bitno: signed bit offset.  It is passed in a register, so the
            instruction can address bits outside *base itself.
   Returns the carry flag after the instruction, as 0 or 1. */
unsigned int btrl_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("lock; btrl\t%2, %0\n\t"
                "setc\t%1"
                /* "+m": the instruction reads and then writes memory. */
                : "+m" (*base), "=q" (res)
                : "r" (bitno)
                /* "memory": the access may land outside *base. */
                : "cc", "memory");
   return res;
}
/* Run "lock; btrw" (bit test and reset, 16-bit form) on memory.
     base:  byte address that the bit offset is relative to.
     bitno: signed bit offset; its low 16 bits are passed in a register,
            so the instruction can address bits outside *base itself.
   Returns the carry flag after the instruction, as 0 or 1. */
unsigned int btrw_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("lock; btrw\t%w2, %0\n\t"
                "setc\t%1"
                /* "+m": the instruction reads and then writes memory. */
                : "+m" (*base), "=q" (res)
                : "r" (bitno)
                /* "memory": the access may land outside *base. */
                : "cc", "memory");
   return res;
}
522 
/* Run "lock; btcl" (bit test and complement, 32-bit form) on memory.
     base:  byte address that the bit offset is relative to.
     bitno: signed bit offset.  It is passed in a register, so the
            instruction can address bits outside *base itself.
   Returns the carry flag after the instruction, as 0 or 1. */
unsigned int btcl_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("lock; btcl\t%2, %0\n\t"
                "setc\t%1"
                /* "+m": the instruction reads and then writes memory. */
                : "+m" (*base), "=q" (res)
                : "r" (bitno)
                /* "memory": the access may land outside *base. */
                : "cc", "memory");
   return res;
}
/* Run "lock; btcw" (bit test and complement, 16-bit form) on memory.
     base:  byte address that the bit offset is relative to.
     bitno: signed bit offset; its low 16 bits are passed in a register,
            so the instruction can address bits outside *base itself.
   Returns the carry flag after the instruction, as 0 or 1. */
unsigned int btcw_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("lock; btcw\t%w2, %0\n\t"
                "setc\t%1"
                /* "+m": the instruction reads and then writes memory. */
                : "+m" (*base), "=q" (res)
                : "r" (bitno)
                /* "memory": the access may land outside *base. */
                : "cc", "memory");
   return res;
}
543 
/* Run "btl" (bit test, 32-bit form, no lock prefix) on memory.
     base:  byte address that the bit offset is relative to.
     bitno: signed bit offset.  It is passed in a register, so the
            instruction can address bits outside *base itself.
   Returns the carry flag after the instruction, as 0 or 1. */
unsigned int btl_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("btl\t%2, %1\n\t"
                "setc\t%0"
                : "=q" (res)
                /* bt only reads memory, so *base is an input operand. */
                : "m" (*base), "r" (bitno)
                /* "memory": the read may come from outside *base. */
                : "cc", "memory");
   return res;
}
/* Run "btw" (bit test, 16-bit form, no lock prefix) on memory.
     base:  byte address that the bit offset is relative to.
     bitno: signed bit offset; its low 16 bits are passed in a register,
            so the instruction can address bits outside *base itself.
   Returns the carry flag after the instruction, as 0 or 1. */
unsigned int btw_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("btw\t%w2, %1\n\t"
                "setc\t%0"
                : "=q" (res)
                /* bt only reads memory, so *base is an input operand. */
                : "m" (*base), "r" (bitno)
                /* "memory": the read may come from outside *base. */
                : "cc", "memory");
   return res;
}
565 
/* Rotate a 64-bit value left by one bit: the top bit wraps around to
   become bit 0. */
ULong rol1 ( ULong x )
{
  const ULong wrapped = x >> 63;
  const ULong shifted = x << 1;
  return shifted | wrapped;
}
570 
do_bt_G_E_tests(void)571 void do_bt_G_E_tests ( void )
572 {
573    UInt   n, bitoff, op;
574    UInt   c;
575    UChar* block;
576    ULong  carrydep, res;;
577 
578    /*------------------------ MEM-L -----------------------*/
579 
580    carrydep = 0;
581    block = calloc(200,1);
582    block += 100;
583    /* Valid bit offsets are -800 .. 799 inclusive. */
584 
585    for (n = 0; n < 10000; n++) {
586       bitoff = (myrandom() % 1600) - 800;
587       op = myrandom() % 4;
588       c = 2;
589       switch (op) {
590          case 0: c = btsl_mem(block, bitoff); break;
591          case 1: c = btrl_mem(block, bitoff); break;
592          case 2: c = btcl_mem(block, bitoff); break;
593          case 3: c = btl_mem(block, bitoff); break;
594       }
595       c &= 255;
596       assert(c == 0 || c == 1);
597       carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
598    }
599 
600    /* Compute final result */
601    block -= 100;
602    res = 0;
603    for (n = 0; n < 200; n++) {
604       UChar ch = block[n];
605       /* printf("%d ", (int)block[n]); */
606       res = rol1(res) ^ (ULong)ch;
607    }
608 
609    send( sprintf(outBuf,
610                  "bt{s,r,c}l: final res 0x%llx, carrydep 0x%llx\n",
611                  res, carrydep ));
612    free(block);
613 
614    /*------------------------ MEM-W -----------------------*/
615 
616    carrydep = 0;
617    block = calloc(200,1);
618    block += 100;
619    /* Valid bit offsets are -800 .. 799 inclusive. */
620 
621    for (n = 0; n < 10000; n++) {
622       bitoff = (myrandom() % 1600) - 800;
623       op = myrandom() % 4;
624       c = 2;
625       switch (op) {
626          case 0: c = btsw_mem(block, bitoff); break;
627          case 1: c = btrw_mem(block, bitoff); break;
628          case 2: c = btcw_mem(block, bitoff); break;
629          case 3: c = btw_mem(block, bitoff); break;
630       }
631       c &= 255;
632       assert(c == 0 || c == 1);
633       carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
634    }
635 
636    /* Compute final result */
637    block -= 100;
638    res = 0;
639    for (n = 0; n < 200; n++) {
640       UChar ch = block[n];
641       /* printf("%d ", (int)block[n]); */
642       res = rol1(res) ^ (ULong)ch;
643    }
644 
645    send( sprintf(outBuf,
646                  "bt{s,r,c}w: final res 0x%llx, carrydep 0x%llx\n",
647                  res, carrydep ));
648    free(block);
649 }
650 
651 
652 /////////////////////////////////////////////////////////////////
653 
654 /* Given a word, do bt/bts/btr/btc on bits 0, 1, 2 and 3 of it, and
655    also reconstruct the original bits 0, 1, 2, 3 by looking at the
656    carry flag.  Returned result has mashed bits 0-3 at the bottom and
657    the reconstructed original bits 0-3 as 4-7. */
658 
mash_mem_L(UInt * origp)659 UInt mash_mem_L ( UInt* origp )
660 {
661   UInt reconstructed, mashed;
662   __asm__ __volatile__ (
663      "movl %2, %%edx\n\t"
664      ""
665      "movl $0, %%eax\n\t"
666      "\n\t"
667      "btl  $0, (%%edx)\n\t"
668      "setb %%cl\n\t"
669      "movzbl %%cl, %%ecx\n\t"
670      "orl %%ecx, %%eax\n\t"
671      "\n\t"
672      "lock; btsl $1, (%%edx)\n\t"
673      "setb %%cl\n\t"
674      "movzbl %%cl, %%ecx\n\t"
675      "shll $1, %%ecx\n\t"
676      "orl %%ecx, %%eax\n\t"
677      "\n\t"
678      "lock; btrl $2, (%%edx)\n\t"
679      "setb %%cl\n\t"
680      "movzbl %%cl, %%ecx\n\t"
681      "shll $2, %%ecx\n\t"
682      "orl %%ecx, %%eax\n\t"
683      "\n\t"
684      "lock; btcl $3, (%%edx)\n\t"
685      "setb %%cl\n\t"
686      "movzbl %%cl, %%ecx\n\t"
687      "shll $3, %%ecx\n\t"
688      "orl %%ecx, %%eax\n\t"
689      "\n\t"
690      "movl %%eax, %0\n\t"
691      "movl (%%edx), %1"
692 
693      : "=r" (reconstructed), "=r" (mashed)
694      : "r" (origp)
695      : "eax", "ecx", "edx", "cc");
696   return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
697 }
698 
mash_mem_W(UShort * origp)699 UInt mash_mem_W ( UShort* origp )
700 {
701   UInt reconstructed, mashed;
702   __asm__ __volatile__ (
703      "movl %2, %%edx\n\t"
704      ""
705      "movl $0, %%eax\n\t"
706      "\n\t"
707      "btw  $0, (%%edx)\n\t"
708      "setb %%cl\n\t"
709      "movzbl %%cl, %%ecx\n\t"
710      "orl %%ecx, %%eax\n\t"
711      "\n\t"
712      "lock; btsw $1, (%%edx)\n\t"
713      "setb %%cl\n\t"
714      "movzbl %%cl, %%ecx\n\t"
715      "shll $1, %%ecx\n\t"
716      "orl %%ecx, %%eax\n\t"
717      "\n\t"
718      "lock; btrw $2, (%%edx)\n\t"
719      "setb %%cl\n\t"
720      "movzbl %%cl, %%ecx\n\t"
721      "shll $2, %%ecx\n\t"
722      "orl %%ecx, %%eax\n\t"
723      "\n\t"
724      "lock; btcw $3, (%%edx)\n\t"
725      "setb %%cl\n\t"
726      "movzbl %%cl, %%ecx\n\t"
727      "shll $3, %%ecx\n\t"
728      "orl %%ecx, %%eax\n\t"
729      "\n\t"
730      "movl %%eax, %0\n\t"
731      "movzwl (%%edx), %1"
732 
733      : "=r" (reconstructed), "=r" (mashed)
734      : "r" (origp)
735      : "eax", "ecx", "edx", "cc");
736   return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
737 }
738 
739 
do_bt_imm_E_tests(void)740 void do_bt_imm_E_tests( void )
741 {
742   int i;
743   UInt*   iil = malloc(sizeof(UInt));
744   UShort* iiw = malloc(sizeof(UShort));
745   for (i = 0; i < 0x10; i++) {
746     *iil = i;
747     *iiw = i;
748     send( sprintf(outBuf, "0x%x -> 0x%02x 0x%02x\n", i,
749                   mash_mem_L(iil), mash_mem_W(iiw)));
750   }
751   free(iil);
752   free(iiw);
753 }
754 
755 
756 
757 /////////////////////////////////////////////////////////////////
758 
main(void)759 int main ( void )
760 {
761   do_locked_G_E_addb();
762   do_locked_G_E_addw();
763   do_locked_G_E_addl();
764 
765   do_locked_G_E_orb();
766   do_locked_G_E_orw();
767   do_locked_G_E_orl();
768 
769   do_locked_G_E_adcb();
770   do_locked_G_E_adcw();
771   do_locked_G_E_adcl();
772 
773   do_locked_G_E_sbbb();
774   do_locked_G_E_sbbw();
775   do_locked_G_E_sbbl();
776 
777   do_locked_G_E_andb();
778   do_locked_G_E_andw();
779   do_locked_G_E_andl();
780 
781   do_locked_G_E_subb();
782   do_locked_G_E_subw();
783   do_locked_G_E_subl();
784 
785   do_locked_G_E_xorb();
786   do_locked_G_E_xorw();
787   do_locked_G_E_xorl();
788   //21
789   do_locked_imm_E_addb_0x7F();
790   do_locked_imm_E_addb_0xF1();
791   do_locked_imm_E_addw_0x7E();
792   do_locked_imm_E_addw_0x9325();
793   do_locked_imm_E_addl_0x7D();
794   do_locked_imm_E_addl_0x31415927();
795 
796   do_locked_imm_E_orb_0x7F();
797   do_locked_imm_E_orb_0xF1();
798   do_locked_imm_E_orw_0x7E();
799   do_locked_imm_E_orw_0x9325();
800   do_locked_imm_E_orl_0x7D();
801   do_locked_imm_E_orl_0x31415927();
802 
803   do_locked_imm_E_adcb_0x7F();
804   do_locked_imm_E_adcb_0xF1();
805   do_locked_imm_E_adcw_0x7E();
806   do_locked_imm_E_adcw_0x9325();
807   do_locked_imm_E_adcl_0x7D();
808   do_locked_imm_E_adcl_0x31415927();
809 
810   do_locked_imm_E_sbbb_0x7F();
811   do_locked_imm_E_sbbb_0xF1();
812   do_locked_imm_E_sbbw_0x7E();
813   do_locked_imm_E_sbbw_0x9325();
814   do_locked_imm_E_sbbl_0x7D();
815   do_locked_imm_E_sbbl_0x31415927();
816 
817   do_locked_imm_E_andb_0x7F();
818   do_locked_imm_E_andb_0xF1();
819   do_locked_imm_E_andw_0x7E();
820   do_locked_imm_E_andw_0x9325();
821   do_locked_imm_E_andl_0x7D();
822   do_locked_imm_E_andl_0x31415927();
823 
824   do_locked_imm_E_subb_0x7F();
825   do_locked_imm_E_subb_0xF1();
826   do_locked_imm_E_subw_0x7E();
827   do_locked_imm_E_subw_0x9325();
828   do_locked_imm_E_subl_0x7D();
829   do_locked_imm_E_subl_0x31415927();
830 
831   do_locked_imm_E_xorb_0x7F();
832   do_locked_imm_E_xorb_0xF1();
833   do_locked_imm_E_xorw_0x7E();
834   do_locked_imm_E_xorw_0x9325();
835   do_locked_imm_E_xorl_0x7D();
836   do_locked_imm_E_xorl_0x31415927();
837   // 63
838   do_locked_unary_E_decb();
839   do_locked_unary_E_decw();
840   do_locked_unary_E_decl();
841 
842   do_locked_unary_E_incb();
843   do_locked_unary_E_incw();
844   do_locked_unary_E_incl();
845 
846   do_locked_unary_E_negb();
847   do_locked_unary_E_negw();
848   do_locked_unary_E_negl();
849 
850   do_locked_unary_E_notb();
851   do_locked_unary_E_notw();
852   do_locked_unary_E_notl();
853   // 75
854   do_bt_G_E_tests();
855   // 81
856   do_bt_imm_E_tests();
857   // 87
858   // So there should be 87 lock-prefixed instructions in the
859   // disassembly of this compilation unit.
860   // confirm with
861   // objdump -d ./x86locked | grep lock | grep -v do_lock | grep -v elf32 | wc
862 
863   { UInt crcExpd = 0xB2D75045;
864     theCRC = crcFinalise( theCRC );
865     if (theCRC == crcExpd) {
866        printf("x86locked: PASS: CRCs actual 0x%08X expected 0x%08X\n",
867               theCRC, crcExpd);
868     } else {
869        printf("x86locked: FAIL: CRCs actual 0x%08X expected 0x%08X\n",
870               theCRC, crcExpd);
871        printf("x86locked: set #define VERBOSE 1 to diagnose\n");
872     }
873   }
874 
875   return 0;
876 }
877