1
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <assert.h>
5
/* Set to 1 to make send() print every line it folds into the CRC. */
#define VERBOSE 0

/* Short aliases for the basic integer types used by this test. */
typedef unsigned char      UChar;
typedef unsigned short int UShort;
typedef unsigned int       UInt;
typedef int                Int;
typedef unsigned long int  UWord;
typedef unsigned long long ULong;
typedef long long          Long;
typedef char               HChar;
16
/* Deterministic pseudo-random number source for the tests.  Each call
   advances two pieces of static state and returns a value built from
   both; the sequence is the same on every run. */
unsigned myrandom(void)
{
   /* Simple multiply-with-carry generator: each lane becomes
      multiplier * (its low 16 bits) + (its previous high bits). */
   static unsigned lane_w = 11;
   static unsigned lane_z = 13;

   const unsigned z_low   = lane_z & 0xFFFF;
   const unsigned z_carry = lane_z >> 16;
   const unsigned w_low   = lane_w & 0xFFFF;
   const unsigned w_carry = lane_w >> 16;

   lane_z = 36969 * z_low + z_carry;
   lane_w = 18000 * w_low + w_carry;

   return (lane_z << 16) + lane_w;
}
28
29 /////////////////////////////////////////////////////////////////
30 // BEGIN crc32 stuff //
31 /////////////////////////////////////////////////////////////////
32
33 static const UInt crc32Table[256] = {
34
35 /*-- Ugly, innit? --*/
36
37 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
38 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
39 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
40 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
41 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
42 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
43 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
44 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
45 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
46 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
47 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
48 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
49 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
50 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
51 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
52 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
53 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
54 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
55 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
56 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
57 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
58 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
59 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
60 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
61 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
62 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
63 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
64 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
65 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
66 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
67 0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
68 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
69 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
70 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
71 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
72 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
73 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
74 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
75 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
76 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
77 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
78 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
79 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
80 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
81 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
82 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
83 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
84 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
85 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
86 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
87 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
88 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
89 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
90 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
91 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
92 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
93 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
94 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
95 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
96 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
97 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
98 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
99 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
100 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
101 };
102
/* Fold one input byte into a running CRC.

   crcVar  an lvalue holding the running CRC; it is evaluated more than
           once, so it must not have side effects.
   cha     the input byte; any expression, it is truncated to UChar as
           a whole.

   The top byte of the current CRC is XORed with the input byte to pick
   a crc32Table entry, and that entry is XORed into the CRC after the
   CRC has been shifted left by 8 bits.

   Both arguments are parenthesised and the body is wrapped in
   do { } while (0), so the macro behaves as a single statement (safe in
   an unbraced if/else) and compound argument expressions are handled
   as a unit. */
#define UPDATE_CRC(crcVar,cha)                           \
   do {                                                  \
      (crcVar) = ((crcVar) << 8) ^                       \
                 crc32Table[((crcVar) >> 24) ^           \
                            ((UChar)(cha))];             \
   } while (0)
109
/* Run nBytes bytes, starting at 'bytes' and in increasing address
   order, through UPDATE_CRC.  crcIn is the CRC accumulated so far; the
   updated CRC is returned.  With nBytes == 0 the result is crcIn. */
static UInt crcBytes ( UChar* bytes, UWord nBytes, UInt crcIn )
{
   UInt   running = crcIn;
   UChar* cursor  = bytes;
   UChar* end     = bytes + nBytes;

   for (; cursor != end; cursor++) {
      UPDATE_CRC(running, *cursor);
   }
   return running;
}
128
/* Last step of the CRC computation: return the bitwise complement of
   the accumulated value. */
static UInt crcFinalise ( UInt crc )
{
   const UInt complemented = ~crc;
   return complemented;
}
132
133 ////////
134
135 static UInt theCRC = 0xFFFFFFFF;
136
137 static HChar outBuf[1024];
138 // take output that's in outBuf, length as specified, and
139 // update the running crc.
send(int nbytes)140 static void send ( int nbytes )
141 {
142 assert( ((unsigned int)nbytes) < sizeof(outBuf)-1);
143 assert(outBuf[nbytes] == 0);
144 theCRC = crcBytes( (UChar*)&outBuf[0], nbytes, theCRC );
145 if (VERBOSE) printf("SEND %08x %s", theCRC, outBuf);
146 }
147
148
149 /////////////////////////////////////////////////////////////////
150 // END crc32 stuff //
151 /////////////////////////////////////////////////////////////////
152
/* Operand values used by the generated do_locked_* tests.  Each table
   is the same set of low-byte patterns repeated with upper bits
   0x000000, 0x0000FF and 0xFFFFFF.  Change "#if 0" to "#if 1" to
   select the full table instead of the shortened one. */
#if 0

// full version
#define NVALS 57

static unsigned int val[NVALS] = {
   /* 0x000000xx */
   0x00, 0x01, 0x02, 0x03,   0x3F, 0x40, 0x41,
   0x7E, 0x7F, 0x80, 0x81, 0x82,
   0xBF, 0xC0, 0xC1,   0xFC, 0xFD, 0xFE, 0xFF,

   /* 0x0000FFxx */
   0xFF00, 0xFF01, 0xFF02, 0xFF03,   0xFF3F, 0xFF40, 0xFF41,
   0xFF7E, 0xFF7F, 0xFF80, 0xFF81, 0xFF82,
   0xFFBF, 0xFFC0, 0xFFC1,   0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF,

   /* 0xFFFFFFxx */
   0xFFFFFF00, 0xFFFFFF01, 0xFFFFFF02, 0xFFFFFF03,
   0xFFFFFF3F, 0xFFFFFF40, 0xFFFFFF41,
   0xFFFFFF7E, 0xFFFFFF7F, 0xFFFFFF80, 0xFFFFFF81, 0xFFFFFF82,
   0xFFFFFFBF, 0xFFFFFFC0, 0xFFFFFFC1,
   0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, 0xFFFFFFFF
};

#else

// shortened version, for use as valgrind regtest
#define NVALS 27

static unsigned int val[NVALS] = {
   /* 0x000000xx */
   0x00, 0x01,   0x3F, 0x40,   0x7F, 0x80,   0xBF, 0xC0,   0xFF,

   /* 0x0000FFxx */
   0xFF00, 0xFF01,   0xFF3F, 0xFF40,   0xFF7F, 0xFF80,
   0xFFBF, 0xFFC0,   0xFFFF,

   /* 0xFFFFFFxx */
   0xFFFFFF00, 0xFFFFFF01,   0xFFFFFF3F, 0xFFFFFF40,
   0xFFFFFF7F, 0xFFFFFF80,   0xFFFFFFBF, 0xFFFFFFC0,
   0xFFFFFFFF
};

#endif
204
205 /////////////////////////////////////
206
/* Condition-code bits that the generated tests load into the flags
   register (via popfl) before each instruction and sample (via pushfl)
   afterwards.  The positions follow the x86 EFLAGS layout: carry,
   parity, auxiliary carry, zero, sign and overflow. */
#define CC_C (1 << 0)
#define CC_P (1 << 2)
#define CC_A (1 << 4)
#define CC_Z (1 << 6)
#define CC_S (1 << 7)
#define CC_O (1 << 11)

/* All six bits above; applied to the flags read back after each
   instruction before they are reported. */
#define CC_MASK (CC_C | CC_P | CC_A | CC_Z | CC_S | CC_O)
215
216 #define GEN_do_locked_G_E(_name,_eax) \
217 \
218 __attribute__((noinline)) void do_locked_G_E_##_name ( void ) \
219 { \
220 volatile int e_val, g_val, e_val_before; \
221 int o, s, z, a, c, p, v1, v2, flags_in; \
222 int block[4]; \
223 \
224 for (v1 = 0; v1 < NVALS; v1++) { \
225 for (v2 = 0; v2 < NVALS; v2++) { \
226 \
227 for (o = 0; o < 2; o++) { \
228 for (s = 0; s < 2; s++) { \
229 for (z = 0; z < 2; z++) { \
230 for (a = 0; a < 2; a++) { \
231 for (c = 0; c < 2; c++) { \
232 for (p = 0; p < 2; p++) { \
233 \
234 flags_in = (o ? CC_O : 0) \
235 | (s ? CC_S : 0) \
236 | (z ? CC_Z : 0) \
237 | (a ? CC_A : 0) \
238 | (c ? CC_C : 0) \
239 | (p ? CC_P : 0); \
240 \
241 g_val = val[v1]; \
242 e_val = val[v2]; \
243 e_val_before = e_val; \
244 \
245 block[0] = flags_in; \
246 block[1] = g_val; \
247 block[2] = (int)(long)&e_val; \
248 block[3] = 0; \
249 __asm__ __volatile__( \
250 "movl 0(%0), %%eax\n\t" \
251 "pushl %%eax\n\t" \
252 "popfl\n\t" \
253 "movl 4(%0), %%eax\n\t" \
254 "movl 8(%0), %%ebx\n\t" \
255 "lock; " #_name " %%" #_eax ",(%%ebx)\n\t" \
256 "pushfl\n\t" \
257 "popl %%eax\n\t" \
258 "movl %%eax, 12(%0)\n\t" \
259 : : "r"(&block[0]) : "eax","ebx","cc","memory" \
260 ); \
261 \
262 send( \
263 sprintf(outBuf, \
264 "%s G=%08x E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n", \
265 #_name, g_val, e_val_before, flags_in, \
266 e_val, block[3] & CC_MASK) ); \
267 \
268 }}}}}} \
269 \
270 }} \
271 }
272
GEN_do_locked_G_E(addb,al)273 GEN_do_locked_G_E(addb,al)
274 GEN_do_locked_G_E(addw,ax)
275 GEN_do_locked_G_E(addl,eax)
276
277 GEN_do_locked_G_E(orb, al)
278 GEN_do_locked_G_E(orw, ax)
279 GEN_do_locked_G_E(orl, eax)
280
281 GEN_do_locked_G_E(adcb,al)
282 GEN_do_locked_G_E(adcw,ax)
283 GEN_do_locked_G_E(adcl,eax)
284
285 GEN_do_locked_G_E(sbbb,al)
286 GEN_do_locked_G_E(sbbw,ax)
287 GEN_do_locked_G_E(sbbl,eax)
288
289 GEN_do_locked_G_E(andb,al)
290 GEN_do_locked_G_E(andw,ax)
291 GEN_do_locked_G_E(andl,eax)
292
293 GEN_do_locked_G_E(subb,al)
294 GEN_do_locked_G_E(subw,ax)
295 GEN_do_locked_G_E(subl,eax)
296
297 GEN_do_locked_G_E(xorb,al)
298 GEN_do_locked_G_E(xorw,ax)
299 GEN_do_locked_G_E(xorl,eax)
300
301
302
303
304 #define GEN_do_locked_imm_E(_name,_eax,_imm) \
305 \
306 __attribute__((noinline)) void do_locked_imm_E_##_name##_##_imm ( void ) \
307 { \
308 volatile int e_val, e_val_before; \
309 int o, s, z, a, c, p, v2, flags_in; \
310 int block[3]; \
311 \
312 for (v2 = 0; v2 < NVALS; v2++) { \
313 \
314 for (o = 0; o < 2; o++) { \
315 for (s = 0; s < 2; s++) { \
316 for (z = 0; z < 2; z++) { \
317 for (a = 0; a < 2; a++) { \
318 for (c = 0; c < 2; c++) { \
319 for (p = 0; p < 2; p++) { \
320 \
321 flags_in = (o ? CC_O : 0) \
322 | (s ? CC_S : 0) \
323 | (z ? CC_Z : 0) \
324 | (a ? CC_A : 0) \
325 | (c ? CC_C : 0) \
326 | (p ? CC_P : 0); \
327 \
328 e_val = val[v2]; \
329 e_val_before = e_val; \
330 \
331 block[0] = flags_in; \
332 block[1] = (int)(long)&e_val; \
333 block[2] = 0; \
334 __asm__ __volatile__( \
335 "movl 0(%0), %%eax\n\t" \
336 "pushl %%eax\n\t" \
337 "popfl\n\t" \
338 "movl 4(%0), %%ebx\n\t" \
339 "lock; " #_name " $" #_imm ",(%%ebx)\n\t" \
340 "pushfl\n\t" \
341 "popl %%eax\n\t" \
342 "movl %%eax, 8(%0)\n\t" \
343 : : "r"(&block[0]) : "eax","ebx","cc","memory" \
344 ); \
345 \
346 send( \
347 sprintf(outBuf, \
348 "%s I=%s E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n", \
349 #_name, #_imm, e_val_before, flags_in, \
350 e_val, block[2] & CC_MASK) ); \
351 \
352 }}}}}} \
353 \
354 } \
355 }
356
357 GEN_do_locked_imm_E(addb,al,0x7F)
358 GEN_do_locked_imm_E(addb,al,0xF1)
359 GEN_do_locked_imm_E(addw,ax,0x7E)
360 GEN_do_locked_imm_E(addw,ax,0x9325)
361 GEN_do_locked_imm_E(addl,eax,0x7D)
362 GEN_do_locked_imm_E(addl,eax,0x31415927)
363
364 GEN_do_locked_imm_E(orb,al,0x7F)
365 GEN_do_locked_imm_E(orb,al,0xF1)
366 GEN_do_locked_imm_E(orw,ax,0x7E)
367 GEN_do_locked_imm_E(orw,ax,0x9325)
368 GEN_do_locked_imm_E(orl,eax,0x7D)
369 GEN_do_locked_imm_E(orl,eax,0x31415927)
370
371 GEN_do_locked_imm_E(adcb,al,0x7F)
372 GEN_do_locked_imm_E(adcb,al,0xF1)
373 GEN_do_locked_imm_E(adcw,ax,0x7E)
374 GEN_do_locked_imm_E(adcw,ax,0x9325)
375 GEN_do_locked_imm_E(adcl,eax,0x7D)
376 GEN_do_locked_imm_E(adcl,eax,0x31415927)
377
378 GEN_do_locked_imm_E(sbbb,al,0x7F)
379 GEN_do_locked_imm_E(sbbb,al,0xF1)
380 GEN_do_locked_imm_E(sbbw,ax,0x7E)
381 GEN_do_locked_imm_E(sbbw,ax,0x9325)
382 GEN_do_locked_imm_E(sbbl,eax,0x7D)
383 GEN_do_locked_imm_E(sbbl,eax,0x31415927)
384
385 GEN_do_locked_imm_E(andb,al,0x7F)
386 GEN_do_locked_imm_E(andb,al,0xF1)
387 GEN_do_locked_imm_E(andw,ax,0x7E)
388 GEN_do_locked_imm_E(andw,ax,0x9325)
389 GEN_do_locked_imm_E(andl,eax,0x7D)
390 GEN_do_locked_imm_E(andl,eax,0x31415927)
391
392 GEN_do_locked_imm_E(subb,al,0x7F)
393 GEN_do_locked_imm_E(subb,al,0xF1)
394 GEN_do_locked_imm_E(subw,ax,0x7E)
395 GEN_do_locked_imm_E(subw,ax,0x9325)
396 GEN_do_locked_imm_E(subl,eax,0x7D)
397 GEN_do_locked_imm_E(subl,eax,0x31415927)
398
399 GEN_do_locked_imm_E(xorb,al,0x7F)
400 GEN_do_locked_imm_E(xorb,al,0xF1)
401 GEN_do_locked_imm_E(xorw,ax,0x7E)
402 GEN_do_locked_imm_E(xorw,ax,0x9325)
403 GEN_do_locked_imm_E(xorl,eax,0x7D)
404 GEN_do_locked_imm_E(xorl,eax,0x31415927)
405
406 #define GEN_do_locked_unary_E(_name,_eax) \
407 \
408 __attribute__((noinline)) void do_locked_unary_E_##_name ( void ) \
409 { \
410 volatile int e_val, e_val_before; \
411 int o, s, z, a, c, p, v2, flags_in; \
412 int block[3]; \
413 \
414 for (v2 = 0; v2 < NVALS; v2++) { \
415 \
416 for (o = 0; o < 2; o++) { \
417 for (s = 0; s < 2; s++) { \
418 for (z = 0; z < 2; z++) { \
419 for (a = 0; a < 2; a++) { \
420 for (c = 0; c < 2; c++) { \
421 for (p = 0; p < 2; p++) { \
422 \
423 flags_in = (o ? CC_O : 0) \
424 | (s ? CC_S : 0) \
425 | (z ? CC_Z : 0) \
426 | (a ? CC_A : 0) \
427 | (c ? CC_C : 0) \
428 | (p ? CC_P : 0); \
429 \
430 e_val = val[v2]; \
431 e_val_before = e_val; \
432 \
433 block[0] = flags_in; \
434 block[1] = (int)(long)&e_val; \
435 block[2] = 0; \
436 __asm__ __volatile__( \
437 "movl 0(%0), %%eax\n\t" \
438 "pushl %%eax\n\t" \
439 "popfl\n\t" \
440 "movl 4(%0), %%ebx\n\t" \
441 "lock; " #_name " (%%ebx)\n\t" \
442 "pushfl\n\t" \
443 "popl %%eax\n\t" \
444 "movl %%eax, 8(%0)\n\t" \
445 : : "r"(&block[0]) : "eax","ebx","cc","memory" \
446 ); \
447 \
448 send( \
449 sprintf(outBuf, \
450 "%s E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n", \
451 #_name, e_val_before, flags_in, \
452 e_val, block[2] & CC_MASK)); \
453 \
454 }}}}}} \
455 \
456 } \
457 }
458
459 GEN_do_locked_unary_E(decb,al)
460 GEN_do_locked_unary_E(decw,ax)
461 GEN_do_locked_unary_E(decl,eax)
462
463 GEN_do_locked_unary_E(incb,al)
464 GEN_do_locked_unary_E(incw,ax)
465 GEN_do_locked_unary_E(incl,eax)
466
467 GEN_do_locked_unary_E(negb,al)
468 GEN_do_locked_unary_E(negw,ax)
469 GEN_do_locked_unary_E(negl,eax)
470
471 GEN_do_locked_unary_E(notb,al)
472 GEN_do_locked_unary_E(notw,ax)
473 GEN_do_locked_unary_E(notl,eax)
474
475
476 /////////////////////////////////////////////////////////////////
477
478 unsigned int btsl_mem ( UChar* base, int bitno )
479 {
480 unsigned char res;
481 __asm__
482 __volatile__("lock; btsl\t%2, %0\n\t"
483 "setc\t%1"
484 : "=m" (*base), "=q" (res)
485 : "r" (bitno));
486 /* Pretty meaningless to dereference base here, but that's what you
487 have to do to get a btsl insn which refers to memory starting at
488 base. */
489 return res;
490 }
/* Execute "lock; btsw" (16-bit bit test and set) with bit offset
   'bitno' relative to the memory at 'base', and return the carry flag
   the instruction produced (0 or 1).  Only the low 16 bits of the
   register holding bitno are used as the operand (%w2).

   The touched bit may lie outside the byte at *base, so *base is
   declared read-write and a "memory" clobber is given; "cc" records
   that the flags are changed. */
unsigned int btsw_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("lock; btsw\t%w2, %0\n\t"
                "setc\t%1"
                : "+m" (*base), "=q" (res)
                : "r" (bitno)
                : "cc", "memory");
   return res;
}
501
/* Execute "lock; btrl" (bit test and reset) with bit offset 'bitno'
   relative to the memory at 'base', and return the carry flag the
   instruction produced (0 or 1).

   The touched bit may lie outside the byte at *base, so *base is
   declared read-write and a "memory" clobber is given; "cc" records
   that the flags are changed. */
unsigned int btrl_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("lock; btrl\t%2, %0\n\t"
                "setc\t%1"
                : "+m" (*base), "=q" (res)
                : "r" (bitno)
                : "cc", "memory");
   return res;
}
/* Execute "lock; btrw" (16-bit bit test and reset) with bit offset
   'bitno' relative to the memory at 'base', and return the carry flag
   the instruction produced (0 or 1).  Only the low 16 bits of the
   register holding bitno are used as the operand (%w2).

   The touched bit may lie outside the byte at *base, so *base is
   declared read-write and a "memory" clobber is given; "cc" records
   that the flags are changed. */
unsigned int btrw_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("lock; btrw\t%w2, %0\n\t"
                "setc\t%1"
                : "+m" (*base), "=q" (res)
                : "r" (bitno)
                : "cc", "memory");
   return res;
}
522
/* Execute "lock; btcl" (bit test and complement) with bit offset
   'bitno' relative to the memory at 'base', and return the carry flag
   the instruction produced (0 or 1).

   The touched bit may lie outside the byte at *base, so *base is
   declared read-write and a "memory" clobber is given; "cc" records
   that the flags are changed. */
unsigned int btcl_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("lock; btcl\t%2, %0\n\t"
                "setc\t%1"
                : "+m" (*base), "=q" (res)
                : "r" (bitno)
                : "cc", "memory");
   return res;
}
/* Execute "lock; btcw" (16-bit bit test and complement) with bit
   offset 'bitno' relative to the memory at 'base', and return the
   carry flag the instruction produced (0 or 1).  Only the low 16 bits
   of the register holding bitno are used as the operand (%w2).

   The touched bit may lie outside the byte at *base, so *base is
   declared read-write and a "memory" clobber is given; "cc" records
   that the flags are changed. */
unsigned int btcw_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("lock; btcw\t%w2, %0\n\t"
                "setc\t%1"
                : "+m" (*base), "=q" (res)
                : "r" (bitno)
                : "cc", "memory");
   return res;
}
543
/* Execute "btl" (bit test, no lock prefix) with bit offset 'bitno'
   relative to the memory at 'base', and return the carry flag the
   instruction produced (0 or 1).  The asm lists "cc" and "memory" as
   clobbers. */
unsigned int btl_mem ( UChar* base, int bitno )
{
   unsigned char carry;

   __asm__ __volatile__(
      "btl\t%2, %0\n\t"
      "setc\t%1"
      : "=m" (*base), "=q" (carry)
      : "r" (bitno)
      : "cc", "memory"
   );

   return carry;
}
/* Execute "btw" (16-bit bit test, no lock prefix) with bit offset
   'bitno' relative to the memory at 'base', and return the carry flag
   the instruction produced (0 or 1).  Only the low 16 bits of the
   register holding bitno are used as the operand (%w2).

   The tested bit may lie outside the byte at *base, which the operand
   list alone does not tell the compiler, so the asm carries the same
   "cc" and "memory" clobbers as btl_mem. */
unsigned int btw_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("btw\t%w2, %0\n\t"
                "setc\t%1"
                : "=m" (*base), "=q" (res)
                : "r" (bitno)
                : "cc", "memory");
   return res;
}
565
/* Rotate x left by one bit, treating it as 64 bits wide: the bit
   shifted out at position 63 re-enters at position 0. */
ULong rol1 ( ULong x )
{
   const ULong shifted = x << 1;
   const ULong wrapped = x >> 63;
   return shifted | wrapped;
}
570
do_bt_G_E_tests(void)571 void do_bt_G_E_tests ( void )
572 {
573 UInt n, bitoff, op;
574 UInt c;
575 UChar* block;
576 ULong carrydep, res;;
577
578 /*------------------------ MEM-L -----------------------*/
579
580 carrydep = 0;
581 block = calloc(200,1);
582 block += 100;
583 /* Valid bit offsets are -800 .. 799 inclusive. */
584
585 for (n = 0; n < 10000; n++) {
586 bitoff = (myrandom() % 1600) - 800;
587 op = myrandom() % 4;
588 c = 2;
589 switch (op) {
590 case 0: c = btsl_mem(block, bitoff); break;
591 case 1: c = btrl_mem(block, bitoff); break;
592 case 2: c = btcl_mem(block, bitoff); break;
593 case 3: c = btl_mem(block, bitoff); break;
594 }
595 c &= 255;
596 assert(c == 0 || c == 1);
597 carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
598 }
599
600 /* Compute final result */
601 block -= 100;
602 res = 0;
603 for (n = 0; n < 200; n++) {
604 UChar ch = block[n];
605 /* printf("%d ", (int)block[n]); */
606 res = rol1(res) ^ (ULong)ch;
607 }
608
609 send( sprintf(outBuf,
610 "bt{s,r,c}l: final res 0x%llx, carrydep 0x%llx\n",
611 res, carrydep ));
612 free(block);
613
614 /*------------------------ MEM-W -----------------------*/
615
616 carrydep = 0;
617 block = calloc(200,1);
618 block += 100;
619 /* Valid bit offsets are -800 .. 799 inclusive. */
620
621 for (n = 0; n < 10000; n++) {
622 bitoff = (myrandom() % 1600) - 800;
623 op = myrandom() % 4;
624 c = 2;
625 switch (op) {
626 case 0: c = btsw_mem(block, bitoff); break;
627 case 1: c = btrw_mem(block, bitoff); break;
628 case 2: c = btcw_mem(block, bitoff); break;
629 case 3: c = btw_mem(block, bitoff); break;
630 }
631 c &= 255;
632 assert(c == 0 || c == 1);
633 carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
634 }
635
636 /* Compute final result */
637 block -= 100;
638 res = 0;
639 for (n = 0; n < 200; n++) {
640 UChar ch = block[n];
641 /* printf("%d ", (int)block[n]); */
642 res = rol1(res) ^ (ULong)ch;
643 }
644
645 send( sprintf(outBuf,
646 "bt{s,r,c}w: final res 0x%llx, carrydep 0x%llx\n",
647 res, carrydep ));
648 free(block);
649 }
650
651
652 /////////////////////////////////////////////////////////////////
653
654 /* Given a word, do bt/bts/btr/btc on bits 0, 1, 2 and 3 of it, and
655 also reconstruct the original bits 0, 1, 2, 3 by looking at the
656 carry flag. Returned result has mashed bits 0-3 at the bottom and
657 the reconstructed original bits 0-3 as 4-7. */
658
mash_mem_L(UInt * origp)659 UInt mash_mem_L ( UInt* origp )
660 {
661 UInt reconstructed, mashed;
662 __asm__ __volatile__ (
663 "movl %2, %%edx\n\t"
664 ""
665 "movl $0, %%eax\n\t"
666 "\n\t"
667 "btl $0, (%%edx)\n\t"
668 "setb %%cl\n\t"
669 "movzbl %%cl, %%ecx\n\t"
670 "orl %%ecx, %%eax\n\t"
671 "\n\t"
672 "lock; btsl $1, (%%edx)\n\t"
673 "setb %%cl\n\t"
674 "movzbl %%cl, %%ecx\n\t"
675 "shll $1, %%ecx\n\t"
676 "orl %%ecx, %%eax\n\t"
677 "\n\t"
678 "lock; btrl $2, (%%edx)\n\t"
679 "setb %%cl\n\t"
680 "movzbl %%cl, %%ecx\n\t"
681 "shll $2, %%ecx\n\t"
682 "orl %%ecx, %%eax\n\t"
683 "\n\t"
684 "lock; btcl $3, (%%edx)\n\t"
685 "setb %%cl\n\t"
686 "movzbl %%cl, %%ecx\n\t"
687 "shll $3, %%ecx\n\t"
688 "orl %%ecx, %%eax\n\t"
689 "\n\t"
690 "movl %%eax, %0\n\t"
691 "movl (%%edx), %1"
692
693 : "=r" (reconstructed), "=r" (mashed)
694 : "r" (origp)
695 : "eax", "ecx", "edx", "cc");
696 return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
697 }
698
mash_mem_W(UShort * origp)699 UInt mash_mem_W ( UShort* origp )
700 {
701 UInt reconstructed, mashed;
702 __asm__ __volatile__ (
703 "movl %2, %%edx\n\t"
704 ""
705 "movl $0, %%eax\n\t"
706 "\n\t"
707 "btw $0, (%%edx)\n\t"
708 "setb %%cl\n\t"
709 "movzbl %%cl, %%ecx\n\t"
710 "orl %%ecx, %%eax\n\t"
711 "\n\t"
712 "lock; btsw $1, (%%edx)\n\t"
713 "setb %%cl\n\t"
714 "movzbl %%cl, %%ecx\n\t"
715 "shll $1, %%ecx\n\t"
716 "orl %%ecx, %%eax\n\t"
717 "\n\t"
718 "lock; btrw $2, (%%edx)\n\t"
719 "setb %%cl\n\t"
720 "movzbl %%cl, %%ecx\n\t"
721 "shll $2, %%ecx\n\t"
722 "orl %%ecx, %%eax\n\t"
723 "\n\t"
724 "lock; btcw $3, (%%edx)\n\t"
725 "setb %%cl\n\t"
726 "movzbl %%cl, %%ecx\n\t"
727 "shll $3, %%ecx\n\t"
728 "orl %%ecx, %%eax\n\t"
729 "\n\t"
730 "movl %%eax, %0\n\t"
731 "movzwl (%%edx), %1"
732
733 : "=r" (reconstructed), "=r" (mashed)
734 : "r" (origp)
735 : "eax", "ecx", "edx", "cc");
736 return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
737 }
738
739
do_bt_imm_E_tests(void)740 void do_bt_imm_E_tests( void )
741 {
742 int i;
743 UInt* iil = malloc(sizeof(UInt));
744 UShort* iiw = malloc(sizeof(UShort));
745 for (i = 0; i < 0x10; i++) {
746 *iil = i;
747 *iiw = i;
748 send( sprintf(outBuf, "0x%x -> 0x%02x 0x%02x\n", i,
749 mash_mem_L(iil), mash_mem_W(iiw)));
750 }
751 free(iil);
752 free(iiw);
753 }
754
755
756
757 /////////////////////////////////////////////////////////////////
758
main(void)759 int main ( void )
760 {
761 do_locked_G_E_addb();
762 do_locked_G_E_addw();
763 do_locked_G_E_addl();
764
765 do_locked_G_E_orb();
766 do_locked_G_E_orw();
767 do_locked_G_E_orl();
768
769 do_locked_G_E_adcb();
770 do_locked_G_E_adcw();
771 do_locked_G_E_adcl();
772
773 do_locked_G_E_sbbb();
774 do_locked_G_E_sbbw();
775 do_locked_G_E_sbbl();
776
777 do_locked_G_E_andb();
778 do_locked_G_E_andw();
779 do_locked_G_E_andl();
780
781 do_locked_G_E_subb();
782 do_locked_G_E_subw();
783 do_locked_G_E_subl();
784
785 do_locked_G_E_xorb();
786 do_locked_G_E_xorw();
787 do_locked_G_E_xorl();
788 //21
789 do_locked_imm_E_addb_0x7F();
790 do_locked_imm_E_addb_0xF1();
791 do_locked_imm_E_addw_0x7E();
792 do_locked_imm_E_addw_0x9325();
793 do_locked_imm_E_addl_0x7D();
794 do_locked_imm_E_addl_0x31415927();
795
796 do_locked_imm_E_orb_0x7F();
797 do_locked_imm_E_orb_0xF1();
798 do_locked_imm_E_orw_0x7E();
799 do_locked_imm_E_orw_0x9325();
800 do_locked_imm_E_orl_0x7D();
801 do_locked_imm_E_orl_0x31415927();
802
803 do_locked_imm_E_adcb_0x7F();
804 do_locked_imm_E_adcb_0xF1();
805 do_locked_imm_E_adcw_0x7E();
806 do_locked_imm_E_adcw_0x9325();
807 do_locked_imm_E_adcl_0x7D();
808 do_locked_imm_E_adcl_0x31415927();
809
810 do_locked_imm_E_sbbb_0x7F();
811 do_locked_imm_E_sbbb_0xF1();
812 do_locked_imm_E_sbbw_0x7E();
813 do_locked_imm_E_sbbw_0x9325();
814 do_locked_imm_E_sbbl_0x7D();
815 do_locked_imm_E_sbbl_0x31415927();
816
817 do_locked_imm_E_andb_0x7F();
818 do_locked_imm_E_andb_0xF1();
819 do_locked_imm_E_andw_0x7E();
820 do_locked_imm_E_andw_0x9325();
821 do_locked_imm_E_andl_0x7D();
822 do_locked_imm_E_andl_0x31415927();
823
824 do_locked_imm_E_subb_0x7F();
825 do_locked_imm_E_subb_0xF1();
826 do_locked_imm_E_subw_0x7E();
827 do_locked_imm_E_subw_0x9325();
828 do_locked_imm_E_subl_0x7D();
829 do_locked_imm_E_subl_0x31415927();
830
831 do_locked_imm_E_xorb_0x7F();
832 do_locked_imm_E_xorb_0xF1();
833 do_locked_imm_E_xorw_0x7E();
834 do_locked_imm_E_xorw_0x9325();
835 do_locked_imm_E_xorl_0x7D();
836 do_locked_imm_E_xorl_0x31415927();
837 // 63
838 do_locked_unary_E_decb();
839 do_locked_unary_E_decw();
840 do_locked_unary_E_decl();
841
842 do_locked_unary_E_incb();
843 do_locked_unary_E_incw();
844 do_locked_unary_E_incl();
845
846 do_locked_unary_E_negb();
847 do_locked_unary_E_negw();
848 do_locked_unary_E_negl();
849
850 do_locked_unary_E_notb();
851 do_locked_unary_E_notw();
852 do_locked_unary_E_notl();
853 // 75
854 do_bt_G_E_tests();
855 // 81
856 do_bt_imm_E_tests();
857 // 87
858 // So there should be 87 lock-prefixed instructions in the
859 // disassembly of this compilation unit.
860 // confirm with
861 // objdump -d ./x86locked | grep lock | grep -v do_lock | grep -v elf32 | wc
862
863 { UInt crcExpd = 0xB2D75045;
864 theCRC = crcFinalise( theCRC );
865 if (theCRC == crcExpd) {
866 printf("x86locked: PASS: CRCs actual 0x%08X expected 0x%08X\n",
867 theCRC, crcExpd);
868 } else {
869 printf("x86locked: FAIL: CRCs actual 0x%08X expected 0x%08X\n",
870 theCRC, crcExpd);
871 printf("x86locked: set #define VERBOSE 1 to diagnose\n");
872 }
873 }
874
875 return 0;
876 }
877