1
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <assert.h>
5
6 #define VERBOSE 0
7
/* Short aliases for the basic integer types used throughout this test,
   listed from narrowest to widest. */
typedef unsigned char      UChar;
typedef unsigned short     UShort;
typedef unsigned int       UInt;
typedef int                Int;
typedef unsigned long      UWord;
typedef unsigned long long ULong;
typedef long long          Long;
typedef char               HChar;
16
17 /////////////////////////////////////////////////////////////////
18 // BEGIN crc32 stuff //
19 /////////////////////////////////////////////////////////////////
20
/* 256-entry lookup table consumed by UPDATE_CRC.  The index is the top
   byte of the running CRC XORed with the next data byte; the selected
   entry is XORed into the left-shifted CRC.  Entry [1] is 0x04c11db7,
   which presumably is the generator polynomial the table was built
   from -- TODO confirm against the table's origin. */
static const UInt crc32Table[256] = {

   /*-- Ugly, innit? --*/

   0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
   0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
   0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
   0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
   0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
   0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
   0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
   0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
   0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
   0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
   0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
   0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
   0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
   0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
   0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
   0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
   0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
   0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
   0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
   0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
   0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
   0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
   0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
   0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
   0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
   0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
   0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
   0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
   0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
   0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
   0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
   0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
   0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
   0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
   0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
   0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
   0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
   0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
   0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
   0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
   0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
   0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
   0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
   0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
   0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
   0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
   0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
   0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
   0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
   0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
   0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
   0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
   0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
   0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
   0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
   0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
   0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
   0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
   0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
   0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
   0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
   0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
   0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
   0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
};
90
/* Fold one byte into a running CRC: shift the CRC left by eight bits
   and XOR in the crc32Table entry selected by the CRC's old top byte
   XORed with the new data byte.

     crcVar - modifiable 32-bit unsigned lvalue holding the running
              CRC.  It is evaluated more than once, so it must have no
              side effects.
     cha    - the byte to add; the whole expression is truncated to
              UChar before use.

   Both arguments are parenthesised so arbitrary expressions can be
   passed, and the body is wrapped in do { } while (0) so that
   "UPDATE_CRC(c, b);" is exactly one statement (safe in if/else). */
#define UPDATE_CRC(crcVar,cha)                          \
   do {                                                 \
      (crcVar) = ((crcVar) << 8) ^                      \
                 crc32Table[((crcVar) >> 24) ^          \
                            ((UChar)(cha))];            \
   } while (0)
97
/* Feed nBytes bytes starting at 'bytes' through UPDATE_CRC, in order,
   beginning from the running value crcIn.  Returns the new running
   value; no final inversion is applied here. */
static UInt crcBytes ( UChar* bytes, UWord nBytes, UInt crcIn )
{
   UInt   running = crcIn;
   UChar* cursor  = bytes;
   UChar* limit   = bytes + nBytes;

   /* One table lookup per byte, strictly front to back. */
   for (; cursor != limit; cursor++) {
      UPDATE_CRC(running, *cursor);
   }
   return running;
}
116
/* Turn a running CRC into the final CRC by flipping every bit. */
static UInt crcFinalise ( UInt crc )
{
   return crc ^ ~(UInt)0;
}
120
121 ////////
122
123 static UInt theCRC = 0xFFFFFFFF;
124
125 static HChar outBuf[1024];
126 // take output that's in outBuf, length as specified, and
127 // update the running crc.
send(int nbytes)128 static void send ( int nbytes )
129 {
130 assert( ((unsigned int)nbytes) < sizeof(outBuf)-1);
131 assert(outBuf[nbytes] == 0);
132 theCRC = crcBytes( (UChar*)&outBuf[0], nbytes, theCRC );
133 if (VERBOSE) printf("SEND %08x %s", theCRC, outBuf);
134 }
135
136
137 /////////////////////////////////////////////////////////////////
138 // END crc32 stuff //
139 /////////////////////////////////////////////////////////////////
140
/* Operand values fed to the locked-instruction tests.  Two alternative
   tables are provided; the preprocessor conditional selects the short
   one.  Each table is four groups of boundary values: plain bytes, and
   the same low bytes under 0xFF.., 0xFFFFFF.. and 0xFFFFFFFFFFFFFF..
   prefixes.
   NOTE(review): the expected CRC checked in main is presumably computed
   from exactly these values, so any edit here needs a new expected
   CRC. */
#if 0

// full version: 4 groups of 19 values (currently compiled out)
#define NVALS 76

static ULong val[NVALS]
    = { 0x00ULL, 0x01ULL, 0x02ULL, 0x03ULL,
        0x3FULL, 0x40ULL, 0x41ULL,
        0x7EULL, 0x7FULL, 0x80ULL, 0x81ULL, 0x82ULL,
        0xBFULL, 0xC0ULL, 0xC1ULL,
        0xFCULL, 0xFDULL, 0xFEULL, 0xFFULL,

        0xFF00ULL, 0xFF01ULL, 0xFF02ULL, 0xFF03ULL,
        0xFF3FULL, 0xFF40ULL, 0xFF41ULL,
        0xFF7EULL, 0xFF7FULL, 0xFF80ULL, 0xFF81ULL, 0xFF82ULL,
        0xFFBFULL, 0xFFC0ULL, 0xFFC1ULL,
        0xFFFCULL, 0xFFFDULL, 0xFFFEULL, 0xFFFFULL,

        0xFFFFFF00ULL, 0xFFFFFF01ULL, 0xFFFFFF02ULL, 0xFFFFFF03ULL,
        0xFFFFFF3FULL, 0xFFFFFF40ULL, 0xFFFFFF41ULL,
        0xFFFFFF7EULL, 0xFFFFFF7FULL, 0xFFFFFF80ULL, 0xFFFFFF81ULL, 0xFFFFFF82ULL,
        0xFFFFFFBFULL, 0xFFFFFFC0ULL, 0xFFFFFFC1ULL,
        0xFFFFFFFCULL, 0xFFFFFFFDULL, 0xFFFFFFFEULL, 0xFFFFFFFFULL,

        0xFFFFFFFFFFFFFF00ULL, 0xFFFFFFFFFFFFFF01ULL, 0xFFFFFFFFFFFFFF02ULL,
                               0xFFFFFFFFFFFFFF03ULL,
        0xFFFFFFFFFFFFFF3FULL, 0xFFFFFFFFFFFFFF40ULL, 0xFFFFFFFFFFFFFF41ULL,
        0xFFFFFFFFFFFFFF7EULL, 0xFFFFFFFFFFFFFF7FULL, 0xFFFFFFFFFFFFFF80ULL,
                               0xFFFFFFFFFFFFFF81ULL, 0xFFFFFFFFFFFFFF82ULL,
        0xFFFFFFFFFFFFFFBFULL, 0xFFFFFFFFFFFFFFC0ULL, 0xFFFFFFFFFFFFFFC1ULL,
        0xFFFFFFFFFFFFFFFCULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFEULL,
                               0xFFFFFFFFFFFFFFFFULL
      };

#else

// shortened version, for use as valgrind regtest: 4 groups of 9 values
#define NVALS 36

static ULong val[NVALS]
    = { 0x00ULL, 0x01ULL,
        0x3FULL, 0x40ULL,
        0x7FULL, 0x80ULL,
        0xBFULL, 0xC0ULL,
        0xFFULL,

        0xFF00ULL, 0xFF01ULL,
        0xFF3FULL, 0xFF40ULL,
        0xFF7FULL, 0xFF80ULL,
        0xFFBFULL, 0xFFC0ULL,
        0xFFFFULL,

        /* NOTE(review): this group has ..7E/..7F where the other three
           groups have ..7F/..80.  Looks unintentional, but it is left
           as is because changing it would change the test output. */
        0xFFFFFF00ULL, 0xFFFFFF01ULL,
        0xFFFFFF3FULL, 0xFFFFFF40ULL,
        0xFFFFFF7EULL, 0xFFFFFF7FULL,
        0xFFFFFFBFULL, 0xFFFFFFC0ULL,
        0xFFFFFFFFULL,

        0xFFFFFFFFFFFFFF00ULL, 0xFFFFFFFFFFFFFF01ULL,
        0xFFFFFFFFFFFFFF3FULL, 0xFFFFFFFFFFFFFF40ULL,
        0xFFFFFFFFFFFFFF7FULL, 0xFFFFFFFFFFFFFF80ULL,
        0xFFFFFFFFFFFFFFBFULL, 0xFFFFFFFFFFFFFFC0ULL,
        0xFFFFFFFFFFFFFFFFULL
      };

#endif
207
208 /////////////////////////////////////
209
/* Bit masks for the six arithmetic status flags as they appear in the
   flags image moved in and out with popfq/pushfq by the tests. */
#define CC_C (1 << 0)    /* carry           */
#define CC_P (1 << 2)    /* parity          */
#define CC_A (1 << 4)    /* auxiliary carry */
#define CC_Z (1 << 6)    /* zero            */
#define CC_S (1 << 7)    /* sign            */
#define CC_O (1 << 11)   /* overflow        */

/* All six of the above; used to filter the flags read back after each
   tested instruction. */
#define CC_MASK (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C)
218
219 #define GEN_do_locked_G_E(_name,_eax) \
220 \
221 __attribute__((noinline)) void do_locked_G_E_##_name ( void ) \
222 { \
223 volatile Long e_val, g_val, e_val_before; \
224 Long o, s, z, a, c, p, v1, v2, flags_in; \
225 Long block[4]; \
226 \
227 for (v1 = 0; v1 < NVALS; v1++) { \
228 for (v2 = 0; v2 < NVALS; v2++) { \
229 \
230 for (o = 0; o < 2; o++) { \
231 for (s = 0; s < 2; s++) { \
232 for (z = 0; z < 2; z++) { \
233 for (a = 0; a < 2; a++) { \
234 for (c = 0; c < 2; c++) { \
235 for (p = 0; p < 2; p++) { \
236 \
237 flags_in = (o ? CC_O : 0) \
238 | (s ? CC_S : 0) \
239 | (z ? CC_Z : 0) \
240 | (a ? CC_A : 0) \
241 | (c ? CC_C : 0) \
242 | (p ? CC_P : 0); \
243 \
244 g_val = val[v1]; \
245 e_val = val[v2]; \
246 e_val_before = e_val; \
247 \
248 block[0] = flags_in; \
249 block[1] = g_val; \
250 block[2] = (long)&e_val; \
251 block[3] = 0; \
252 __asm__ __volatile__( \
253 "movq 0(%0), %%rax\n\t" \
254 "pushq %%rax\n\t" \
255 "popfq\n\t" \
256 "movq 8(%0), %%rax\n\t" \
257 "movq 16(%0), %%rbx\n\t" \
258 "lock; " #_name " %%" #_eax ",(%%rbx)\n\t" \
259 "pushfq\n\t" \
260 "popq %%rax\n\t" \
261 "movq %%rax, 24(%0)\n\t" \
262 : : "r"(&block[0]) : "rax","rbx","cc","memory" \
263 ); \
264 \
265 send( \
266 sprintf(outBuf, \
267 "%s G=%016llx E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n", \
268 #_name, g_val, e_val_before, flags_in, \
269 e_val, block[3] & CC_MASK)); \
270 \
271 }}}}}} \
272 \
273 }} \
274 }
275
GEN_do_locked_G_E(addb,al)276 GEN_do_locked_G_E(addb,al)
277 GEN_do_locked_G_E(addw,ax)
278 GEN_do_locked_G_E(addl,eax)
279 GEN_do_locked_G_E(addq,rax)
280
281 GEN_do_locked_G_E(orb, al)
282 GEN_do_locked_G_E(orw, ax)
283 GEN_do_locked_G_E(orl, eax)
284 GEN_do_locked_G_E(orq, rax)
285
286 GEN_do_locked_G_E(adcb,al)
287 GEN_do_locked_G_E(adcw,ax)
288 GEN_do_locked_G_E(adcl,eax)
289 GEN_do_locked_G_E(adcq,rax)
290
291 GEN_do_locked_G_E(sbbb,al)
292 GEN_do_locked_G_E(sbbw,ax)
293 GEN_do_locked_G_E(sbbl,eax)
294 GEN_do_locked_G_E(sbbq,rax)
295
296 GEN_do_locked_G_E(andb,al)
297 GEN_do_locked_G_E(andw,ax)
298 GEN_do_locked_G_E(andl,eax)
299 GEN_do_locked_G_E(andq,rax)
300
301 GEN_do_locked_G_E(subb,al)
302 GEN_do_locked_G_E(subw,ax)
303 GEN_do_locked_G_E(subl,eax)
304 GEN_do_locked_G_E(subq,rax)
305
306 GEN_do_locked_G_E(xorb,al)
307 GEN_do_locked_G_E(xorw,ax)
308 GEN_do_locked_G_E(xorl,eax)
309 GEN_do_locked_G_E(xorq,rax)
310
311
312
313
314 #define GEN_do_locked_imm_E(_name,_eax,_imm) \
315 \
316 __attribute__((noinline)) void do_locked_imm_E_##_name##_##_imm ( void ) \
317 { \
318 volatile Long e_val, e_val_before; \
319 Long o, s, z, a, c, p, v2, flags_in; \
320 Long block[3]; \
321 \
322 for (v2 = 0; v2 < NVALS; v2++) { \
323 \
324 for (o = 0; o < 2; o++) { \
325 for (s = 0; s < 2; s++) { \
326 for (z = 0; z < 2; z++) { \
327 for (a = 0; a < 2; a++) { \
328 for (c = 0; c < 2; c++) { \
329 for (p = 0; p < 2; p++) { \
330 \
331 flags_in = (o ? CC_O : 0) \
332 | (s ? CC_S : 0) \
333 | (z ? CC_Z : 0) \
334 | (a ? CC_A : 0) \
335 | (c ? CC_C : 0) \
336 | (p ? CC_P : 0); \
337 \
338 e_val = val[v2]; \
339 e_val_before = e_val; \
340 \
341 block[0] = flags_in; \
342 block[1] = (long)&e_val; \
343 block[2] = 0; \
344 __asm__ __volatile__( \
345 "movq 0(%0), %%rax\n\t" \
346 "pushq %%rax\n\t" \
347 "popfq\n\t" \
348 "movq 8(%0), %%rbx\n\t" \
349 "lock; " #_name " $" #_imm ",(%%rbx)\n\t" \
350 "pushfq\n\t" \
351 "popq %%rax\n\t" \
352 "movq %%rax, 16(%0)\n\t" \
353 : : "r"(&block[0]) : "rax","rbx","cc","memory" \
354 ); \
355 \
356 send( \
357 sprintf(outBuf, \
358 "%s I=%s E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n", \
359 #_name, #_imm, e_val_before, flags_in, \
360 e_val, block[2] & CC_MASK)); \
361 \
362 }}}}}} \
363 \
364 } \
365 }
366
367 GEN_do_locked_imm_E(addb,al,0x7F)
368 GEN_do_locked_imm_E(addb,al,0xF1)
369 GEN_do_locked_imm_E(addw,ax,0x7E)
370 GEN_do_locked_imm_E(addw,ax,0x9325)
371 GEN_do_locked_imm_E(addl,eax,0x7D)
372 GEN_do_locked_imm_E(addl,eax,0x31415927)
373 GEN_do_locked_imm_E(addq,rax,0x7D)
374 GEN_do_locked_imm_E(addq,rax,0x31415927)
375
376 GEN_do_locked_imm_E(orb,al,0x7F)
377 GEN_do_locked_imm_E(orb,al,0xF1)
378 GEN_do_locked_imm_E(orw,ax,0x7E)
379 GEN_do_locked_imm_E(orw,ax,0x9325)
380 GEN_do_locked_imm_E(orl,eax,0x7D)
381 GEN_do_locked_imm_E(orl,eax,0x31415927)
382 GEN_do_locked_imm_E(orq,rax,0x7D)
383 GEN_do_locked_imm_E(orq,rax,0x31415927)
384
385 GEN_do_locked_imm_E(adcb,al,0x7F)
386 GEN_do_locked_imm_E(adcb,al,0xF1)
387 GEN_do_locked_imm_E(adcw,ax,0x7E)
388 GEN_do_locked_imm_E(adcw,ax,0x9325)
389 GEN_do_locked_imm_E(adcl,eax,0x7D)
390 GEN_do_locked_imm_E(adcl,eax,0x31415927)
391 GEN_do_locked_imm_E(adcq,rax,0x7D)
392 GEN_do_locked_imm_E(adcq,rax,0x31415927)
393
394 GEN_do_locked_imm_E(sbbb,al,0x7F)
395 GEN_do_locked_imm_E(sbbb,al,0xF1)
396 GEN_do_locked_imm_E(sbbw,ax,0x7E)
397 GEN_do_locked_imm_E(sbbw,ax,0x9325)
398 GEN_do_locked_imm_E(sbbl,eax,0x7D)
399 GEN_do_locked_imm_E(sbbl,eax,0x31415927)
400 GEN_do_locked_imm_E(sbbq,rax,0x7D)
401 GEN_do_locked_imm_E(sbbq,rax,0x31415927)
402
403 GEN_do_locked_imm_E(andb,al,0x7F)
404 GEN_do_locked_imm_E(andb,al,0xF1)
405 GEN_do_locked_imm_E(andw,ax,0x7E)
406 GEN_do_locked_imm_E(andw,ax,0x9325)
407 GEN_do_locked_imm_E(andl,eax,0x7D)
408 GEN_do_locked_imm_E(andl,eax,0x31415927)
409 GEN_do_locked_imm_E(andq,rax,0x7D)
410 GEN_do_locked_imm_E(andq,rax,0x31415927)
411
412 GEN_do_locked_imm_E(subb,al,0x7F)
413 GEN_do_locked_imm_E(subb,al,0xF1)
414 GEN_do_locked_imm_E(subw,ax,0x7E)
415 GEN_do_locked_imm_E(subw,ax,0x9325)
416 GEN_do_locked_imm_E(subl,eax,0x7D)
417 GEN_do_locked_imm_E(subl,eax,0x31415927)
418 GEN_do_locked_imm_E(subq,rax,0x7D)
419 GEN_do_locked_imm_E(subq,rax,0x31415927)
420
421 GEN_do_locked_imm_E(xorb,al,0x7F)
422 GEN_do_locked_imm_E(xorb,al,0xF1)
423 GEN_do_locked_imm_E(xorw,ax,0x7E)
424 GEN_do_locked_imm_E(xorw,ax,0x9325)
425 GEN_do_locked_imm_E(xorl,eax,0x7D)
426 GEN_do_locked_imm_E(xorl,eax,0x31415927)
427 GEN_do_locked_imm_E(xorq,rax,0x7D)
428 GEN_do_locked_imm_E(xorq,rax,0x31415927)
429
430 #define GEN_do_locked_unary_E(_name,_eax) \
431 \
432 __attribute__((noinline)) void do_locked_unary_E_##_name ( void ) \
433 { \
434 volatile Long e_val, e_val_before; \
435 Long o, s, z, a, c, p, v2, flags_in; \
436 Long block[3]; \
437 \
438 for (v2 = 0; v2 < NVALS; v2++) { \
439 \
440 for (o = 0; o < 2; o++) { \
441 for (s = 0; s < 2; s++) { \
442 for (z = 0; z < 2; z++) { \
443 for (a = 0; a < 2; a++) { \
444 for (c = 0; c < 2; c++) { \
445 for (p = 0; p < 2; p++) { \
446 \
447 flags_in = (o ? CC_O : 0) \
448 | (s ? CC_S : 0) \
449 | (z ? CC_Z : 0) \
450 | (a ? CC_A : 0) \
451 | (c ? CC_C : 0) \
452 | (p ? CC_P : 0); \
453 \
454 e_val = val[v2]; \
455 e_val_before = e_val; \
456 \
457 block[0] = flags_in; \
458 block[1] = (long)&e_val; \
459 block[2] = 0; \
460 __asm__ __volatile__( \
461 "movq 0(%0), %%rax\n\t" \
462 "pushq %%rax\n\t" \
463 "popfq\n\t" \
464 "movq 8(%0), %%rbx\n\t" \
465 "lock; " #_name " (%%rbx)\n\t" \
466 "pushfq\n\t" \
467 "popq %%rax\n\t" \
468 "movq %%rax, 16(%0)\n\t" \
469 : : "r"(&block[0]) : "rax","rbx","cc","memory" \
470 ); \
471 \
472 send( \
473 sprintf(outBuf, \
474 "%s E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n", \
475 #_name, e_val_before, flags_in, \
476 e_val, block[2] & CC_MASK)); \
477 \
478 }}}}}} \
479 \
480 } \
481 }
482
483 GEN_do_locked_unary_E(decb,al)
484 GEN_do_locked_unary_E(decw,ax)
485 GEN_do_locked_unary_E(decl,eax)
486 GEN_do_locked_unary_E(decq,rax)
487
488 GEN_do_locked_unary_E(incb,al)
489 GEN_do_locked_unary_E(incw,ax)
490 GEN_do_locked_unary_E(incl,eax)
491 GEN_do_locked_unary_E(incq,rax)
492
493 GEN_do_locked_unary_E(negb,al)
494 GEN_do_locked_unary_E(negw,ax)
495 GEN_do_locked_unary_E(negl,eax)
496 GEN_do_locked_unary_E(negq,rax)
497
498 GEN_do_locked_unary_E(notb,al)
499 GEN_do_locked_unary_E(notw,ax)
500 GEN_do_locked_unary_E(notl,eax)
501 GEN_do_locked_unary_E(notq,rax)
502
503
504 /////////////////////////////////////////////////////////////////
505
506 ULong btsq_mem ( UChar* base, int bitno )
507 {
508 ULong res;
509 __asm__
510 __volatile__("lock; btsq\t%2, %0\n\t"
511 "setc %%dl\n\t"
512 "movzbq %%dl,%1\n"
513 : "=m" (*base), "=r" (res)
514 : "r" ((ULong)bitno) : "rdx","cc","memory" );
515 /* Pretty meaningless to dereference base here, but that's what you
516 have to do to get a btsl insn which refers to memory starting at
517 base. */
518 return res;
519 }
btsl_mem(UChar * base,int bitno)520 ULong btsl_mem ( UChar* base, int bitno )
521 {
522 ULong res;
523 __asm__
524 __volatile__("lock; btsl\t%2, %0\n\t"
525 "setc %%dl\n\t"
526 "movzbq %%dl,%1\n"
527 : "=m" (*base), "=r" (res)
528 : "r" ((UInt)bitno));
529 return res;
530 }
btsw_mem(UChar * base,int bitno)531 ULong btsw_mem ( UChar* base, int bitno )
532 {
533 ULong res;
534 __asm__
535 __volatile__("lock; btsw\t%w2, %0\n\t"
536 "setc %%dl\n\t"
537 "movzbq %%dl,%1\n"
538 : "=m" (*base), "=r" (res)
539 : "r" ((ULong)bitno));
540 return res;
541 }
542
btrq_mem(UChar * base,int bitno)543 ULong btrq_mem ( UChar* base, int bitno )
544 {
545 ULong res;
546 __asm__
547 __volatile__("lock; btrq\t%2, %0\n\t"
548 "setc %%dl\n\t"
549 "movzbq %%dl,%1\n"
550 : "=m" (*base), "=r" (res)
551 : "r" ((ULong)bitno));
552 return res;
553 }
btrl_mem(UChar * base,int bitno)554 ULong btrl_mem ( UChar* base, int bitno )
555 {
556 ULong res;
557 __asm__
558 __volatile__("lock; btrl\t%2, %0\n\t"
559 "setc %%dl\n\t"
560 "movzbq %%dl,%1\n"
561 : "=m" (*base), "=r" (res)
562 : "r" ((UInt)bitno));
563 return res;
564 }
btrw_mem(UChar * base,int bitno)565 ULong btrw_mem ( UChar* base, int bitno )
566 {
567 ULong res;
568 __asm__
569 __volatile__("lock; btrw\t%w2, %0\n\t"
570 "setc %%dl\n\t"
571 "movzbq %%dl,%1\n"
572 : "=m" (*base), "=r" (res)
573 : "r" ((ULong)bitno));
574 return res;
575 }
576
btcq_mem(UChar * base,int bitno)577 ULong btcq_mem ( UChar* base, int bitno )
578 {
579 ULong res;
580 __asm__
581 __volatile__("lock; btcq\t%2, %0\n\t"
582 "setc %%dl\n\t"
583 "movzbq %%dl,%1\n"
584 : "=m" (*base), "=r" (res)
585 : "r" ((ULong)bitno));
586 return res;
587 }
btcl_mem(UChar * base,int bitno)588 ULong btcl_mem ( UChar* base, int bitno )
589 {
590 ULong res;
591 __asm__
592 __volatile__("lock; btcl\t%2, %0\n\t"
593 "setc %%dl\n\t"
594 "movzbq %%dl,%1\n"
595 : "=m" (*base), "=r" (res)
596 : "r" ((UInt)bitno));
597 return res;
598 }
btcw_mem(UChar * base,int bitno)599 ULong btcw_mem ( UChar* base, int bitno )
600 {
601 ULong res;
602 __asm__
603 __volatile__("lock; btcw\t%w2, %0\n\t"
604 "setc %%dl\n\t"
605 "movzbq %%dl,%1\n"
606 : "=m" (*base), "=r" (res)
607 : "r" ((ULong)bitno));
608 return res;
609 }
610
btq_mem(UChar * base,int bitno)611 ULong btq_mem ( UChar* base, int bitno )
612 {
613 ULong res;
614 __asm__
615 __volatile__("btq\t%2, %0\n\t"
616 "setc %%dl\n\t"
617 "movzbq %%dl,%1\n"
618 : "=m" (*base), "=r" (res)
619 : "r" ((ULong)bitno)
620 : "cc", "memory");
621 return res;
622 }
btl_mem(UChar * base,int bitno)623 ULong btl_mem ( UChar* base, int bitno )
624 {
625 ULong res;
626 __asm__
627 __volatile__("btl\t%2, %0\n\t"
628 "setc %%dl\n\t"
629 "movzbq %%dl,%1\n"
630 : "=m" (*base), "=r" (res)
631 : "r" ((UInt)bitno)
632 : "cc", "memory");
633 return res;
634 }
btw_mem(UChar * base,int bitno)635 ULong btw_mem ( UChar* base, int bitno )
636 {
637 ULong res;
638 __asm__
639 __volatile__("btw\t%w2, %0\n\t"
640 "setc %%dl\n\t"
641 "movzbq %%dl,%1\n"
642 : "=m" (*base), "=r" (res)
643 : "r" ((ULong)bitno));
644 return res;
645 }
646
/* Rotate a 64-bit value left by one bit position. */
ULong rol1 ( ULong x )
{
   ULong wrapped = x >> 63;   /* old top bit, now in bit 0 */
   ULong shifted = x << 1;    /* remaining 63 bits moved up by one */
   return shifted | wrapped;
}
651
do_bt_G_E_tests(void)652 void do_bt_G_E_tests ( void )
653 {
654 ULong n, bitoff, op;
655 ULong c;
656 UChar* block;
657 ULong carrydep, res;;
658
659 /*------------------------ MEM-Q -----------------------*/
660
661 carrydep = 0;
662 block = calloc(200,1);
663 block += 100;
664 /* Valid bit offsets are -800 .. 799 inclusive. */
665
666 for (n = 0; n < 10000; n++) {
667 bitoff = (random() % 1600) - 800;
668 op = random() % 4;
669 c = 2;
670 switch (op) {
671 case 0: c = btsq_mem(block, bitoff); break;
672 case 1: c = btrq_mem(block, bitoff); break;
673 case 2: c = btcq_mem(block, bitoff); break;
674 case 3: c = btq_mem(block, bitoff); break;
675 }
676 c &= 255;
677 assert(c == 0 || c == 1);
678 carrydep = c ? (rol1(carrydep) ^ (Long)bitoff) : carrydep;
679 }
680
681 /* Compute final result */
682 block -= 100;
683 res = 0;
684 for (n = 0; n < 200; n++) {
685 UChar ch = block[n];
686 /* printf("%d ", (int)block[n]); */
687 res = rol1(res) ^ (ULong)ch;
688 }
689
690 send( sprintf(outBuf,
691 "bt{s,r,c}q: final res 0x%llx, carrydep 0x%llx\n",
692 res, carrydep));
693 free(block);
694
695 /*------------------------ MEM-L -----------------------*/
696
697 carrydep = 0;
698 block = calloc(200,1);
699 block += 100;
700 /* Valid bit offsets are -800 .. 799 inclusive. */
701
702 for (n = 0; n < 10000; n++) {
703 bitoff = (random() % 1600) - 800;
704 op = random() % 4;
705 c = 2;
706 switch (op) {
707 case 0: c = btsl_mem(block, bitoff); break;
708 case 1: c = btrl_mem(block, bitoff); break;
709 case 2: c = btcl_mem(block, bitoff); break;
710 case 3: c = btl_mem(block, bitoff); break;
711 }
712 c &= 255;
713 assert(c == 0 || c == 1);
714 carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
715 }
716
717 /* Compute final result */
718 block -= 100;
719 res = 0;
720 for (n = 0; n < 200; n++) {
721 UChar ch = block[n];
722 /* printf("%d ", (int)block[n]); */
723 res = rol1(res) ^ (ULong)ch;
724 }
725
726 send( sprintf(outBuf,
727 "bt{s,r,c}l: final res 0x%llx, carrydep 0x%llx\n",
728 res, carrydep));
729 free(block);
730
731 /*------------------------ MEM-W -----------------------*/
732
733 carrydep = 0;
734 block = calloc(200,1);
735 block += 100;
736 /* Valid bit offsets are -800 .. 799 inclusive. */
737
738 for (n = 0; n < 10000; n++) {
739 bitoff = (random() % 1600) - 800;
740 op = random() % 4;
741 c = 2;
742 switch (op) {
743 case 0: c = btsw_mem(block, bitoff); break;
744 case 1: c = btrw_mem(block, bitoff); break;
745 case 2: c = btcw_mem(block, bitoff); break;
746 case 3: c = btw_mem(block, bitoff); break;
747 }
748 c &= 255;
749 assert(c == 0 || c == 1);
750 carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
751 }
752
753 /* Compute final result */
754 block -= 100;
755 res = 0;
756 for (n = 0; n < 200; n++) {
757 UChar ch = block[n];
758 /* printf("%d ", (int)block[n]); */
759 res = rol1(res) ^ (ULong)ch;
760 }
761
762 send(sprintf(outBuf,
763 "bt{s,r,c}w: final res 0x%llx, carrydep 0x%llx\n",
764 res, carrydep));
765 free(block);
766 }
767
768
769 /////////////////////////////////////////////////////////////////
770
771 /* Given a word, do bt/bts/btr/btc on bits 0, 1, 2 and 3 of it, and
772 also reconstruct the original bits 0, 1, 2, 3 by looking at the
773 carry flag. Returned result has mashed bits 0-3 at the bottom and
774 the reconstructed original bits 0-3 as 4-7. */
775
mash_mem_Q(ULong * origp)776 ULong mash_mem_Q ( ULong* origp )
777 {
778 ULong reconstructed, mashed;
779 __asm__ __volatile__ (
780 "movq %2, %%rdx\n\t"
781 ""
782 "movq $0, %%rax\n\t"
783 "\n\t"
784 "btq $0, (%%rdx)\n\t"
785 "setb %%cl\n\t"
786 "movzbq %%cl, %%rcx\n\t"
787 "orq %%rcx, %%rax\n\t"
788 "\n\t"
789 "lock; btsq $1, (%%rdx)\n\t"
790 "setb %%cl\n\t"
791 "movzbq %%cl, %%rcx\n\t"
792 "shlq $1, %%rcx\n\t"
793 "orq %%rcx, %%rax\n\t"
794 "\n\t"
795 "lock; btrq $2, (%%rdx)\n\t"
796 "setb %%cl\n\t"
797 "movzbq %%cl, %%rcx\n\t"
798 "shlq $2, %%rcx\n\t"
799 "orq %%rcx, %%rax\n\t"
800 "\n\t"
801 "lock; btcq $3, (%%rdx)\n\t"
802 "setb %%cl\n\t"
803 "movzbq %%cl, %%rcx\n\t"
804 "shlq $3, %%rcx\n\t"
805 "orq %%rcx, %%rax\n\t"
806 "\n\t"
807 "movq %%rax, %0\n\t"
808 "movq (%%rdx), %1"
809 : "=r" (reconstructed), "=r" (mashed)
810 : "r" (origp)
811 : "rax", "rcx", "rdx", "cc");
812 return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
813 }
814
mash_mem_L(UInt * origp)815 ULong mash_mem_L ( UInt* origp )
816 {
817 ULong reconstructed; UInt mashed;
818 __asm__ __volatile__ (
819 "movq %2, %%rdx\n\t"
820 ""
821 "movq $0, %%rax\n\t"
822 "\n\t"
823 "btl $0, (%%rdx)\n\t"
824 "setb %%cl\n\t"
825 "movzbq %%cl, %%rcx\n\t"
826 "orq %%rcx, %%rax\n\t"
827 "\n\t"
828 "lock; btsl $1, (%%rdx)\n\t"
829 "setb %%cl\n\t"
830 "movzbq %%cl, %%rcx\n\t"
831 "shlq $1, %%rcx\n\t"
832 "orq %%rcx, %%rax\n\t"
833 "\n\t"
834 "lock; btrl $2, (%%rdx)\n\t"
835 "setb %%cl\n\t"
836 "movzbq %%cl, %%rcx\n\t"
837 "shlq $2, %%rcx\n\t"
838 "orq %%rcx, %%rax\n\t"
839 "\n\t"
840 "lock; btcl $3, (%%rdx)\n\t"
841 "setb %%cl\n\t"
842 "movzbq %%cl, %%rcx\n\t"
843 "shlq $3, %%rcx\n\t"
844 "orq %%rcx, %%rax\n\t"
845 "\n\t"
846 "movq %%rax, %0\n\t"
847 "movl (%%rdx), %1"
848 : "=r" (reconstructed), "=r" (mashed)
849 : "r" (origp)
850 : "rax", "rcx", "rdx", "cc");
851 return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
852 }
853
mash_mem_W(UShort * origp)854 ULong mash_mem_W ( UShort* origp )
855 {
856 ULong reconstructed, mashed;
857 __asm__ __volatile__ (
858 "movq %2, %%rdx\n\t"
859 ""
860 "movq $0, %%rax\n\t"
861 "\n\t"
862 "btw $0, (%%rdx)\n\t"
863 "setb %%cl\n\t"
864 "movzbq %%cl, %%rcx\n\t"
865 "orq %%rcx, %%rax\n\t"
866 "\n\t"
867 "lock; btsw $1, (%%rdx)\n\t"
868 "setb %%cl\n\t"
869 "movzbq %%cl, %%rcx\n\t"
870 "shlq $1, %%rcx\n\t"
871 "orq %%rcx, %%rax\n\t"
872 "\n\t"
873 "lock; btrw $2, (%%rdx)\n\t"
874 "setb %%cl\n\t"
875 "movzbq %%cl, %%rcx\n\t"
876 "shlq $2, %%rcx\n\t"
877 "orq %%rcx, %%rax\n\t"
878 "\n\t"
879 "lock; btcw $3, (%%rdx)\n\t"
880 "setb %%cl\n\t"
881 "movzbq %%cl, %%rcx\n\t"
882 "shlq $3, %%rcx\n\t"
883 "orq %%rcx, %%rax\n\t"
884 "\n\t"
885 "movq %%rax, %0\n\t"
886 "movzwq (%%rdx), %1"
887 : "=r" (reconstructed), "=r" (mashed)
888 : "r" (origp)
889 : "rax", "rcx", "rdx", "cc");
890 return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
891 }
892
893
do_bt_imm_E_tests(void)894 void do_bt_imm_E_tests( void )
895 {
896 ULong i;
897 ULong* iiq = malloc(sizeof(ULong));
898 UInt* iil = malloc(sizeof(UInt));
899 UShort* iiw = malloc(sizeof(UShort));
900 for (i = 0; i < 0x10; i++) {
901 *iiq = i;
902 *iil = i;
903 *iiw = i;
904 send(sprintf(outBuf,"0x%llx -> 0x%02llx 0x%02llx 0x%02llx\n", i,
905 mash_mem_Q(iiq), mash_mem_L(iil), mash_mem_W(iiw)));
906 }
907 free(iiq);
908 free(iil);
909 free(iiw);
910 }
911
912
913 /////////////////////////////////////////////////////////////////
914
main(void)915 int main ( void )
916 {
917 do_locked_G_E_addb();
918 do_locked_G_E_addw();
919 do_locked_G_E_addl();
920 do_locked_G_E_addq();
921
922 do_locked_G_E_orb();
923 do_locked_G_E_orw();
924 do_locked_G_E_orl();
925 do_locked_G_E_orq();
926
927 do_locked_G_E_adcb();
928 do_locked_G_E_adcw();
929 do_locked_G_E_adcl();
930 do_locked_G_E_adcq();
931
932 do_locked_G_E_sbbb();
933 do_locked_G_E_sbbw();
934 do_locked_G_E_sbbl();
935 do_locked_G_E_sbbq();
936
937 do_locked_G_E_andb();
938 do_locked_G_E_andw();
939 do_locked_G_E_andl();
940 do_locked_G_E_andq();
941
942 do_locked_G_E_subb();
943 do_locked_G_E_subw();
944 do_locked_G_E_subl();
945 do_locked_G_E_subq();
946
947 do_locked_G_E_xorb();
948 do_locked_G_E_xorw();
949 do_locked_G_E_xorl();
950 do_locked_G_E_xorq();
951 // 4 * 7
952
953 do_locked_imm_E_addb_0x7F();
954 do_locked_imm_E_addb_0xF1();
955 do_locked_imm_E_addw_0x7E();
956 do_locked_imm_E_addw_0x9325();
957 do_locked_imm_E_addl_0x7D();
958 do_locked_imm_E_addl_0x31415927();
959 do_locked_imm_E_addq_0x7D();
960 do_locked_imm_E_addq_0x31415927();
961
962 do_locked_imm_E_orb_0x7F();
963 do_locked_imm_E_orb_0xF1();
964 do_locked_imm_E_orw_0x7E();
965 do_locked_imm_E_orw_0x9325();
966 do_locked_imm_E_orl_0x7D();
967 do_locked_imm_E_orl_0x31415927();
968 do_locked_imm_E_orq_0x7D();
969 do_locked_imm_E_orq_0x31415927();
970
971 do_locked_imm_E_adcb_0x7F();
972 do_locked_imm_E_adcb_0xF1();
973 do_locked_imm_E_adcw_0x7E();
974 do_locked_imm_E_adcw_0x9325();
975 do_locked_imm_E_adcl_0x7D();
976 do_locked_imm_E_adcl_0x31415927();
977 do_locked_imm_E_adcq_0x7D();
978 do_locked_imm_E_adcq_0x31415927();
979
980 do_locked_imm_E_sbbb_0x7F();
981 do_locked_imm_E_sbbb_0xF1();
982 do_locked_imm_E_sbbw_0x7E();
983 do_locked_imm_E_sbbw_0x9325();
984 do_locked_imm_E_sbbl_0x7D();
985 do_locked_imm_E_sbbl_0x31415927();
986 do_locked_imm_E_sbbq_0x7D();
987 do_locked_imm_E_sbbq_0x31415927();
988
989 do_locked_imm_E_andb_0x7F();
990 do_locked_imm_E_andb_0xF1();
991 do_locked_imm_E_andw_0x7E();
992 do_locked_imm_E_andw_0x9325();
993 do_locked_imm_E_andl_0x7D();
994 do_locked_imm_E_andl_0x31415927();
995 do_locked_imm_E_andq_0x7D();
996 do_locked_imm_E_andq_0x31415927();
997
998 do_locked_imm_E_subb_0x7F();
999 do_locked_imm_E_subb_0xF1();
1000 do_locked_imm_E_subw_0x7E();
1001 do_locked_imm_E_subw_0x9325();
1002 do_locked_imm_E_subl_0x7D();
1003 do_locked_imm_E_subl_0x31415927();
1004 do_locked_imm_E_subq_0x7D();
1005 do_locked_imm_E_subq_0x31415927();
1006
1007 do_locked_imm_E_xorb_0x7F();
1008 do_locked_imm_E_xorb_0xF1();
1009 do_locked_imm_E_xorw_0x7E();
1010 do_locked_imm_E_xorw_0x9325();
1011 do_locked_imm_E_xorl_0x7D();
1012 do_locked_imm_E_xorl_0x31415927();
1013 do_locked_imm_E_xorq_0x7D();
1014 do_locked_imm_E_xorq_0x31415927();
1015 // 4 * 7 + 8 * 7 == 84
1016
1017 do_locked_unary_E_decb();
1018 do_locked_unary_E_decw();
1019 do_locked_unary_E_decl();
1020 do_locked_unary_E_decq();
1021
1022 do_locked_unary_E_incb();
1023 do_locked_unary_E_incw();
1024 do_locked_unary_E_incl();
1025 do_locked_unary_E_incq();
1026
1027 do_locked_unary_E_negb();
1028 do_locked_unary_E_negw();
1029 do_locked_unary_E_negl();
1030 do_locked_unary_E_negq();
1031
1032 do_locked_unary_E_notb();
1033 do_locked_unary_E_notw();
1034 do_locked_unary_E_notl();
1035 do_locked_unary_E_notq();
1036 // 100
1037
1038 do_bt_G_E_tests();
1039 // 109
1040 do_bt_imm_E_tests();
1041 // 118
1042
1043 // So there should be 118 lock-prefixed instructions in the
1044 // disassembly of this compilation unit.
1045 // confirm with
1046 // objdump -d ./amd64locked | grep lock | grep -v do_lock | grep -v elf64 | wc
1047
1048
1049 { UInt crcExpd = 0x1F677629;
1050 theCRC = crcFinalise( theCRC );
1051 if (theCRC == crcExpd) {
1052 printf("amd64locked: PASS: CRCs actual 0x%08X expected 0x%08X\n",
1053 theCRC, crcExpd);
1054 } else {
1055 printf("amd64locked: FAIL: CRCs actual 0x%08X expected 0x%08X\n",
1056 theCRC, crcExpd);
1057 printf("amd64locked: set #define VERBOSE 1 to diagnose\n");
1058 }
1059 }
1060
1061 return 0;
1062 }
1063