1
2 /*---------------------------------------------------------------*/
3 /*--- begin guest_amd64_helpers.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2013 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex_emnote.h"
38 #include "libvex_guest_amd64.h"
39 #include "libvex_ir.h"
40 #include "libvex.h"
41
42 #include "main_util.h"
43 #include "main_globals.h"
44 #include "guest_generic_bb_to_IR.h"
45 #include "guest_amd64_defs.h"
46 #include "guest_generic_x87.h"
47
48
49 /* This file contains helper functions for amd64 guest code.
50 Calls to these functions are generated by the back end.
51 These calls are of course in the host machine code and
52 this file will be compiled to host machine code, so that
53 all makes sense.
54
55 Only change the signatures of these helper functions very
56 carefully. If you change the signature here, you'll have to change
57 the parameters passed to it in the IR calls constructed by
58 guest-amd64/toIR.c.
59
60 The convention used is that all functions called from generated
61 code are named amd64g_<something>, and any function whose name lacks
62 that prefix is not called from generated code. Note that some
63 LibVEX_* functions can however be called by VEX's client, but that
64 is not the same as calling them from VEX-generated code.
65 */
66
67
68 /* Set to 1 to get detailed profiling info about use of the flag
69 machinery. */
70 #define PROFILE_RFLAGS 0
71
72
73 /*---------------------------------------------------------------*/
74 /*--- %rflags run-time helpers. ---*/
75 /*---------------------------------------------------------------*/
76
77 /* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
78 after imulq/mulq. */
79
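/* Both multipliers below use schoolbook long multiplication on 32-bit
   halves: each operand is split into high/low 32-bit limbs, the partial
   products are summed together with their carries to form the high 64
   bits of the 128-bit result, and the low 64 bits are simply u * v
   computed modulo 2^64. */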
80 static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
81 {
82 ULong u0, v0, w0;
83 Long u1, v1, w1, w2, t;
84 u0 = u & 0xFFFFFFFFULL;
85 u1 = u >> 32;
86 v0 = v & 0xFFFFFFFFULL;
87 v1 = v >> 32;
88 w0 = u0 * v0;
89 t = u1 * v0 + (w0 >> 32);
90 w1 = t & 0xFFFFFFFFULL;
91 w2 = t >> 32;
92 w1 = u0 * v1 + w1;
93 *rHi = u1 * v1 + w2 + (w1 >> 32);
94 *rLo = u * v;
95 }
96
97 static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
98 {
99 ULong u0, v0, w0;
100 ULong u1, v1, w1,w2,t;
101 u0 = u & 0xFFFFFFFFULL;
102 u1 = u >> 32;
103 v0 = v & 0xFFFFFFFFULL;
104 v1 = v >> 32;
105 w0 = u0 * v0;
106 t = u1 * v0 + (w0 >> 32);
107 w1 = t & 0xFFFFFFFFULL;
108 w2 = t >> 32;
109 w1 = u0 * v1 + w1;
110 *rHi = u1 * v1 + w2 + (w1 >> 32);
111 *rLo = u * v;
112 }
113
114
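/* parity_table[b] is AMD64G_CC_MASK_P when byte b contains an even
   number of 1 bits, and 0 otherwise.  This matches the x86 definition
   of PF: set iff the least significant 8 bits of a result have even
   parity. */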
115 static const UChar parity_table[256] = {
116 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
117 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
118 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
119 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
120 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
121 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
122 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
123 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
124 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
125 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
126 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
127 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
128 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
129 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
130 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
131 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
132 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
133 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
134 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
135 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
136 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
137 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
138 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
139 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
140 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
141 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
142 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
143 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
144 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
145 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
146 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
147 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
148 };
149
150 /* generalised left-shifter: shifts left by n for n >= 0, else
    (arithmetically, since x is signed) right by -n */
151 static inline Long lshift ( Long x, Int n )
152 {
153 if (n >= 0)
154 return (ULong)x << n;
155 else
156 return x >> (-n);
157 }
158
159 /* identity on ULong */
160 static inline ULong idULong ( ULong x )
161 {
162 return x;
163 }
164
165
166 #define PREAMBLE(__data_bits) \
167 /* const */ ULong DATA_MASK \
168 = __data_bits==8 \
169 ? 0xFFULL \
170 : (__data_bits==16 \
171 ? 0xFFFFULL \
172 : (__data_bits==32 \
173 ? 0xFFFFFFFFULL \
174 : 0xFFFFFFFFFFFFFFFFULL)); \
175 /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1); \
176 /* const */ ULong CC_DEP1 = cc_dep1_formal; \
177 /* const */ ULong CC_DEP2 = cc_dep2_formal; \
178 /* const */ ULong CC_NDEP = cc_ndep_formal; \
179 /* Four bogus assignments, which hopefully gcc can */ \
180 /* optimise away, and which stop it complaining about */ \
181 /* unused variables. */ \
182 SIGN_MASK = SIGN_MASK; \
183 DATA_MASK = DATA_MASK; \
184 CC_DEP2 = CC_DEP2; \
185 CC_NDEP = CC_NDEP;
186
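/* Each ACTIONS_* macro below computes the six OSZACP flags directly in
   their %rflags bit positions -- CF bit 0, PF bit 2, AF bit 4, ZF bit 6,
   SF bit 7, OF bit 11 -- and returns their bitwise OR.  Hence the "<< 6"
   for zf, the "& 0x80" after moving the sign bit into bit 7 for sf, and
   the AMD64G_CC_MASK_O mask (bit 11) for of. */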
187
188 /*-------------------------------------------------------------*/
189
190 #define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \
191 { \
192 PREAMBLE(DATA_BITS); \
193 { ULong cf, pf, af, zf, sf, of; \
194 ULong argL, argR, res; \
195 argL = CC_DEP1; \
196 argR = CC_DEP2; \
197 res = argL + argR; \
198 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
199 pf = parity_table[(UChar)res]; \
200 af = (res ^ argL ^ argR) & 0x10; \
201 zf = ((DATA_UTYPE)res == 0) << 6; \
202 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
203 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
204 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
205 return cf | pf | af | zf | sf | of; \
206 } \
207 }
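/* Worked example for ACTIONS_ADD(8, UChar), i.e. an ADDB thunk with
   CC_DEP1 = 0xFF and CC_DEP2 = 0x01: res = 0x100, so
      cf = 1      (truncated result 0x00 is below argL 0xFF)
      pf = AMD64G_CC_MASK_P  (0x00 has an even number of set bits)
      af = 0x10   ((res ^ argL ^ argR) & 0x10)
      zf = 0x40   (truncated result is zero)
      sf = 0, of = 0
   giving CF|PF|AF|ZF -- exactly what "addb $1, %al" with %al = 0xFF
   produces on real hardware. */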
208
209 /*-------------------------------------------------------------*/
210
211 #define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \
212 { \
213 PREAMBLE(DATA_BITS); \
214 { ULong cf, pf, af, zf, sf, of; \
215 ULong argL, argR, res; \
216 argL = CC_DEP1; \
217 argR = CC_DEP2; \
218 res = argL - argR; \
219 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
220 pf = parity_table[(UChar)res]; \
221 af = (res ^ argL ^ argR) & 0x10; \
222 zf = ((DATA_UTYPE)res == 0) << 6; \
223 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
224 of = lshift((argL ^ argR) & (argL ^ res), \
225 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
226 return cf | pf | af | zf | sf | of; \
227 } \
228 }
229
230 /*-------------------------------------------------------------*/
231
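/* For the ADC and SBB thunks, DEP2 holds (second operand XOR old carry)
   and NDEP holds the old carry itself; the two macros below therefore
   first recover the original second operand by XORing with oldC before
   redoing the arithmetic. */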
232 #define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \
233 { \
234 PREAMBLE(DATA_BITS); \
235 { ULong cf, pf, af, zf, sf, of; \
236 ULong argL, argR, oldC, res; \
237 oldC = CC_NDEP & AMD64G_CC_MASK_C; \
238 argL = CC_DEP1; \
239 argR = CC_DEP2 ^ oldC; \
240 res = (argL + argR) + oldC; \
241 if (oldC) \
242 cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \
243 else \
244 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
245 pf = parity_table[(UChar)res]; \
246 af = (res ^ argL ^ argR) & 0x10; \
247 zf = ((DATA_UTYPE)res == 0) << 6; \
248 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
249 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
250 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
251 return cf | pf | af | zf | sf | of; \
252 } \
253 }
254
255 /*-------------------------------------------------------------*/
256
257 #define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \
258 { \
259 PREAMBLE(DATA_BITS); \
260 { ULong cf, pf, af, zf, sf, of; \
261 ULong argL, argR, oldC, res; \
262 oldC = CC_NDEP & AMD64G_CC_MASK_C; \
263 argL = CC_DEP1; \
264 argR = CC_DEP2 ^ oldC; \
265 res = (argL - argR) - oldC; \
266 if (oldC) \
267 cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR; \
268 else \
269 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
270 pf = parity_table[(UChar)res]; \
271 af = (res ^ argL ^ argR) & 0x10; \
272 zf = ((DATA_UTYPE)res == 0) << 6; \
273 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
274 of = lshift((argL ^ argR) & (argL ^ res), \
275 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
276 return cf | pf | af | zf | sf | of; \
277 } \
278 }
279
280 /*-------------------------------------------------------------*/
281
282 #define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \
283 { \
284 PREAMBLE(DATA_BITS); \
285 { ULong cf, pf, af, zf, sf, of; \
286 cf = 0; \
287 pf = parity_table[(UChar)CC_DEP1]; \
288 af = 0; \
289 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
290 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
291 of = 0; \
292 return cf | pf | af | zf | sf | of; \
293 } \
294 }
295
296 /*-------------------------------------------------------------*/
297
298 #define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \
299 { \
300 PREAMBLE(DATA_BITS); \
301 { ULong cf, pf, af, zf, sf, of; \
302 ULong argL, argR, res; \
303 res = CC_DEP1; \
304 argL = res - 1; \
305 argR = 1; \
306 cf = CC_NDEP & AMD64G_CC_MASK_C; \
307 pf = parity_table[(UChar)res]; \
308 af = (res ^ argL ^ argR) & 0x10; \
309 zf = ((DATA_UTYPE)res == 0) << 6; \
310 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
311 of = ((res & DATA_MASK) == SIGN_MASK) << 11; \
312 return cf | pf | af | zf | sf | of; \
313 } \
314 }
315
316 /*-------------------------------------------------------------*/
317
318 #define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \
319 { \
320 PREAMBLE(DATA_BITS); \
321 { ULong cf, pf, af, zf, sf, of; \
322 ULong argL, argR, res; \
323 res = CC_DEP1; \
324 argL = res + 1; \
325 argR = 1; \
326 cf = CC_NDEP & AMD64G_CC_MASK_C; \
327 pf = parity_table[(UChar)res]; \
328 af = (res ^ argL ^ argR) & 0x10; \
329 zf = ((DATA_UTYPE)res == 0) << 6; \
330 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
331 of = ((res & DATA_MASK) \
332 == ((ULong)SIGN_MASK - 1)) << 11; \
333 return cf | pf | af | zf | sf | of; \
334 } \
335 }
336
337 /*-------------------------------------------------------------*/
338
339 #define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \
340 { \
341 PREAMBLE(DATA_BITS); \
342 { ULong cf, pf, af, zf, sf, of; \
343 cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C; \
344 pf = parity_table[(UChar)CC_DEP1]; \
345 af = 0; /* undefined */ \
346 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
347 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
348 /* of is defined if shift count == 1 */ \
349 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
350 & AMD64G_CC_MASK_O; \
351 return cf | pf | af | zf | sf | of; \
352 } \
353 }
354
355 /*-------------------------------------------------------------*/
356
357 #define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \
358 { \
359 PREAMBLE(DATA_BITS); \
360 { ULong cf, pf, af, zf, sf, of; \
361 cf = CC_DEP2 & 1; \
362 pf = parity_table[(UChar)CC_DEP1]; \
363 af = 0; /* undefined */ \
364 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
365 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
366 /* of is defined if shift count == 1 */ \
367 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
368 & AMD64G_CC_MASK_O; \
369 return cf | pf | af | zf | sf | of; \
370 } \
371 }
372
373 /*-------------------------------------------------------------*/
374
375 /* ROL: cf' = lsb(result). of' = msb(result) ^ lsb(result). */
376 /* DEP1 = result, NDEP = old flags */
377 #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \
378 { \
379 PREAMBLE(DATA_BITS); \
380 { ULong fl \
381 = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \
382 | (AMD64G_CC_MASK_C & CC_DEP1) \
383 | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \
384 11-(DATA_BITS-1)) \
385 ^ lshift(CC_DEP1, 11))); \
386 return fl; \
387 } \
388 }
389
390 /*-------------------------------------------------------------*/
391
392 /* ROR: cf' = msb(result). of' = msb(result) ^ msb-1(result). */
393 /* DEP1 = result, NDEP = old flags */
394 #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \
395 { \
396 PREAMBLE(DATA_BITS); \
397 { ULong fl \
398 = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \
399 | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \
400 | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \
401 11-(DATA_BITS-1)) \
402 ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \
403 return fl; \
404 } \
405 }
406
407 /*-------------------------------------------------------------*/
408
409 #define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU, \
410 DATA_U2TYPE, NARROWto2U) \
411 { \
412 PREAMBLE(DATA_BITS); \
413 { ULong cf, pf, af, zf, sf, of; \
414 DATA_UTYPE hi; \
415 DATA_UTYPE lo \
416 = NARROWtoU( ((DATA_UTYPE)CC_DEP1) \
417 * ((DATA_UTYPE)CC_DEP2) ); \
418 DATA_U2TYPE rr \
419 = NARROWto2U( \
420 ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1)) \
421 * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) ); \
422 hi = NARROWtoU(rr >>/*u*/ DATA_BITS); \
423 cf = (hi != 0); \
424 pf = parity_table[(UChar)lo]; \
425 af = 0; /* undefined */ \
426 zf = (lo == 0) << 6; \
427 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
428 of = cf << 11; \
429 return cf | pf | af | zf | sf | of; \
430 } \
431 }
432
433 /*-------------------------------------------------------------*/
434
435 #define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS, \
436 DATA_S2TYPE, NARROWto2S) \
437 { \
438 PREAMBLE(DATA_BITS); \
439 { ULong cf, pf, af, zf, sf, of; \
440 DATA_STYPE hi; \
441 DATA_STYPE lo \
442 = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1) \
443 * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) ); \
444 DATA_S2TYPE rr \
445 = NARROWto2S( \
446 ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \
447 * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) ); \
448 hi = NARROWtoS(rr >>/*s*/ DATA_BITS); \
449 cf = (hi != (lo >>/*s*/ (DATA_BITS-1))); \
450 pf = parity_table[(UChar)lo]; \
451 af = 0; /* undefined */ \
452 zf = (lo == 0) << 6; \
453 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
454 of = cf << 11; \
455 return cf | pf | af | zf | sf | of; \
456 } \
457 }
458
459 /*-------------------------------------------------------------*/
460
461 #define ACTIONS_UMULQ \
462 { \
463 PREAMBLE(64); \
464 { ULong cf, pf, af, zf, sf, of; \
465 ULong lo, hi; \
466 mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo ); \
467 cf = (hi != 0); \
468 pf = parity_table[(UChar)lo]; \
469 af = 0; /* undefined */ \
470 zf = (lo == 0) << 6; \
471 sf = lshift(lo, 8 - 64) & 0x80; \
472 of = cf << 11; \
473 return cf | pf | af | zf | sf | of; \
474 } \
475 }
476
477 /*-------------------------------------------------------------*/
478
479 #define ACTIONS_SMULQ \
480 { \
481 PREAMBLE(64); \
482 { ULong cf, pf, af, zf, sf, of; \
483 Long lo, hi; \
484 mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo ); \
485 cf = (hi != (lo >>/*s*/ (64-1))); \
486 pf = parity_table[(UChar)lo]; \
487 af = 0; /* undefined */ \
488 zf = (lo == 0) << 6; \
489 sf = lshift(lo, 8 - 64) & 0x80; \
490 of = cf << 11; \
491 return cf | pf | af | zf | sf | of; \
492 } \
493 }
494
495 /*-------------------------------------------------------------*/
496
497 #define ACTIONS_ANDN(DATA_BITS,DATA_UTYPE) \
498 { \
499 PREAMBLE(DATA_BITS); \
500 { ULong cf, pf, af, zf, sf, of; \
501 cf = 0; \
502 pf = 0; \
503 af = 0; \
504 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
505 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
506 of = 0; \
507 return cf | pf | af | zf | sf | of; \
508 } \
509 }
510
511 /*-------------------------------------------------------------*/
512
513 #define ACTIONS_BLSI(DATA_BITS,DATA_UTYPE) \
514 { \
515 PREAMBLE(DATA_BITS); \
516 { ULong cf, pf, af, zf, sf, of; \
517 cf = ((DATA_UTYPE)CC_DEP2 != 0); \
518 pf = 0; \
519 af = 0; \
520 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
521 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
522 of = 0; \
523 return cf | pf | af | zf | sf | of; \
524 } \
525 }
526
527 /*-------------------------------------------------------------*/
528
529 #define ACTIONS_BLSMSK(DATA_BITS,DATA_UTYPE) \
530 { \
531 PREAMBLE(DATA_BITS); \
532 { Long cf, pf, af, zf, sf, of; \
533 cf = ((DATA_UTYPE)CC_DEP2 == 0); \
534 pf = 0; \
535 af = 0; \
536 zf = 0; \
537 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
538 of = 0; \
539 return cf | pf | af | zf | sf | of; \
540 } \
541 }
542
543 /*-------------------------------------------------------------*/
544
545 #define ACTIONS_BLSR(DATA_BITS,DATA_UTYPE) \
546 { \
547 PREAMBLE(DATA_BITS); \
548 { ULong cf, pf, af, zf, sf, of; \
549 cf = ((DATA_UTYPE)CC_DEP2 == 0); \
550 pf = 0; \
551 af = 0; \
552 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
553 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
554 of = 0; \
555 return cf | pf | af | zf | sf | of; \
556 } \
557 }
558
559 /*-------------------------------------------------------------*/
560
561
562 #if PROFILE_RFLAGS
563
564 static Bool initted = False;
565
566 /* C flag, fast route */
567 static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
568 /* C flag, slow route */
569 static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
570 /* table for calculate_cond */
571 static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
572 /* total entry counts for calc_all, calc_c, calc_cond. */
573 static UInt n_calc_all = 0;
574 static UInt n_calc_c = 0;
575 static UInt n_calc_cond = 0;
576
577 #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
578
579
580 static void showCounts ( void )
581 {
582 Int op, co;
583 HChar ch;
584 vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n",
585 n_calc_all, n_calc_cond, n_calc_c);
586
587 vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE"
588 " S NS P NP L NL LE NLE\n");
589 vex_printf(" -----------------------------------------------------"
590 "----------------------------------------\n");
591 for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
592
593 ch = ' ';
594 if (op > 0 && (op-1) % 4 == 0)
595 ch = 'B';
596 if (op > 0 && (op-1) % 4 == 1)
597 ch = 'W';
598 if (op > 0 && (op-1) % 4 == 2)
599 ch = 'L';
600 if (op > 0 && (op-1) % 4 == 3)
601 ch = 'Q';
602
603 vex_printf("%2d%c: ", op, ch);
604 vex_printf("%6u ", tabc_slow[op]);
605 vex_printf("%6u ", tabc_fast[op]);
606 for (co = 0; co < 16; co++) {
607 Int n = tab_cond[op][co];
608 if (n >= 1000) {
609 vex_printf(" %3dK", n / 1000);
610 } else
611 if (n >= 0) {
612 vex_printf(" %3d ", n );
613 } else {
614 vex_printf(" ");
615 }
616 }
617 vex_printf("\n");
618 }
619 vex_printf("\n");
620 }
621
622 static void initCounts ( void )
623 {
624 Int op, co;
625 initted = True;
626 for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
627 tabc_fast[op] = tabc_slow[op] = 0;
628 for (co = 0; co < 16; co++)
629 tab_cond[op][co] = 0;
630 }
631 }
632
633 #endif /* PROFILE_RFLAGS */
634
635
636 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
637 /* Calculate all 6 flags from the supplied thunk parameters.
638 Worker function, not directly called from generated code. */
639 static
640 ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
641 ULong cc_dep1_formal,
642 ULong cc_dep2_formal,
643 ULong cc_ndep_formal )
644 {
645 switch (cc_op) {
646 case AMD64G_CC_OP_COPY:
647 return cc_dep1_formal
648 & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
649 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);
650
651 case AMD64G_CC_OP_ADDB: ACTIONS_ADD( 8, UChar );
652 case AMD64G_CC_OP_ADDW: ACTIONS_ADD( 16, UShort );
653 case AMD64G_CC_OP_ADDL: ACTIONS_ADD( 32, UInt );
654 case AMD64G_CC_OP_ADDQ: ACTIONS_ADD( 64, ULong );
655
656 case AMD64G_CC_OP_ADCB: ACTIONS_ADC( 8, UChar );
657 case AMD64G_CC_OP_ADCW: ACTIONS_ADC( 16, UShort );
658 case AMD64G_CC_OP_ADCL: ACTIONS_ADC( 32, UInt );
659 case AMD64G_CC_OP_ADCQ: ACTIONS_ADC( 64, ULong );
660
661 case AMD64G_CC_OP_SUBB: ACTIONS_SUB( 8, UChar );
662 case AMD64G_CC_OP_SUBW: ACTIONS_SUB( 16, UShort );
663 case AMD64G_CC_OP_SUBL: ACTIONS_SUB( 32, UInt );
664 case AMD64G_CC_OP_SUBQ: ACTIONS_SUB( 64, ULong );
665
666 case AMD64G_CC_OP_SBBB: ACTIONS_SBB( 8, UChar );
667 case AMD64G_CC_OP_SBBW: ACTIONS_SBB( 16, UShort );
668 case AMD64G_CC_OP_SBBL: ACTIONS_SBB( 32, UInt );
669 case AMD64G_CC_OP_SBBQ: ACTIONS_SBB( 64, ULong );
670
671 case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC( 8, UChar );
672 case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
673 case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt );
674 case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong );
675
676 case AMD64G_CC_OP_INCB: ACTIONS_INC( 8, UChar );
677 case AMD64G_CC_OP_INCW: ACTIONS_INC( 16, UShort );
678 case AMD64G_CC_OP_INCL: ACTIONS_INC( 32, UInt );
679 case AMD64G_CC_OP_INCQ: ACTIONS_INC( 64, ULong );
680
681 case AMD64G_CC_OP_DECB: ACTIONS_DEC( 8, UChar );
682 case AMD64G_CC_OP_DECW: ACTIONS_DEC( 16, UShort );
683 case AMD64G_CC_OP_DECL: ACTIONS_DEC( 32, UInt );
684 case AMD64G_CC_OP_DECQ: ACTIONS_DEC( 64, ULong );
685
686 case AMD64G_CC_OP_SHLB: ACTIONS_SHL( 8, UChar );
687 case AMD64G_CC_OP_SHLW: ACTIONS_SHL( 16, UShort );
688 case AMD64G_CC_OP_SHLL: ACTIONS_SHL( 32, UInt );
689 case AMD64G_CC_OP_SHLQ: ACTIONS_SHL( 64, ULong );
690
691 case AMD64G_CC_OP_SHRB: ACTIONS_SHR( 8, UChar );
692 case AMD64G_CC_OP_SHRW: ACTIONS_SHR( 16, UShort );
693 case AMD64G_CC_OP_SHRL: ACTIONS_SHR( 32, UInt );
694 case AMD64G_CC_OP_SHRQ: ACTIONS_SHR( 64, ULong );
695
696 case AMD64G_CC_OP_ROLB: ACTIONS_ROL( 8, UChar );
697 case AMD64G_CC_OP_ROLW: ACTIONS_ROL( 16, UShort );
698 case AMD64G_CC_OP_ROLL: ACTIONS_ROL( 32, UInt );
699 case AMD64G_CC_OP_ROLQ: ACTIONS_ROL( 64, ULong );
700
701 case AMD64G_CC_OP_RORB: ACTIONS_ROR( 8, UChar );
702 case AMD64G_CC_OP_RORW: ACTIONS_ROR( 16, UShort );
703 case AMD64G_CC_OP_RORL: ACTIONS_ROR( 32, UInt );
704 case AMD64G_CC_OP_RORQ: ACTIONS_ROR( 64, ULong );
705
706 case AMD64G_CC_OP_UMULB: ACTIONS_UMUL( 8, UChar, toUChar,
707 UShort, toUShort );
708 case AMD64G_CC_OP_UMULW: ACTIONS_UMUL( 16, UShort, toUShort,
709 UInt, toUInt );
710 case AMD64G_CC_OP_UMULL: ACTIONS_UMUL( 32, UInt, toUInt,
711 ULong, idULong );
712
713 case AMD64G_CC_OP_UMULQ: ACTIONS_UMULQ;
714
715 case AMD64G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, toUChar,
716 Short, toUShort );
717 case AMD64G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, toUShort,
718 Int, toUInt );
719 case AMD64G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, toUInt,
720 Long, idULong );
721
722 case AMD64G_CC_OP_SMULQ: ACTIONS_SMULQ;
723
724 case AMD64G_CC_OP_ANDN32: ACTIONS_ANDN( 32, UInt );
725 case AMD64G_CC_OP_ANDN64: ACTIONS_ANDN( 64, ULong );
726
727 case AMD64G_CC_OP_BLSI32: ACTIONS_BLSI( 32, UInt );
728 case AMD64G_CC_OP_BLSI64: ACTIONS_BLSI( 64, ULong );
729
730 case AMD64G_CC_OP_BLSMSK32: ACTIONS_BLSMSK( 32, UInt );
731 case AMD64G_CC_OP_BLSMSK64: ACTIONS_BLSMSK( 64, ULong );
732
733 case AMD64G_CC_OP_BLSR32: ACTIONS_BLSR( 32, UInt );
734 case AMD64G_CC_OP_BLSR64: ACTIONS_BLSR( 64, ULong );
735
736 default:
737 /* shouldn't really make these calls from generated code */
738 vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
739 "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
740 cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
741 vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
742 }
743 }
744
745
746 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
747 /* Calculate all 6 flags from the supplied thunk parameters. */
748 ULong amd64g_calculate_rflags_all ( ULong cc_op,
749 ULong cc_dep1,
750 ULong cc_dep2,
751 ULong cc_ndep )
752 {
753 # if PROFILE_RFLAGS
754 if (!initted) initCounts();
755 n_calc_all++;
756 if (SHOW_COUNTS_NOW) showCounts();
757 # endif
758 return
759 amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
760 }
761
762
763 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
764 /* Calculate just the carry flag from the supplied thunk parameters. */
765 ULong amd64g_calculate_rflags_c ( ULong cc_op,
766 ULong cc_dep1,
767 ULong cc_dep2,
768 ULong cc_ndep )
769 {
770 # if PROFILE_RFLAGS
771 if (!initted) initCounts();
772 n_calc_c++;
773 tabc_fast[cc_op]++;
774 if (SHOW_COUNTS_NOW) showCounts();
775 # endif
776
777 /* Fast-case some common ones. */
778 switch (cc_op) {
779 case AMD64G_CC_OP_COPY:
780 return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
781 case AMD64G_CC_OP_LOGICQ:
782 case AMD64G_CC_OP_LOGICL:
783 case AMD64G_CC_OP_LOGICW:
784 case AMD64G_CC_OP_LOGICB:
785 return 0;
786 // case AMD64G_CC_OP_SUBL:
787 // return ((UInt)cc_dep1) < ((UInt)cc_dep2)
788 // ? AMD64G_CC_MASK_C : 0;
789 // case AMD64G_CC_OP_SUBW:
790 // return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
791 // ? AMD64G_CC_MASK_C : 0;
792 // case AMD64G_CC_OP_SUBB:
793 // return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
794 // ? AMD64G_CC_MASK_C : 0;
795 // case AMD64G_CC_OP_INCL:
796 // case AMD64G_CC_OP_DECL:
797 // return cc_ndep & AMD64G_CC_MASK_C;
798 default:
799 break;
800 }
801
802 # if PROFILE_RFLAGS
803 tabc_fast[cc_op]--;
804 tabc_slow[cc_op]++;
805 # endif
806
807 return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
808 & AMD64G_CC_MASK_C;
809 }
810
811
812 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
813 /* returns 1 or 0 */
814 ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
815 ULong cc_op,
816 ULong cc_dep1,
817 ULong cc_dep2,
818 ULong cc_ndep )
819 {
820 ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
821 cc_dep2, cc_ndep);
822 ULong of,sf,zf,cf,pf;
823 ULong inv = cond & 1;
824
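   /* AMD64 condition codes come in complementary pairs (O/NO, B/NB, Z/NZ,
      ...) that differ only in the lowest bit of 'cond'.  'inv' captures
      that bit, so each pair shares one case below and the result is XORed
      with inv to produce the negated variant. */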
825 # if PROFILE_RFLAGS
826 if (!initted) initCounts();
827 tab_cond[cc_op][cond]++;
828 n_calc_cond++;
829 if (SHOW_COUNTS_NOW) showCounts();
830 # endif
831
832 switch (cond) {
833 case AMD64CondNO:
834 case AMD64CondO: /* OF == 1 */
835 of = rflags >> AMD64G_CC_SHIFT_O;
836 return 1 & (inv ^ of);
837
838 case AMD64CondNZ:
839 case AMD64CondZ: /* ZF == 1 */
840 zf = rflags >> AMD64G_CC_SHIFT_Z;
841 return 1 & (inv ^ zf);
842
843 case AMD64CondNB:
844 case AMD64CondB: /* CF == 1 */
845 cf = rflags >> AMD64G_CC_SHIFT_C;
846 return 1 & (inv ^ cf);
847 break;
848
849 case AMD64CondNBE:
850 case AMD64CondBE: /* (CF or ZF) == 1 */
851 cf = rflags >> AMD64G_CC_SHIFT_C;
852 zf = rflags >> AMD64G_CC_SHIFT_Z;
853 return 1 & (inv ^ (cf | zf));
854 break;
855
856 case AMD64CondNS:
857 case AMD64CondS: /* SF == 1 */
858 sf = rflags >> AMD64G_CC_SHIFT_S;
859 return 1 & (inv ^ sf);
860
861 case AMD64CondNP:
862 case AMD64CondP: /* PF == 1 */
863 pf = rflags >> AMD64G_CC_SHIFT_P;
864 return 1 & (inv ^ pf);
865
866 case AMD64CondNL:
867 case AMD64CondL: /* (SF xor OF) == 1 */
868 sf = rflags >> AMD64G_CC_SHIFT_S;
869 of = rflags >> AMD64G_CC_SHIFT_O;
870 return 1 & (inv ^ (sf ^ of));
871 break;
872
873 case AMD64CondNLE:
874 case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */
875 sf = rflags >> AMD64G_CC_SHIFT_S;
876 of = rflags >> AMD64G_CC_SHIFT_O;
877 zf = rflags >> AMD64G_CC_SHIFT_Z;
878 return 1 & (inv ^ ((sf ^ of) | zf));
879 break;
880
881 default:
882 /* shouldn't really make these calls from generated code */
883 vex_printf("amd64g_calculate_condition"
884 "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
885 cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
886 vpanic("amd64g_calculate_condition");
887 }
888 }
889
890
891 /* VISIBLE TO LIBVEX CLIENT */
892 ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/const VexGuestAMD64State* vex_state )
893 {
894 ULong rflags = amd64g_calculate_rflags_all_WRK(
895 vex_state->guest_CC_OP,
896 vex_state->guest_CC_DEP1,
897 vex_state->guest_CC_DEP2,
898 vex_state->guest_CC_NDEP
899 );
900 Long dflag = vex_state->guest_DFLAG;
901 vassert(dflag == 1 || dflag == -1);
902 if (dflag == -1)
903 rflags |= (1<<10);
904 if (vex_state->guest_IDFLAG == 1)
905 rflags |= (1<<21);
906 if (vex_state->guest_ACFLAG == 1)
907 rflags |= (1<<18);
908
909 return rflags;
910 }
911
912 /* VISIBLE TO LIBVEX CLIENT */
913 void
914 LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag,
915 /*MOD*/VexGuestAMD64State* vex_state )
916 {
917 ULong oszacp = amd64g_calculate_rflags_all_WRK(
918 vex_state->guest_CC_OP,
919 vex_state->guest_CC_DEP1,
920 vex_state->guest_CC_DEP2,
921 vex_state->guest_CC_NDEP
922 );
923 if (new_carry_flag & 1) {
924 oszacp |= AMD64G_CC_MASK_C;
925 } else {
926 oszacp &= ~AMD64G_CC_MASK_C;
927 }
928 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
929 vex_state->guest_CC_DEP1 = oszacp;
930 vex_state->guest_CC_DEP2 = 0;
931 vex_state->guest_CC_NDEP = 0;
932 }
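/* A minimal usage sketch for the two client-visible functions above
   (the guest-state variable 'gst' is hypothetical): a client could flip
   the guest carry flag while leaving the other flags alone with

      ULong rf = LibVEX_GuestAMD64_get_rflags(&gst);
      LibVEX_GuestAMD64_put_rflag_c((rf & 1) ? 0 : 1, &gst);

   since CF lives in bit 0 of %rflags. */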
933
934
935 /*---------------------------------------------------------------*/
936 /*--- %rflags translation-time function specialisers. ---*/
937 /*--- These help iropt specialise calls the above run-time ---*/
938 /*--- %rflags functions. ---*/
939 /*---------------------------------------------------------------*/
940
941 /* Used by the optimiser to try specialisations. Returns an
942 equivalent expression, or NULL if none. */
943
944 static Bool isU64 ( IRExpr* e, ULong n )
945 {
946 return toBool( e->tag == Iex_Const
947 && e->Iex.Const.con->tag == Ico_U64
948 && e->Iex.Const.con->Ico.U64 == n );
949 }
950
951 IRExpr* guest_amd64_spechelper ( const HChar* function_name,
952 IRExpr** args,
953 IRStmt** precedingStmts,
954 Int n_precedingStmts )
955 {
956 # define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
957 # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
958 # define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
959 # define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
960 # define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
961
962 Int i, arity = 0;
963 for (i = 0; args[i]; i++)
964 arity++;
965 # if 0
966 vex_printf("spec request:\n");
967 vex_printf(" %s ", function_name);
968 for (i = 0; i < arity; i++) {
969 vex_printf(" ");
970 ppIRExpr(args[i]);
971 }
972 vex_printf("\n");
973 # endif
974
975 /* --------- specialising "amd64g_calculate_condition" --------- */
976
977 if (vex_streq(function_name, "amd64g_calculate_condition")) {
978 /* specialise calls to above "calculate condition" function */
979 IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
980 vassert(arity == 5);
981 cond = args[0];
982 cc_op = args[1];
983 cc_dep1 = args[2];
984 cc_dep2 = args[3];
985
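      /* In the rules below, for sub/cmp style thunks DEP1 is the left
         operand (dst) and DEP2 the right operand (src).  Each rule turns
         a constant (cc_op, cond) pair into equivalent IR, so the call to
         the flag helper can be optimised away entirely. */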
986 /*---------------- ADDQ ----------------*/
987
988 if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
989 /* long long add, then Z --> test (dst+src == 0) */
990 return unop(Iop_1Uto64,
991 binop(Iop_CmpEQ64,
992 binop(Iop_Add64, cc_dep1, cc_dep2),
993 mkU64(0)));
994 }
995
996 /*---------------- ADDL ----------------*/
997
998 if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondO)) {
999 /* This is very commonly generated by Javascript JITs, for
1000 the idiom "do a 32-bit add and jump to out-of-line code if
1001 an overflow occurs". */
1002 /* long add, then O (overflow)
1003 --> ((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 + dep2)))[31]
1004 --> (((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
1005 --> (((not(dep1 ^ dep2)) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
1006 */
1007 vassert(isIRAtom(cc_dep1));
1008 vassert(isIRAtom(cc_dep2));
1009 return
1010 binop(Iop_And64,
1011 binop(Iop_Shr64,
1012 binop(Iop_And64,
1013 unop(Iop_Not64,
1014 binop(Iop_Xor64, cc_dep1, cc_dep2)),
1015 binop(Iop_Xor64,
1016 cc_dep1,
1017 binop(Iop_Add64, cc_dep1, cc_dep2))),
1018 mkU8(31)),
1019 mkU64(1));
1020
1021 }
1022
1023 /*---------------- SUBQ ----------------*/
1024
1025 /* 0, */
1026 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondO)) {
1027 /* long long sub/cmp, then O (overflow)
1028 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[63]
1029 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2))) >>u 63
1030 */
1031 vassert(isIRAtom(cc_dep1));
1032 vassert(isIRAtom(cc_dep2));
1033 return binop(Iop_Shr64,
1034 binop(Iop_And64,
1035 binop(Iop_Xor64, cc_dep1, cc_dep2),
1036 binop(Iop_Xor64,
1037 cc_dep1,
1038 binop(Iop_Sub64, cc_dep1, cc_dep2))),
1039 mkU8(63));
1040 }
1041 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNO)) {
1042 /* No action. Never yet found a test case. */
1043 }
1044
1045 /* 2, 3 */
1046 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
1047 /* long long sub/cmp, then B (unsigned less than)
1048 --> test dst <u src */
1049 return unop(Iop_1Uto64,
1050 binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
1051 }
1052 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
1053 /* long long sub/cmp, then NB (unsigned greater than or equal)
1054 --> test src <=u dst */
1055 /* Note, args are opposite way round from the usual */
1056 return unop(Iop_1Uto64,
1057 binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
1058 }
1059
1060 /* 4, 5 */
1061 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
1062 /* long long sub/cmp, then Z --> test dst==src */
1063 return unop(Iop_1Uto64,
1064 binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
1065 }
1066 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
1067 /* long long sub/cmp, then NZ --> test dst!=src */
1068 return unop(Iop_1Uto64,
1069 binop(Iop_CmpNE64,cc_dep1,cc_dep2));
1070 }
1071
1072 /* 6, 7 */
1073 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
1074 /* long long sub/cmp, then BE (unsigned less than or equal)
1075 --> test dst <=u src */
1076 return unop(Iop_1Uto64,
1077 binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
1078 }
1079 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNBE)) {
1080 /* long long sub/cmp, then NBE (unsigned greater than)
1081 --> test !(dst <=u src) */
1082 return binop(Iop_Xor64,
1083 unop(Iop_1Uto64,
1084 binop(Iop_CmpLE64U, cc_dep1, cc_dep2)),
1085 mkU64(1));
1086 }
1087
1088 /* 8, 9 */
1089 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondS)) {
1090 /* long long sub/cmp, then S (negative)
1091 --> (dst-src)[63]
1092 --> (dst-src) >>u 63 */
1093 return binop(Iop_Shr64,
1094 binop(Iop_Sub64, cc_dep1, cc_dep2),
1095 mkU8(63));
1096 }
1097 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNS)) {
1098 /* long long sub/cmp, then NS (not negative)
1099 --> (dst-src)[63] ^ 1
1100 --> ((dst-src) >>u 63) ^ 1 */
1101 return binop(Iop_Xor64,
1102 binop(Iop_Shr64,
1103 binop(Iop_Sub64, cc_dep1, cc_dep2),
1104 mkU8(63)),
1105 mkU64(1));
1106 }
1107
1108 /* 12, 13 */
1109 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
1110 /* long long sub/cmp, then L (signed less than)
1111 --> test dst <s src */
1112 return unop(Iop_1Uto64,
1113 binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
1114 }
1115 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNL)) {
1116 /* long long sub/cmp, then NL (signed greater than or equal)
1117 --> test dst >=s src
1118 --> test src <=s dst */
1119 return unop(Iop_1Uto64,
1120 binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
1121 }
1122
1123 /* 14, 15 */
1124 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondLE)) {
1125 /* long long sub/cmp, then LE (signed less than or equal)
1126 --> test dst <=s src */
1127 return unop(Iop_1Uto64,
1128 binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
1129 }
1130 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNLE)) {
1131 /* long sub/cmp, then NLE (signed greater than)
1132 --> test !(dst <=s src)
1133 --> test (dst >s src)
1134 --> test (src <s dst) */
1135 return unop(Iop_1Uto64,
1136 binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
1137
1138 }
1139
1140 /*---------------- SUBL ----------------*/
1141
1142 /* 0, */
1143 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondO)) {
1144 /* This is very commonly generated by Javascript JITs, for
1145 the idiom "do a 32-bit subtract and jump to out-of-line
1146 code if an overflow occurs". */
1147 /* long sub/cmp, then O (overflow)
1148 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[31]
1149 --> (((dep1 ^ dep2) & (dep1 ^ (dep1 -64 dep2))) >>u 31) & 1
1150 */
1151 vassert(isIRAtom(cc_dep1));
1152 vassert(isIRAtom(cc_dep2));
1153 return
1154 binop(Iop_And64,
1155 binop(Iop_Shr64,
1156 binop(Iop_And64,
1157 binop(Iop_Xor64, cc_dep1, cc_dep2),
1158 binop(Iop_Xor64,
1159 cc_dep1,
1160 binop(Iop_Sub64, cc_dep1, cc_dep2))),
1161 mkU8(31)),
1162 mkU64(1));
1163 }
1164 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNO)) {
1165 /* No action. Never yet found a test case. */
1166 }
1167
1168 /* 2, 3 */
1169 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) {
1170 /* long sub/cmp, then B (unsigned less than)
1171 --> test dst <u src */
1172 return unop(Iop_1Uto64,
1173 binop(Iop_CmpLT32U,
1174 unop(Iop_64to32, cc_dep1),
1175 unop(Iop_64to32, cc_dep2)));
1176 }
1177 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNB)) {
1178 /* long sub/cmp, then NB (unsigned greater than or equal)
1179 --> test src <=u dst */
1180 /* Note, args are opposite way round from the usual */
1181 return unop(Iop_1Uto64,
1182 binop(Iop_CmpLE32U,
1183 unop(Iop_64to32, cc_dep2),
1184 unop(Iop_64to32, cc_dep1)));
1185 }
1186
1187 /* 4, 5 */
1188 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
1189 /* long sub/cmp, then Z --> test dst==src */
1190 return unop(Iop_1Uto64,
1191 binop(Iop_CmpEQ32,
1192 unop(Iop_64to32, cc_dep1),
1193 unop(Iop_64to32, cc_dep2)));
1194 }
1195 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
1196 /* long sub/cmp, then NZ --> test dst!=src */
1197 return unop(Iop_1Uto64,
1198 binop(Iop_CmpNE32,
1199 unop(Iop_64to32, cc_dep1),
1200 unop(Iop_64to32, cc_dep2)));
1201 }
1202
1203 /* 6, 7 */
1204 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
1205 /* long sub/cmp, then BE (unsigned less than or equal)
1206 --> test dst <=u src */
1207 return unop(Iop_1Uto64,
1208 binop(Iop_CmpLE32U,
1209 unop(Iop_64to32, cc_dep1),
1210 unop(Iop_64to32, cc_dep2)));
1211 }
1212 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
1213 /* long sub/cmp, then NBE (unsigned greater than)
1214 --> test src <u dst */
1215 /* Note, args are opposite way round from the usual */
1216 return unop(Iop_1Uto64,
1217 binop(Iop_CmpLT32U,
1218 unop(Iop_64to32, cc_dep2),
1219 unop(Iop_64to32, cc_dep1)));
1220 }
1221
1222 /* 8, 9 */
1223 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
1224 /* long sub/cmp, then S (negative)
1225 --> (dst-src)[31]
1226 --> ((dst -64 src) >>u 31) & 1
1227 Pointless to narrow the args to 32 bit before the subtract. */
1228 return binop(Iop_And64,
1229 binop(Iop_Shr64,
1230 binop(Iop_Sub64, cc_dep1, cc_dep2),
1231 mkU8(31)),
1232 mkU64(1));
1233 }
1234 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNS)) {
1235 /* long sub/cmp, then NS (not negative)
1236 --> (dst-src)[31] ^ 1
1237 --> (((dst -64 src) >>u 31) & 1) ^ 1
1238 Pointless to narrow the args to 32 bit before the subtract. */
1239 return binop(Iop_Xor64,
1240 binop(Iop_And64,
1241 binop(Iop_Shr64,
1242 binop(Iop_Sub64, cc_dep1, cc_dep2),
1243 mkU8(31)),
1244 mkU64(1)),
1245 mkU64(1));
1246 }
1247
1248 /* 12, 13 */
1249 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
1250 /* long sub/cmp, then L (signed less than)
1251 --> test dst <s src */
1252 return unop(Iop_1Uto64,
1253 binop(Iop_CmpLT32S,
1254 unop(Iop_64to32, cc_dep1),
1255 unop(Iop_64to32, cc_dep2)));
1256 }
1257 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNL)) {
1258 /* long sub/cmp, then NL (signed greater than or equal)
1259 --> test dst >=s src
1260 --> test src <=s dst */
1261 return unop(Iop_1Uto64,
1262 binop(Iop_CmpLE32S,
1263 unop(Iop_64to32, cc_dep2),
1264 unop(Iop_64to32, cc_dep1)));
1265 }
1266
1267 /* 14, 15 */
1268 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
1269 /* long sub/cmp, then LE (signed less than or equal)
1270 --> test dst <=s src */
1271 return unop(Iop_1Uto64,
1272 binop(Iop_CmpLE32S,
1273 unop(Iop_64to32, cc_dep1),
1274 unop(Iop_64to32, cc_dep2)));
1275
1276 }
1277 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
1278 /* long sub/cmp, then NLE (signed greater than)
1279 --> test !(dst <=s src)
1280 --> test (dst >s src)
1281 --> test (src <s dst) */
1282 return unop(Iop_1Uto64,
1283 binop(Iop_CmpLT32S,
1284 unop(Iop_64to32, cc_dep2),
1285 unop(Iop_64to32, cc_dep1)));
1286
1287 }
1288
1289 /*---------------- SUBW ----------------*/
1290
1291 /* 4, 5 */
1292 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
1293 /* word sub/cmp, then Z --> test dst==src */
1294 return unop(Iop_1Uto64,
1295 binop(Iop_CmpEQ16,
1296 unop(Iop_64to16,cc_dep1),
1297 unop(Iop_64to16,cc_dep2)));
1298 }
1299 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
1300 /* word sub/cmp, then NZ --> test dst!=src */
1301 return unop(Iop_1Uto64,
1302 binop(Iop_CmpNE16,
1303 unop(Iop_64to16,cc_dep1),
1304 unop(Iop_64to16,cc_dep2)));
1305 }
1306
1307 /* 6, */
1308 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondBE)) {
1309 /* word sub/cmp, then BE (unsigned less than or equal)
1310 --> test dst <=u src */
1311 return unop(Iop_1Uto64,
1312 binop(Iop_CmpLE64U,
1313 binop(Iop_Shl64, cc_dep1, mkU8(48)),
1314 binop(Iop_Shl64, cc_dep2, mkU8(48))));
1315 }
1316
1317 /* 14, */
1318 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
1319 /* word sub/cmp, then LE (signed less than or equal)
1320 --> test dst <=s src */
1321 return unop(Iop_1Uto64,
1322 binop(Iop_CmpLE64S,
1323 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1324 binop(Iop_Shl64,cc_dep2,mkU8(48))));
1325
1326 }
1327
1328 /*---------------- SUBB ----------------*/
1329
1330 /* 2, 3 */
1331 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondB)) {
1332 /* byte sub/cmp, then B (unsigned less than)
1333 --> test dst <u src */
1334 return unop(Iop_1Uto64,
1335 binop(Iop_CmpLT64U,
1336 binop(Iop_And64, cc_dep1, mkU64(0xFF)),
1337 binop(Iop_And64, cc_dep2, mkU64(0xFF))));
1338 }
1339 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNB)) {
1340 /* byte sub/cmp, then NB (unsigned greater than or equal)
1341 --> test src <=u dst */
1342 /* Note, args are opposite way round from the usual */
1343 return unop(Iop_1Uto64,
1344 binop(Iop_CmpLE64U,
1345 binop(Iop_And64, cc_dep2, mkU64(0xFF)),
1346 binop(Iop_And64, cc_dep1, mkU64(0xFF))));
1347 }
1348
1349 /* 4, 5 */
1350 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
1351 /* byte sub/cmp, then Z --> test dst==src */
1352 return unop(Iop_1Uto64,
1353 binop(Iop_CmpEQ8,
1354 unop(Iop_64to8,cc_dep1),
1355 unop(Iop_64to8,cc_dep2)));
1356 }
1357 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
1358 /* byte sub/cmp, then NZ --> test dst!=src */
1359 return unop(Iop_1Uto64,
1360 binop(Iop_CmpNE8,
1361 unop(Iop_64to8,cc_dep1),
1362 unop(Iop_64to8,cc_dep2)));
1363 }
1364
1365 /* 6, */
1366 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) {
1367 /* byte sub/cmp, then BE (unsigned less than or equal)
1368 --> test dst <=u src */
1369 return unop(Iop_1Uto64,
1370 binop(Iop_CmpLE64U,
1371 binop(Iop_And64, cc_dep1, mkU64(0xFF)),
1372 binop(Iop_And64, cc_dep2, mkU64(0xFF))));
1373 }
1374
1375 /* 8, 9 */
1376 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
1377 && isU64(cc_dep2, 0)) {
1378 /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
1379 --> test dst <s 0
1380 --> (ULong)dst[7]
1381 This is yet another scheme by which gcc figures out if the
1382 top bit of a byte is 1 or 0. See also LOGICB/CondS below. */
1383 /* Note: isU64(cc_dep2, 0) is correct, even though this is
1384 for an 8-bit comparison, since the args to the helper
1385 function are always U64s. */
1386 return binop(Iop_And64,
1387 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1388 mkU64(1));
1389 }
1390 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
1391 && isU64(cc_dep2, 0)) {
1392 /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
1393 --> test !(dst <s 0)
1394 --> (ULong) !dst[7]
1395 */
1396 return binop(Iop_Xor64,
1397 binop(Iop_And64,
1398 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1399 mkU64(1)),
1400 mkU64(1));
1401 }
1402
1403 /*---------------- LOGICQ ----------------*/
1404
1405 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
1406 /* long long and/or/xor, then Z --> test dst==0 */
1407 return unop(Iop_1Uto64,
1408 binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1409 }
1410 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) {
1411 /* long long and/or/xor, then NZ --> test dst!=0 */
1412 return unop(Iop_1Uto64,
1413 binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1414 }
1415
1416 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
1417 /* long long and/or/xor, then L
1418 LOGIC sets SF and ZF according to the
1419 result and makes OF be zero. L computes SF ^ OF, but
1420 OF is zero, so this reduces to SF -- which will be 1 iff
1421 the result is < signed 0. Hence ...
1422 */
1423 return unop(Iop_1Uto64,
1424 binop(Iop_CmpLT64S,
1425 cc_dep1,
1426 mkU64(0)));
1427 }
1428
1429 /*---------------- LOGICL ----------------*/
1430
1431 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
1432 /* long and/or/xor, then Z --> test dst==0 */
1433 return unop(Iop_1Uto64,
1434 binop(Iop_CmpEQ32,
1435 unop(Iop_64to32, cc_dep1),
1436 mkU32(0)));
1437 }
1438 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
1439 /* long and/or/xor, then NZ --> test dst!=0 */
1440 return unop(Iop_1Uto64,
1441 binop(Iop_CmpNE32,
1442 unop(Iop_64to32, cc_dep1),
1443 mkU32(0)));
1444 }
1445
1446 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
1447 /* long and/or/xor, then LE
1448 This is pretty subtle. LOGIC sets SF and ZF according to the
1449 result and makes OF be zero. LE computes (SF ^ OF) | ZF, but
1450 OF is zero, so this reduces to SF | ZF -- which will be 1 iff
1451 the result is <=signed 0. Hence ...
1452 */
1453 return unop(Iop_1Uto64,
1454 binop(Iop_CmpLE32S,
1455 unop(Iop_64to32, cc_dep1),
1456 mkU32(0)));
1457 }
1458
1459 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) {
1460 /* long and/or/xor, then S --> (ULong)result[31] */
1461 return binop(Iop_And64,
1462 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1463 mkU64(1));
1464 }
1465 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) {
1466 /* long and/or/xor, then NS --> (ULong) ~ result[31] */
1467 return binop(Iop_Xor64,
1468 binop(Iop_And64,
1469 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1470 mkU64(1)),
1471 mkU64(1));
1472 }
1473
1474 /*---------------- LOGICW ----------------*/
1475
1476 if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondZ)) {
1477 /* word and/or/xor, then Z --> test dst==0 */
1478 return unop(Iop_1Uto64,
1479 binop(Iop_CmpEQ64,
1480 binop(Iop_And64, cc_dep1, mkU64(0xFFFF)),
1481 mkU64(0)));
1482 }
1483 if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondNZ)) {
1484 /* word and/or/xor, then NZ --> test dst!=0 */
1485 return unop(Iop_1Uto64,
1486 binop(Iop_CmpNE64,
1487 binop(Iop_And64, cc_dep1, mkU64(0xFFFF)),
1488 mkU64(0)));
1489 }
1490
1491 /*---------------- LOGICB ----------------*/
1492
1493 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
1494 /* byte and/or/xor, then Z --> test dst==0 */
1495 return unop(Iop_1Uto64,
1496 binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)),
1497 mkU64(0)));
1498 }
1499 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) {
1500 /* byte and/or/xor, then NZ --> test dst!=0 */
1501 return unop(Iop_1Uto64,
1502 binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)),
1503 mkU64(0)));
1504 }
1505
1506 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
1507 /* this is an idiom gcc sometimes uses to find out if the top
1508 bit of a byte register is set: eg testb %al,%al; js ..
1509 Since it just depends on the top bit of the byte, extract
1510 that bit and explicitly get rid of all the rest. This
1511 helps memcheck avoid false positives in the case where any
1512 of the other bits in the byte are undefined. */
1513 /* byte and/or/xor, then S --> (UInt)result[7] */
1514 return binop(Iop_And64,
1515 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1516 mkU64(1));
1517 }
1518 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) {
1519 /* byte and/or/xor, then NS --> (UInt)!result[7] */
1520 return binop(Iop_Xor64,
1521 binop(Iop_And64,
1522 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1523 mkU64(1)),
1524 mkU64(1));
1525 }
1526
1527 /*---------------- INCB ----------------*/
1528
1529 if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
1530 /* 8-bit inc, then LE --> sign bit of the arg */
1531 return binop(Iop_And64,
1532 binop(Iop_Shr64,
1533 binop(Iop_Sub64, cc_dep1, mkU64(1)),
1534 mkU8(7)),
1535 mkU64(1));
1536 }
1537
1538 /*---------------- INCW ----------------*/
1539
1540 if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
1541 /* 16-bit inc, then Z --> test dst == 0 */
1542 return unop(Iop_1Uto64,
1543 binop(Iop_CmpEQ64,
1544 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1545 mkU64(0)));
1546 }
1547
1548 /*---------------- DECL ----------------*/
1549
1550 if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
1551 /* dec L, then Z --> test dst == 0 */
1552 return unop(Iop_1Uto64,
1553 binop(Iop_CmpEQ32,
1554 unop(Iop_64to32, cc_dep1),
1555 mkU32(0)));
1556 }
1557
1558 /*---------------- DECW ----------------*/
1559
1560 if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
1561 /* 16-bit dec, then NZ --> test dst != 0 */
1562 return unop(Iop_1Uto64,
1563 binop(Iop_CmpNE64,
1564 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1565 mkU64(0)));
1566 }
1567
1568 /*---------------- COPY ----------------*/
1569 /* This can happen, as a result of amd64 FP compares: "comisd ... ;
1570 jbe" for example. */
1571
1572 if (isU64(cc_op, AMD64G_CC_OP_COPY) &&
1573 (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
1574 /* COPY, then BE --> extract C and Z from dep1, and test (C
1575 or Z == 1). */
1576 /* COPY, then NBE --> extract C and Z from dep1, and test (C
1577 or Z == 0). */
1578 ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
1579 return
1580 unop(
1581 Iop_1Uto64,
1582 binop(
1583 Iop_CmpEQ64,
1584 binop(
1585 Iop_And64,
1586 binop(
1587 Iop_Or64,
1588 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
1589 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))
1590 ),
1591 mkU64(1)
1592 ),
1593 mkU64(nnn)
1594 )
1595 );
1596 }
1597
1598 if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) {
1599 /* COPY, then B --> extract C from dep1, and test (C == 1). */
1600 return
1601 unop(
1602 Iop_1Uto64,
1603 binop(
1604 Iop_CmpNE64,
1605 binop(
1606 Iop_And64,
1607 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
1608 mkU64(1)
1609 ),
1610 mkU64(0)
1611 )
1612 );
1613 }
1614
1615 if (isU64(cc_op, AMD64G_CC_OP_COPY)
1616 && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
1617 /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
1618 /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
1619 UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
1620 return
1621 unop(
1622 Iop_1Uto64,
1623 binop(
1624 Iop_CmpEQ64,
1625 binop(
1626 Iop_And64,
1627 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
1628 mkU64(1)
1629 ),
1630 mkU64(nnn)
1631 )
1632 );
1633 }
1634
1635 if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) {
1636 /* COPY, then P --> extract P from dep1, and test (P == 1). */
1637 return
1638 unop(
1639 Iop_1Uto64,
1640 binop(
1641 Iop_CmpNE64,
1642 binop(
1643 Iop_And64,
1644 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
1645 mkU64(1)
1646 ),
1647 mkU64(0)
1648 )
1649 );
1650 }
1651
1652 return NULL;
1653 }
1654
1655 /* --------- specialising "amd64g_calculate_rflags_c" --------- */
1656
1657 if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
1658 /* specialise calls to above "calculate_rflags_c" function */
1659 IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
1660 vassert(arity == 4);
1661 cc_op = args[0];
1662 cc_dep1 = args[1];
1663 cc_dep2 = args[2];
1664 cc_ndep = args[3];
1665
1666 if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
1667 /* C after sub denotes unsigned less than */
1668 return unop(Iop_1Uto64,
1669 binop(Iop_CmpLT64U,
1670 cc_dep1,
1671 cc_dep2));
1672 }
1673 if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
1674 /* C after sub denotes unsigned less than */
1675 return unop(Iop_1Uto64,
1676 binop(Iop_CmpLT32U,
1677 unop(Iop_64to32, cc_dep1),
1678 unop(Iop_64to32, cc_dep2)));
1679 }
1680 if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
1681 /* C after sub denotes unsigned less than */
1682 return unop(Iop_1Uto64,
1683 binop(Iop_CmpLT64U,
1684 binop(Iop_And64,cc_dep1,mkU64(0xFF)),
1685 binop(Iop_And64,cc_dep2,mkU64(0xFF))));
1686 }
1687 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
1688 || isU64(cc_op, AMD64G_CC_OP_LOGICL)
1689 || isU64(cc_op, AMD64G_CC_OP_LOGICW)
1690 || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
1691 /* cflag after logic is zero */
1692 return mkU64(0);
1693 }
1694 if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL)
1695 || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
1696 /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
1697 return cc_ndep;
1698 }
1699
1700 # if 0
1701 if (cc_op->tag == Iex_Const) {
1702 vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
1703 }
1704 # endif
1705
1706 return NULL;
1707 }
1708
1709 # undef unop
1710 # undef binop
1711 # undef mkU64
1712 # undef mkU32
1713 # undef mkU8
1714
1715 return NULL;
1716 }
1717
1718
1719 /*---------------------------------------------------------------*/
1720 /*--- Supporting functions for x87 FPU activities. ---*/
1721 /*---------------------------------------------------------------*/
1722
1723 static inline Bool host_is_little_endian ( void )
1724 {
1725 UInt x = 0x76543210;
1726 UChar* p = (UChar*)(&x);
1727 return toBool(*p == 0x10);
1728 }
1729
1730 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
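/* The C3/C2/C1/C0 encodings returned below (C1 is always the sign bit):
      empty      C3=1 C2=0 C0=1
      zero       C3=1 C2=0 C0=0
      denormal   C3=1 C2=1 C0=0
      infinity   C3=0 C2=1 C0=1
      NaN        C3=0 C2=0 C0=1
      normal     C3=0 C2=1 C0=0
   which is the classification FXAM defines. */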
1731 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
1732 ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl )
1733 {
1734 Bool mantissaIsZero;
1735 Int bexp;
1736 UChar sign;
1737 UChar* f64;
1738
1739 vassert(host_is_little_endian());
1740
1741 /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
1742
1743 f64 = (UChar*)(&dbl);
1744 sign = toUChar( (f64[7] >> 7) & 1 );
1745
1746 /* First off, if the tag indicates the register was empty,
1747 return 1,0,sign,1 */
1748 if (tag == 0) {
1749 /* vex_printf("Empty\n"); */
1750 return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1)
1751 | AMD64G_FC_MASK_C0;
1752 }
1753
1754 bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
1755 bexp &= 0x7FF;
1756
1757 mantissaIsZero
1758 = toBool(
1759 (f64[6] & 0x0F) == 0
1760 && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
1761 );
1762
1763 /* If both exponent and mantissa are zero, the value is zero.
1764 Return 1,0,sign,0. */
1765 if (bexp == 0 && mantissaIsZero) {
1766 /* vex_printf("Zero\n"); */
1767 return AMD64G_FC_MASK_C3 | 0
1768 | (sign << AMD64G_FC_SHIFT_C1) | 0;
1769 }
1770
1771 /* If exponent is zero but mantissa isn't, it's a denormal.
1772 Return 1,1,sign,0. */
1773 if (bexp == 0 && !mantissaIsZero) {
1774 /* vex_printf("Denormal\n"); */
1775 return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2
1776 | (sign << AMD64G_FC_SHIFT_C1) | 0;
1777 }
1778
1779 /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
1780 Return 0,1,sign,1. */
1781 if (bexp == 0x7FF && mantissaIsZero) {
1782 /* vex_printf("Inf\n"); */
1783 return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1)
1784 | AMD64G_FC_MASK_C0;
1785 }
1786
1787 /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
1788 Return 0,0,sign,1. */
1789 if (bexp == 0x7FF && !mantissaIsZero) {
1790 /* vex_printf("NaN\n"); */
1791 return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
1792 }
1793
1794 /* Uh, ok, we give up. It must be a normal finite number.
1795 Return 0,1,sign,0.
1796 */
1797 /* vex_printf("normal\n"); */
1798 return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
1799 }
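
/* The following is an illustrative sketch, not part of the build: it shows
   how the C3..C0 encoding documented above could be checked for one case.
   The name example_fxam_decode is made up for illustration. */
#if 0
static void example_fxam_decode ( void )
{
   /* +1.0 (IEEE754 0x3FF0000000000000) in a non-empty register should
      classify as a normal finite number: C3=0, C2=1, C1=sign=0, C0=0. */
   ULong fc = amd64g_calculate_FXAM( 1/*non-empty tag*/,
                                     0x3FF0000000000000ULL );
   vassert((fc & AMD64G_FC_MASK_C3) == 0);
   vassert((fc & AMD64G_FC_MASK_C2) != 0);
   vassert(((fc >> AMD64G_FC_SHIFT_C1) & 1) == 0);  /* sign is '+' */
   vassert((fc & AMD64G_FC_MASK_C0) == 0);
}
#endif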
1800
1801
1802 /* This is used to implement both 'frstor' and 'fldenv'. The latter
1803 appears to differ from the former only in that the 8 FP registers
1804 themselves are not transferred into the guest state. */
1805 static
1806 VexEmNote do_put_x87 ( Bool moveRegs,
1807 /*IN*/UChar* x87_state,
1808 /*OUT*/VexGuestAMD64State* vex_state )
1809 {
1810 Int stno, preg;
1811 UInt tag;
1812 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1813 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1814 Fpu_State* x87 = (Fpu_State*)x87_state;
1815 UInt ftop = (x87->env[FP_ENV_STAT] >> 11) & 7;
1816 UInt tagw = x87->env[FP_ENV_TAG];
1817 UInt fpucw = x87->env[FP_ENV_CTRL];
1818 UInt c3210 = x87->env[FP_ENV_STAT] & 0x4700;
1819 VexEmNote ew;
1820 UInt fpround;
1821 ULong pair;
1822
1823 /* Copy registers and tags */
1824 for (stno = 0; stno < 8; stno++) {
1825 preg = (stno + ftop) & 7;
1826 tag = (tagw >> (2*preg)) & 3;
1827 if (tag == 3) {
1828 /* register is empty */
1829 /* hmm, if it's empty, does it still get written? Probably
1830 safer to say it does. If we don't, memcheck could get out
1831 of sync, in that it thinks all FP registers are defined by
1832 this helper, but in reality some have not been updated. */
1833 if (moveRegs)
1834 vexRegs[preg] = 0; /* IEEE754 64-bit zero */
1835 vexTags[preg] = 0;
1836 } else {
1837 /* register is non-empty */
1838 if (moveRegs)
1839 convert_f80le_to_f64le( &x87->reg[10*stno],
1840 (UChar*)&vexRegs[preg] );
1841 vexTags[preg] = 1;
1842 }
1843 }
1844
1845 /* stack pointer */
1846 vex_state->guest_FTOP = ftop;
1847
1848 /* status word */
1849 vex_state->guest_FC3210 = c3210;
1850
1851 /* handle the control word, setting FPROUND and detecting any
1852 emulation warnings. */
1853 pair = amd64g_check_fldcw ( (ULong)fpucw );
1854 fpround = (UInt)pair & 0xFFFFFFFFULL;
1855 ew = (VexEmNote)(pair >> 32);
1856
1857 vex_state->guest_FPROUND = fpround & 3;
1858
1859 /* emulation warnings --> caller */
1860 return ew;
1861 }
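
/* Sketch (illustrative only, not compiled): the ST(i) -> physical register
   mapping used above and in do_get_x87 below.  Logical ST(i) lives in
   physical FPREG[(i + FTOP) & 7]; e.g. with FTOP == 6, ST(0) is FPREG[6],
   ST(1) is FPREG[7] and ST(2) wraps round to FPREG[0]. */
#if 0
static UInt example_st_to_preg ( UInt ftop, UInt stno )
{
   return (stno + ftop) & 7;
}
#endif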
1862
1863
1864 /* Create an x87 FPU state from the guest state, as close as
1865 we can approximate it. */
1866 static
1867 void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
1868 /*OUT*/UChar* x87_state )
1869 {
1870 Int i, stno, preg;
1871 UInt tagw;
1872 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1873 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1874 Fpu_State* x87 = (Fpu_State*)x87_state;
1875 UInt ftop = vex_state->guest_FTOP;
1876 UInt c3210 = vex_state->guest_FC3210;
1877
1878 for (i = 0; i < 14; i++)
1879 x87->env[i] = 0;
1880
1881 x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
1882 x87->env[FP_ENV_STAT]
1883 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
1884 x87->env[FP_ENV_CTRL]
1885 = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
1886
1887 /* Dump the register stack in ST order. */
1888 tagw = 0;
1889 for (stno = 0; stno < 8; stno++) {
1890 preg = (stno + ftop) & 7;
1891 if (vexTags[preg] == 0) {
1892 /* register is empty */
1893 tagw |= (3 << (2*preg));
1894 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1895 &x87->reg[10*stno] );
1896 } else {
1897 /* register is full. */
1898 tagw |= (0 << (2*preg));
1899 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1900 &x87->reg[10*stno] );
1901 }
1902 }
1903 x87->env[FP_ENV_TAG] = toUShort(tagw);
1904 }
1905
1906
1907 /* CALLED FROM GENERATED CODE */
1908 /* DIRTY HELPER (reads guest state, writes guest mem) */
1909 /* NOTE: only handles 32-bit format (no REX.W on the insn) */
1910 void amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM ( VexGuestAMD64State* gst,
1911 HWord addr )
1912 {
1913 /* Derived from values obtained from
1914 vendor_id : AuthenticAMD
1915 cpu family : 15
1916 model : 12
1917 model name : AMD Athlon(tm) 64 Processor 3200+
1918 stepping : 0
1919 cpu MHz : 2200.000
1920 cache size : 512 KB
1921 */
1922 /* Somewhat roundabout, but at least it's simple. */
1923 Fpu_State tmp;
1924 UShort* addrS = (UShort*)addr;
1925 UChar* addrC = (UChar*)addr;
1926 UInt mxcsr;
1927 UShort fp_tags;
1928 UInt summary_tags;
1929 Int r, stno;
1930 UShort *srcS, *dstS;
1931
1932 do_get_x87( gst, (UChar*)&tmp );
1933 mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );
1934
1935 /* Now build the proper fxsave image from the x87 image we just
1936 made. */
1937
1938 addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
1939    addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */
1940
1941 /* set addrS[2] in an endian-independent way */
1942 summary_tags = 0;
1943 fp_tags = tmp.env[FP_ENV_TAG];
1944 for (r = 0; r < 8; r++) {
1945 if ( ((fp_tags >> (2*r)) & 3) != 3 )
1946 summary_tags |= (1 << r);
1947 }
1948 addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */
1949 addrC[5] = 0; /* pad */
1950
1951 /* FOP: faulting fpu opcode. From experimentation, the real CPU
1952 does not write this field. (?!) */
1953 addrS[3] = 0; /* BOGUS */
1954
1955 /* RIP (Last x87 instruction pointer). From experimentation, the
1956 real CPU does not write this field. (?!) */
1957 addrS[4] = 0; /* BOGUS */
1958 addrS[5] = 0; /* BOGUS */
1959 addrS[6] = 0; /* BOGUS */
1960 addrS[7] = 0; /* BOGUS */
1961
1962 /* RDP (Last x87 data pointer). From experimentation, the real CPU
1963 does not write this field. (?!) */
1964 addrS[8] = 0; /* BOGUS */
1965 addrS[9] = 0; /* BOGUS */
1966 addrS[10] = 0; /* BOGUS */
1967 addrS[11] = 0; /* BOGUS */
1968
1969 addrS[12] = toUShort(mxcsr); /* MXCSR */
1970 addrS[13] = toUShort(mxcsr >> 16);
1971
1972 addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
1973 addrS[15] = 0x0000; /* MXCSR mask (hi16) */
1974
1975 /* Copy in the FP registers, in ST order. */
1976 for (stno = 0; stno < 8; stno++) {
1977 srcS = (UShort*)(&tmp.reg[10*stno]);
1978 dstS = (UShort*)(&addrS[16 + 8*stno]);
1979 dstS[0] = srcS[0];
1980 dstS[1] = srcS[1];
1981 dstS[2] = srcS[2];
1982 dstS[3] = srcS[3];
1983 dstS[4] = srcS[4];
1984 dstS[5] = 0;
1985 dstS[6] = 0;
1986 dstS[7] = 0;
1987 }
1988
1989 /* That's the first 160 bytes of the image done. Now only %xmm0
1990 .. %xmm15 remain to be copied, and we let the generated IR do
1991 that, so as to make Memcheck's definedness flow for the non-XMM
1992       parts independent from that of all the other control and
1993 status words in the structure. This avoids the false positives
1994 shown in #291310. */
1995 }
1996
1997
1998 /* CALLED FROM GENERATED CODE */
1999 /* DIRTY HELPER (writes guest state, reads guest mem) */
2000 VexEmNote amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM ( VexGuestAMD64State* gst,
2001 HWord addr )
2002 {
2003 Fpu_State tmp;
2004 VexEmNote warnX87 = EmNote_NONE;
2005 VexEmNote warnXMM = EmNote_NONE;
2006 UShort* addrS = (UShort*)addr;
2007 UChar* addrC = (UChar*)addr;
2008 UShort fp_tags;
2009 Int r, stno, i;
2010
2011 /* Don't restore %xmm0 .. %xmm15, for the same reasons that
2012 amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM doesn't save them. See
2013 comment in that function for details. */
2014
2015 /* Copy the x87 registers out of the image, into a temporary
2016 Fpu_State struct. */
2017 for (i = 0; i < 14; i++) tmp.env[i] = 0;
2018 for (i = 0; i < 80; i++) tmp.reg[i] = 0;
2019 /* fill in tmp.reg[0..7] */
2020 for (stno = 0; stno < 8; stno++) {
2021 UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
2022 UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
2023 dstS[0] = srcS[0];
2024 dstS[1] = srcS[1];
2025 dstS[2] = srcS[2];
2026 dstS[3] = srcS[3];
2027 dstS[4] = srcS[4];
2028 }
2029 /* fill in tmp.env[0..13] */
2030 tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
2031    tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */
2032
2033 fp_tags = 0;
2034 for (r = 0; r < 8; r++) {
2035       if (addrC[4] & (1<<r))
2036          fp_tags |= (0 << (2*r)); /* VALID -- the summary bit can't
                                          distinguish valid/zero/special. */
2037       else
2038          fp_tags |= (3 << (2*r)); /* EMPTY */
2039 }
2040 tmp.env[FP_ENV_TAG] = fp_tags;
2041
2042 /* Now write 'tmp' into the guest state. */
2043 warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );
2044
2045 { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
2046 | ((((UInt)addrS[13]) & 0xFFFF) << 16);
2047 ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 );
2048
2049 warnXMM = (VexEmNote)(w64 >> 32);
2050
2051 gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
2052 }
2053
2054 /* Prefer an X87 emwarn over an XMM one, if both exist. */
2055 if (warnX87 != EmNote_NONE)
2056 return warnX87;
2057 else
2058 return warnXMM;
2059 }
2060
2061
2062 /* DIRTY HELPER (writes guest state) */
2063 /* Initialise the x87 FPU state as per 'finit'. */
2064 void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
2065 {
2066 Int i;
2067 gst->guest_FTOP = 0;
2068 for (i = 0; i < 8; i++) {
2069 gst->guest_FPTAG[i] = 0; /* empty */
2070 gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
2071 }
2072 gst->guest_FPROUND = (ULong)Irrm_NEAREST;
2073 gst->guest_FC3210 = 0;
2074 }
2075
2076
2077 /* CALLED FROM GENERATED CODE */
2078 /* DIRTY HELPER (reads guest memory) */
2079 ULong amd64g_dirtyhelper_loadF80le ( Addr addrU )
2080 {
2081 ULong f64;
2082 convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
2083 return f64;
2084 }
2085
2086 /* CALLED FROM GENERATED CODE */
2087 /* DIRTY HELPER (writes guest memory) */
2088 void amd64g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
2089 {
2090 convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
2091 }
2092
2093
2094 /* CALLED FROM GENERATED CODE */
2095 /* CLEAN HELPER */
2096 /* mxcsr[15:0] contains an SSE native format MXCSR value.
2097 Extract from it the required SSEROUND value and any resulting
2098 emulation warning, and return (warn << 32) | sseround value.
2099 */
2100 ULong amd64g_check_ldmxcsr ( ULong mxcsr )
2101 {
2102 /* Decide on a rounding mode. mxcsr[14:13] holds it. */
2103 /* NOTE, encoded exactly as per enum IRRoundingMode. */
2104 ULong rmode = (mxcsr >> 13) & 3;
2105
2106 /* Detect any required emulation warnings. */
2107 VexEmNote ew = EmNote_NONE;
2108
2109 if ((mxcsr & 0x1F80) != 0x1F80) {
2110 /* unmasked exceptions! */
2111 ew = EmWarn_X86_sseExns;
2112 }
2113 else
2114 if (mxcsr & (1<<15)) {
2115 /* FZ is set */
2116 ew = EmWarn_X86_fz;
2117 }
2118 else
2119 if (mxcsr & (1<<6)) {
2120 /* DAZ is set */
2121 ew = EmWarn_X86_daz;
2122 }
2123
2124 return (((ULong)ew) << 32) | ((ULong)rmode);
2125 }
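
/* Sketch (illustrative only, not compiled) of how callers unpack the
   (warn << 32) | rmode value returned above; this mirrors what
   amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM does with it.  The function
   name is made up for illustration. */
#if 0
static VexEmNote example_ldmxcsr_unpack ( VexGuestAMD64State* gst, UInt mxcsr32 )
{
   ULong     pair  = amd64g_check_ldmxcsr( (ULong)mxcsr32 );
   ULong     rmode = pair & 0xFFFFFFFFULL;      /* IRRoundingMode encoding */
   VexEmNote ew    = (VexEmNote)(pair >> 32);   /* EmNote_NONE if all is well */
   gst->guest_SSEROUND = rmode;
   return ew;                                   /* caller reports any warning */
}
#endif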
2126
2127
2128 /* CALLED FROM GENERATED CODE */
2129 /* CLEAN HELPER */
2130 /* Given sseround as an IRRoundingMode value, create a suitable SSE
2131 native format MXCSR value. */
2132 ULong amd64g_create_mxcsr ( ULong sseround )
2133 {
2134 sseround &= 3;
2135 return 0x1F80 | (sseround << 13);
2136 }
2137
2138
2139 /* CLEAN HELPER */
2140 /* fpucw[15:0] contains an x87 native format FPU control word.
2141 Extract from it the required FPROUND value and any resulting
2142 emulation warning, and return (warn << 32) | fpround value.
2143 */
2144 ULong amd64g_check_fldcw ( ULong fpucw )
2145 {
2146 /* Decide on a rounding mode. fpucw[11:10] holds it. */
2147 /* NOTE, encoded exactly as per enum IRRoundingMode. */
2148 ULong rmode = (fpucw >> 10) & 3;
2149
2150 /* Detect any required emulation warnings. */
2151 VexEmNote ew = EmNote_NONE;
2152
2153 if ((fpucw & 0x3F) != 0x3F) {
2154 /* unmasked exceptions! */
2155 ew = EmWarn_X86_x87exns;
2156 }
2157 else
2158 if (((fpucw >> 8) & 3) != 3) {
2159 /* unsupported precision */
2160 ew = EmWarn_X86_x87precision;
2161 }
2162
2163 return (((ULong)ew) << 32) | ((ULong)rmode);
2164 }
2165
2166
2167 /* CLEAN HELPER */
2168 /* Given fpround as an IRRoundingMode value, create a suitable x87
2169 native format FPU control word. */
2170 ULong amd64g_create_fpucw ( ULong fpround )
2171 {
2172 fpround &= 3;
2173 return 0x037F | (fpround << 10);
2174 }
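
/* A sanity property of the two pairs of helpers above (sketch only, not
   compiled): amd64g_create_fpucw masks all exceptions and selects 64-bit
   precision, so pushing its output back through amd64g_check_fldcw returns
   the same rounding mode with no emulation warning.  The same round-trip
   holds for amd64g_create_mxcsr / amd64g_check_ldmxcsr. */
#if 0
static void example_fpucw_roundtrip ( void )
{
   ULong r;
   for (r = 0; r < 4; r++) {
      ULong pair = amd64g_check_fldcw( amd64g_create_fpucw(r) );
      vassert((pair & 0xFFFFFFFFULL) == r);            /* rounding mode kept */
      vassert((VexEmNote)(pair >> 32) == EmNote_NONE); /* no warning */
   }
}
#endif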
2175
2176
2177 /* This is used to implement 'fldenv'.
2178 Reads 28 bytes at x87_state[0 .. 27]. */
2179 /* CALLED FROM GENERATED CODE */
2180 /* DIRTY HELPER */
2181 VexEmNote amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state,
2182 /*IN*/HWord x87_state)
2183 {
2184 return do_put_x87( False, (UChar*)x87_state, vex_state );
2185 }
2186
2187
2188 /* CALLED FROM GENERATED CODE */
2189 /* DIRTY HELPER */
2190 /* Create an x87 FPU env from the guest state, as close as we can
2191 approximate it. Writes 28 bytes at x87_state[0..27]. */
2192 void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state,
2193 /*OUT*/HWord x87_state )
2194 {
2195 Int i, stno, preg;
2196 UInt tagw;
2197 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2198 Fpu_State* x87 = (Fpu_State*)x87_state;
2199 UInt ftop = vex_state->guest_FTOP;
2200 ULong c3210 = vex_state->guest_FC3210;
2201
2202 for (i = 0; i < 14; i++)
2203 x87->env[i] = 0;
2204
2205 x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
2206 x87->env[FP_ENV_STAT]
2207 = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) ));
2208 x87->env[FP_ENV_CTRL]
2209 = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) ));
2210
2211 /* Compute the x87 tag word. */
2212 tagw = 0;
2213 for (stno = 0; stno < 8; stno++) {
2214 preg = (stno + ftop) & 7;
2215 if (vexTags[preg] == 0) {
2216 /* register is empty */
2217 tagw |= (3 << (2*preg));
2218 } else {
2219 /* register is full. */
2220 tagw |= (0 << (2*preg));
2221 }
2222 }
2223 x87->env[FP_ENV_TAG] = toUShort(tagw);
2224
2225 /* We don't dump the x87 registers, tho. */
2226 }
2227
2228
2229 /* This is used to implement 'fnsave'.
2230 Writes 108 bytes at x87_state[0 .. 107]. */
2231 /* CALLED FROM GENERATED CODE */
2232 /* DIRTY HELPER */
2233 void amd64g_dirtyhelper_FNSAVE ( /*IN*/VexGuestAMD64State* vex_state,
2234 /*OUT*/HWord x87_state)
2235 {
2236 do_get_x87( vex_state, (UChar*)x87_state );
2237 }
2238
2239
2240 /* This is used to implement 'fnsaves'.
2241 Writes 94 bytes at x87_state[0 .. 93]. */
2242 /* CALLED FROM GENERATED CODE */
2243 /* DIRTY HELPER */
2244 void amd64g_dirtyhelper_FNSAVES ( /*IN*/VexGuestAMD64State* vex_state,
2245 /*OUT*/HWord x87_state)
2246 {
2247 Int i, stno, preg;
2248 UInt tagw;
2249 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2250 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2251 Fpu_State_16* x87 = (Fpu_State_16*)x87_state;
2252 UInt ftop = vex_state->guest_FTOP;
2253 UInt c3210 = vex_state->guest_FC3210;
2254
2255 for (i = 0; i < 7; i++)
2256 x87->env[i] = 0;
2257
2258 x87->env[FPS_ENV_STAT]
2259 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
2260 x87->env[FPS_ENV_CTRL]
2261 = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
2262
2263 /* Dump the register stack in ST order. */
2264 tagw = 0;
2265 for (stno = 0; stno < 8; stno++) {
2266 preg = (stno + ftop) & 7;
2267 if (vexTags[preg] == 0) {
2268 /* register is empty */
2269 tagw |= (3 << (2*preg));
2270 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2271 &x87->reg[10*stno] );
2272 } else {
2273 /* register is full. */
2274 tagw |= (0 << (2*preg));
2275 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2276 &x87->reg[10*stno] );
2277 }
2278 }
2279 x87->env[FPS_ENV_TAG] = toUShort(tagw);
2280 }
2281
2282
2283 /* This is used to implement 'frstor'.
2284 Reads 108 bytes at x87_state[0 .. 107]. */
2285 /* CALLED FROM GENERATED CODE */
2286 /* DIRTY HELPER */
2287 VexEmNote amd64g_dirtyhelper_FRSTOR ( /*OUT*/VexGuestAMD64State* vex_state,
2288 /*IN*/HWord x87_state)
2289 {
2290 return do_put_x87( True, (UChar*)x87_state, vex_state );
2291 }
2292
2293
2294 /* This is used to implement 'frstors'.
2295 Reads 94 bytes at x87_state[0 .. 93]. */
2296 /* CALLED FROM GENERATED CODE */
2297 /* DIRTY HELPER */
2298 VexEmNote amd64g_dirtyhelper_FRSTORS ( /*OUT*/VexGuestAMD64State* vex_state,
2299 /*IN*/HWord x87_state)
2300 {
2301 Int stno, preg;
2302 UInt tag;
2303 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2304 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2305 Fpu_State_16* x87 = (Fpu_State_16*)x87_state;
2306 UInt ftop = (x87->env[FPS_ENV_STAT] >> 11) & 7;
2307 UInt tagw = x87->env[FPS_ENV_TAG];
2308 UInt fpucw = x87->env[FPS_ENV_CTRL];
2309 UInt c3210 = x87->env[FPS_ENV_STAT] & 0x4700;
2310 VexEmNote ew;
2311 UInt fpround;
2312 ULong pair;
2313
2314 /* Copy registers and tags */
2315 for (stno = 0; stno < 8; stno++) {
2316 preg = (stno + ftop) & 7;
2317 tag = (tagw >> (2*preg)) & 3;
2318 if (tag == 3) {
2319 /* register is empty */
2320 /* hmm, if it's empty, does it still get written? Probably
2321 safer to say it does. If we don't, memcheck could get out
2322 of sync, in that it thinks all FP registers are defined by
2323 this helper, but in reality some have not been updated. */
2324 vexRegs[preg] = 0; /* IEEE754 64-bit zero */
2325 vexTags[preg] = 0;
2326 } else {
2327 /* register is non-empty */
2328 convert_f80le_to_f64le( &x87->reg[10*stno],
2329 (UChar*)&vexRegs[preg] );
2330 vexTags[preg] = 1;
2331 }
2332 }
2333
2334 /* stack pointer */
2335 vex_state->guest_FTOP = ftop;
2336
2337 /* status word */
2338 vex_state->guest_FC3210 = c3210;
2339
2340 /* handle the control word, setting FPROUND and detecting any
2341 emulation warnings. */
2342 pair = amd64g_check_fldcw ( (ULong)fpucw );
2343 fpround = (UInt)pair & 0xFFFFFFFFULL;
2344 ew = (VexEmNote)(pair >> 32);
2345
2346 vex_state->guest_FPROUND = fpround & 3;
2347
2348 /* emulation warnings --> caller */
2349 return ew;
2350 }
2351
2352
2353 /*---------------------------------------------------------------*/
2354 /*--- Misc integer helpers, including rotates and CPUID. ---*/
2355 /*---------------------------------------------------------------*/
2356
2357 /* Claim to be the following CPU, which is probably representative of
2358 the lowliest (earliest) amd64 offerings. It can do neither sse3
2359 nor cx16.
2360
2361 vendor_id : AuthenticAMD
2362 cpu family : 15
2363 model : 5
2364 model name : AMD Opteron (tm) Processor 848
2365 stepping : 10
2366 cpu MHz : 1797.682
2367 cache size : 1024 KB
2368 fpu : yes
2369 fpu_exception : yes
2370 cpuid level : 1
2371 wp : yes
2372 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2373 mtrr pge mca cmov pat pse36 clflush mmx fxsr
2374 sse sse2 syscall nx mmxext lm 3dnowext 3dnow
2375 bogomips : 3600.62
2376 TLB size : 1088 4K pages
2377 clflush size : 64
2378 cache_alignment : 64
2379 address sizes : 40 bits physical, 48 bits virtual
2380 power management: ts fid vid ttp
2381
2382 2012-Feb-21: don't claim 3dnow or 3dnowext, since in fact
2383 we don't support them. See #291568. 3dnow is 80000001.EDX.31
2384 and 3dnowext is 80000001.EDX.30.
2385 */
2386 void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
2387 {
2388 # define SET_ABCD(_a,_b,_c,_d) \
2389 do { st->guest_RAX = (ULong)(_a); \
2390 st->guest_RBX = (ULong)(_b); \
2391 st->guest_RCX = (ULong)(_c); \
2392 st->guest_RDX = (ULong)(_d); \
2393 } while (0)
2394
2395 switch (0xFFFFFFFF & st->guest_RAX) {
2396 case 0x00000000:
2397 SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
2398 break;
2399 case 0x00000001:
2400 SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
2401 break;
2402 case 0x80000000:
2403 SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
2404 break;
2405 case 0x80000001:
2406 /* Don't claim to support 3dnow or 3dnowext. 0xe1d3fbff is
2407 the original it-is-supported value that the h/w provides.
2408 See #291568. */
2409 SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, /*0xe1d3fbff*/
2410 0x21d3fbff);
2411 break;
2412 case 0x80000002:
2413 SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
2414 break;
2415 case 0x80000003:
2416 SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
2417 break;
2418 case 0x80000004:
2419 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2420 break;
2421 case 0x80000005:
2422 SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
2423 break;
2424 case 0x80000006:
2425 SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
2426 break;
2427 case 0x80000007:
2428 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
2429 break;
2430 case 0x80000008:
2431 SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
2432 break;
2433 default:
2434 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2435 break;
2436 }
2437 # undef SET_ABCD
2438 }
2439
2440
2441 /* Claim to be the following CPU (2 x ...), which is sse3 and cx16
2442 capable.
2443
2444 vendor_id : GenuineIntel
2445 cpu family : 6
2446 model : 15
2447 model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
2448 stepping : 6
2449 cpu MHz : 2394.000
2450 cache size : 4096 KB
2451 physical id : 0
2452 siblings : 2
2453 core id : 0
2454 cpu cores : 2
2455 fpu : yes
2456 fpu_exception : yes
2457 cpuid level : 10
2458 wp : yes
2459 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2460 mtrr pge mca cmov pat pse36 clflush dts acpi
2461 mmx fxsr sse sse2 ss ht tm syscall nx lm
2462 constant_tsc pni monitor ds_cpl vmx est tm2
2463 cx16 xtpr lahf_lm
2464 bogomips : 4798.78
2465 clflush size : 64
2466 cache_alignment : 64
2467 address sizes : 36 bits physical, 48 bits virtual
2468 power management:
2469 */
2470 void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st )
2471 {
2472 # define SET_ABCD(_a,_b,_c,_d) \
2473 do { st->guest_RAX = (ULong)(_a); \
2474 st->guest_RBX = (ULong)(_b); \
2475 st->guest_RCX = (ULong)(_c); \
2476 st->guest_RDX = (ULong)(_d); \
2477 } while (0)
2478
2479 switch (0xFFFFFFFF & st->guest_RAX) {
2480 case 0x00000000:
2481 SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
2482 break;
2483 case 0x00000001:
2484 SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
2485 break;
2486 case 0x00000002:
2487 SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
2488 break;
2489 case 0x00000003:
2490 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2491 break;
2492 case 0x00000004: {
2493 switch (0xFFFFFFFF & st->guest_RCX) {
2494 case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
2495 0x0000003f, 0x00000001); break;
2496 case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
2497 0x0000003f, 0x00000001); break;
2498 case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
2499 0x00000fff, 0x00000001); break;
2500 default: SET_ABCD(0x00000000, 0x00000000,
2501 0x00000000, 0x00000000); break;
2502 }
2503 break;
2504 }
2505 case 0x00000005:
2506 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
2507 break;
2508 case 0x00000006:
2509 SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
2510 break;
2511 case 0x00000007:
2512 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2513 break;
2514 case 0x00000008:
2515 SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
2516 break;
2517 case 0x00000009:
2518 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2519 break;
2520 case 0x0000000a:
2521 unhandled_eax_value:
2522 SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
2523 break;
2524 case 0x80000000:
2525 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2526 break;
2527 case 0x80000001:
2528 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
2529 break;
2530 case 0x80000002:
2531 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2532 break;
2533 case 0x80000003:
2534 SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
2535 break;
2536 case 0x80000004:
2537 SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
2538 break;
2539 case 0x80000005:
2540 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2541 break;
2542 case 0x80000006:
2543 SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
2544 break;
2545 case 0x80000007:
2546 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2547 break;
2548 case 0x80000008:
2549 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2550 break;
2551 default:
2552 goto unhandled_eax_value;
2553 }
2554 # undef SET_ABCD
2555 }
2556
2557
2558 /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16
2559 capable.
2560
2561 vendor_id : GenuineIntel
2562 cpu family : 6
2563 model : 37
2564 model name : Intel(R) Core(TM) i5 CPU 670 @ 3.47GHz
2565 stepping : 2
2566 cpu MHz : 3334.000
2567 cache size : 4096 KB
2568 physical id : 0
2569 siblings : 4
2570 core id : 0
2571 cpu cores : 2
2572 apicid : 0
2573 initial apicid : 0
2574 fpu : yes
2575 fpu_exception : yes
2576 cpuid level : 11
2577 wp : yes
2578 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2579 mtrr pge mca cmov pat pse36 clflush dts acpi
2580 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
2581 lm constant_tsc arch_perfmon pebs bts rep_good
2582 xtopology nonstop_tsc aperfmperf pni pclmulqdq
2583 dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16
2584 xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida
2585 arat tpr_shadow vnmi flexpriority ept vpid
2586 bogomips : 6957.57
2587 clflush size : 64
2588 cache_alignment : 64
2589 address sizes : 36 bits physical, 48 bits virtual
2590 power management:
2591 */
2592 void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st )
2593 {
2594 # define SET_ABCD(_a,_b,_c,_d) \
2595 do { st->guest_RAX = (ULong)(_a); \
2596 st->guest_RBX = (ULong)(_b); \
2597 st->guest_RCX = (ULong)(_c); \
2598 st->guest_RDX = (ULong)(_d); \
2599 } while (0)
2600
2601 UInt old_eax = (UInt)st->guest_RAX;
2602 UInt old_ecx = (UInt)st->guest_RCX;
2603
2604 switch (old_eax) {
2605 case 0x00000000:
2606 SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69);
2607 break;
2608 case 0x00000001:
2609 SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff, 0xbfebfbff);
2610 break;
2611 case 0x00000002:
2612 SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c);
2613 break;
2614 case 0x00000003:
2615 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2616 break;
2617 case 0x00000004:
2618 switch (old_ecx) {
2619 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
2620 0x0000003f, 0x00000000); break;
2621 case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f,
2622 0x0000007f, 0x00000000); break;
2623 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
2624 0x000001ff, 0x00000000); break;
2625 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
2626 0x00000fff, 0x00000002); break;
2627 default: SET_ABCD(0x00000000, 0x00000000,
2628 0x00000000, 0x00000000); break;
2629 }
2630 break;
2631 case 0x00000005:
2632 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
2633 break;
2634 case 0x00000006:
2635 SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000);
2636 break;
2637 case 0x00000007:
2638 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2639 break;
2640 case 0x00000008:
2641 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2642 break;
2643 case 0x00000009:
2644 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2645 break;
2646 case 0x0000000a:
2647 SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603);
2648 break;
2649 case 0x0000000b:
2650 switch (old_ecx) {
2651 case 0x00000000:
2652 SET_ABCD(0x00000001, 0x00000002,
2653 0x00000100, 0x00000000); break;
2654 case 0x00000001:
2655 SET_ABCD(0x00000004, 0x00000004,
2656 0x00000201, 0x00000000); break;
2657 default:
2658 SET_ABCD(0x00000000, 0x00000000,
2659 old_ecx, 0x00000000); break;
2660 }
2661 break;
2662 case 0x0000000c:
2663 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
2664 break;
2665 case 0x0000000d:
2666 switch (old_ecx) {
2667 case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
2668 0x00000100, 0x00000000); break;
2669 case 0x00000001: SET_ABCD(0x00000004, 0x00000004,
2670 0x00000201, 0x00000000); break;
2671 default: SET_ABCD(0x00000000, 0x00000000,
2672 old_ecx, 0x00000000); break;
2673 }
2674 break;
2675 case 0x80000000:
2676 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2677 break;
2678 case 0x80000001:
2679 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
2680 break;
2681 case 0x80000002:
2682 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2683 break;
2684 case 0x80000003:
2685 SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020);
2686 break;
2687 case 0x80000004:
2688 SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847);
2689 break;
2690 case 0x80000005:
2691 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2692 break;
2693 case 0x80000006:
2694 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
2695 break;
2696 case 0x80000007:
2697 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
2698 break;
2699 case 0x80000008:
2700 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2701 break;
2702 default:
2703 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
2704 break;
2705 }
2706 # undef SET_ABCD
2707 }
2708
2709
2710 /* Claim to be the following CPU (4 x ...), which is AVX and cx16
2711 capable. Plus (kludge!) it "supports" HTM.
2712
2713 vendor_id : GenuineIntel
2714 cpu family : 6
2715 model : 42
2716 model name : Intel(R) Core(TM) i5-2300 CPU @ 2.80GHz
2717 stepping : 7
2718 cpu MHz : 1600.000
2719 cache size : 6144 KB
2720 physical id : 0
2721 siblings : 4
2722 core id : 3
2723 cpu cores : 4
2724 apicid : 6
2725 initial apicid : 6
2726 fpu : yes
2727 fpu_exception : yes
2728 cpuid level : 13
2729 wp : yes
2730 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2731 mtrr pge mca cmov pat pse36 clflush dts acpi
2732 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
2733 lm constant_tsc arch_perfmon pebs bts rep_good
2734 nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq
2735 dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16
2736 xtpr pdcm sse4_1 sse4_2 popcnt aes xsave avx
2737 lahf_lm ida arat epb xsaveopt pln pts dts
2738 tpr_shadow vnmi flexpriority ept vpid
2739
2740 bogomips : 5768.94
2741 clflush size : 64
2742 cache_alignment : 64
2743 address sizes : 36 bits physical, 48 bits virtual
2744 power management:
2745 */
2746 void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st )
2747 {
2748 # define SET_ABCD(_a,_b,_c,_d) \
2749 do { st->guest_RAX = (ULong)(_a); \
2750 st->guest_RBX = (ULong)(_b); \
2751 st->guest_RCX = (ULong)(_c); \
2752 st->guest_RDX = (ULong)(_d); \
2753 } while (0)
2754
2755 UInt old_eax = (UInt)st->guest_RAX;
2756 UInt old_ecx = (UInt)st->guest_RCX;
2757
2758 switch (old_eax) {
2759 case 0x00000000:
2760 SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
2761 break;
2762 case 0x00000001:
2763 SET_ABCD(0x000206a7, 0x00100800, 0x1f9ae3bf, 0xbfebfbff);
2764 break;
2765 case 0x00000002:
2766 SET_ABCD(0x76035a01, 0x00f0b0ff, 0x00000000, 0x00ca0000);
2767 break;
2768 case 0x00000003:
2769 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2770 break;
2771 case 0x00000004:
2772 switch (old_ecx) {
2773 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
2774 0x0000003f, 0x00000000); break;
2775 case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f,
2776 0x0000003f, 0x00000000); break;
2777 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
2778 0x000001ff, 0x00000000); break;
2779 case 0x00000003: SET_ABCD(0x1c03c163, 0x02c0003f,
2780 0x00001fff, 0x00000006); break;
2781 default: SET_ABCD(0x00000000, 0x00000000,
2782 0x00000000, 0x00000000); break;
2783 }
2784 break;
2785 case 0x00000005:
2786 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
2787 break;
2788 case 0x00000006:
2789 SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000);
2790 break;
2791 case 0x00000007:
2792 SET_ABCD(0x00000000, 0x00000800, 0x00000000, 0x00000000);
2793 break;
2794 case 0x00000008:
2795 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2796 break;
2797 case 0x00000009:
2798 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2799 break;
2800 case 0x0000000a:
2801 SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603);
2802 break;
2803 case 0x0000000b:
2804 switch (old_ecx) {
2805 case 0x00000000:
2806 SET_ABCD(0x00000001, 0x00000001,
2807 0x00000100, 0x00000000); break;
2808 case 0x00000001:
2809 SET_ABCD(0x00000004, 0x00000004,
2810 0x00000201, 0x00000000); break;
2811 default:
2812 SET_ABCD(0x00000000, 0x00000000,
2813 old_ecx, 0x00000000); break;
2814 }
2815 break;
2816 case 0x0000000c:
2817 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2818 break;
2819 case 0x0000000d:
2820 switch (old_ecx) {
2821 case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
2822 0x00000340, 0x00000000); break;
2823 case 0x00000001: SET_ABCD(0x00000001, 0x00000000,
2824 0x00000000, 0x00000000); break;
2825 case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
2826 0x00000000, 0x00000000); break;
2827 default: SET_ABCD(0x00000000, 0x00000000,
2828 0x00000000, 0x00000000); break;
2829 }
2830 break;
2831 case 0x0000000e:
2832 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
2833 break;
2834 case 0x0000000f:
2835 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
2836 break;
2837 case 0x80000000:
2838 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2839 break;
2840 case 0x80000001:
2841 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
2842 break;
2843 case 0x80000002:
2844 SET_ABCD(0x20202020, 0x20202020, 0x65746e49, 0x2952286c);
2845 break;
2846 case 0x80000003:
2847 SET_ABCD(0x726f4320, 0x4d542865, 0x35692029, 0x3033322d);
2848 break;
2849 case 0x80000004:
2850 SET_ABCD(0x50432030, 0x20402055, 0x30382e32, 0x007a4847);
2851 break;
2852 case 0x80000005:
2853 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2854 break;
2855 case 0x80000006:
2856 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
2857 break;
2858 case 0x80000007:
2859 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
2860 break;
2861 case 0x80000008:
2862 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2863 break;
2864 default:
2865 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
2866 break;
2867 }
2868 # undef SET_ABCD
2869 }
2870
2871
2872 ULong amd64g_calculate_RCR ( ULong arg,
2873 ULong rot_amt,
2874 ULong rflags_in,
2875 Long szIN )
2876 {
2877 Bool wantRflags = toBool(szIN < 0);
2878 ULong sz = wantRflags ? (-szIN) : szIN;
2879 ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F);
2880 ULong cf=0, of=0, tempcf;
2881
2882 switch (sz) {
2883 case 8:
2884 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2885 of = ((arg >> 63) ^ cf) & 1;
2886 while (tempCOUNT > 0) {
2887 tempcf = arg & 1;
2888 arg = (arg >> 1) | (cf << 63);
2889 cf = tempcf;
2890 tempCOUNT--;
2891 }
2892 break;
2893 case 4:
2894 while (tempCOUNT >= 33) tempCOUNT -= 33;
2895 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2896 of = ((arg >> 31) ^ cf) & 1;
2897 while (tempCOUNT > 0) {
2898 tempcf = arg & 1;
2899 arg = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31);
2900 cf = tempcf;
2901 tempCOUNT--;
2902 }
2903 break;
2904 case 2:
2905 while (tempCOUNT >= 17) tempCOUNT -= 17;
2906 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2907 of = ((arg >> 15) ^ cf) & 1;
2908 while (tempCOUNT > 0) {
2909 tempcf = arg & 1;
2910 arg = ((arg >> 1) & 0x7FFFULL) | (cf << 15);
2911 cf = tempcf;
2912 tempCOUNT--;
2913 }
2914 break;
2915 case 1:
2916 while (tempCOUNT >= 9) tempCOUNT -= 9;
2917 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2918 of = ((arg >> 7) ^ cf) & 1;
2919 while (tempCOUNT > 0) {
2920 tempcf = arg & 1;
2921 arg = ((arg >> 1) & 0x7FULL) | (cf << 7);
2922 cf = tempcf;
2923 tempCOUNT--;
2924 }
2925 break;
2926 default:
2927 vpanic("calculate_RCR(amd64g): invalid size");
2928 }
2929
2930 cf &= 1;
2931 of &= 1;
2932 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
2933 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
2934
2935 /* caller can ask to have back either the resulting flags or
2936 resulting value, but not both */
2937 return wantRflags ? rflags_in : arg;
2938 }
2939
2940 ULong amd64g_calculate_RCL ( ULong arg,
2941 ULong rot_amt,
2942 ULong rflags_in,
2943 Long szIN )
2944 {
2945 Bool wantRflags = toBool(szIN < 0);
2946 ULong sz = wantRflags ? (-szIN) : szIN;
2947 ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F);
2948 ULong cf=0, of=0, tempcf;
2949
2950 switch (sz) {
2951 case 8:
2952 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2953 while (tempCOUNT > 0) {
2954 tempcf = (arg >> 63) & 1;
2955 arg = (arg << 1) | (cf & 1);
2956 cf = tempcf;
2957 tempCOUNT--;
2958 }
2959 of = ((arg >> 63) ^ cf) & 1;
2960 break;
2961 case 4:
2962 while (tempCOUNT >= 33) tempCOUNT -= 33;
2963 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2964 while (tempCOUNT > 0) {
2965 tempcf = (arg >> 31) & 1;
2966 arg = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1));
2967 cf = tempcf;
2968 tempCOUNT--;
2969 }
2970 of = ((arg >> 31) ^ cf) & 1;
2971 break;
2972 case 2:
2973 while (tempCOUNT >= 17) tempCOUNT -= 17;
2974 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2975 while (tempCOUNT > 0) {
2976 tempcf = (arg >> 15) & 1;
2977 arg = 0xFFFFULL & ((arg << 1) | (cf & 1));
2978 cf = tempcf;
2979 tempCOUNT--;
2980 }
2981 of = ((arg >> 15) ^ cf) & 1;
2982 break;
2983 case 1:
2984 while (tempCOUNT >= 9) tempCOUNT -= 9;
2985 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2986 while (tempCOUNT > 0) {
2987 tempcf = (arg >> 7) & 1;
2988 arg = 0xFFULL & ((arg << 1) | (cf & 1));
2989 cf = tempcf;
2990 tempCOUNT--;
2991 }
2992 of = ((arg >> 7) ^ cf) & 1;
2993 break;
2994 default:
2995 vpanic("calculate_RCL(amd64g): invalid size");
2996 }
2997
2998 cf &= 1;
2999 of &= 1;
3000 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
3001 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
3002
3003 return wantRflags ? rflags_in : arg;
3004 }
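
/* Sketch (illustrative only, not compiled) of the szIN convention shared by
   amd64g_calculate_RCR and amd64g_calculate_RCL above: a positive size asks
   for the rotated value, a negated size asks for the updated rflags, so the
   generated IR calls the helper twice per instruction. */
#if 0
static void example_rcl_value_and_flags ( void )
{
   /* 8-bit RCL of 0x80 by 1 with carry-in 0: result 0x00, CF=1, OF=1. */
   ULong value = amd64g_calculate_RCL( 0x80, 1, 0,  1 /* size 1, want value  */ );
   ULong flags = amd64g_calculate_RCL( 0x80, 1, 0, -1 /* size 1, want rflags */ );
   vassert(value == 0x00);
   vassert((flags & AMD64G_CC_MASK_C) != 0 && (flags & AMD64G_CC_MASK_O) != 0);
}
#endif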
3005
3006 /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+)
3007 * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25
3008 */
3009 ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which)
3010 {
3011 ULong hi, lo, tmp, A[16];
3012
3013 A[0] = 0; A[1] = a;
3014 A[2] = A[1] << 1; A[3] = A[2] ^ a;
3015 A[4] = A[2] << 1; A[5] = A[4] ^ a;
3016 A[6] = A[3] << 1; A[7] = A[6] ^ a;
3017 A[8] = A[4] << 1; A[9] = A[8] ^ a;
3018 A[10] = A[5] << 1; A[11] = A[10] ^ a;
3019 A[12] = A[6] << 1; A[13] = A[12] ^ a;
3020 A[14] = A[7] << 1; A[15] = A[14] ^ a;
3021
3022 lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15];
3023 hi = lo >> 56;
3024 lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15];
3025 hi = (hi << 8) | (lo >> 56);
3026 lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15];
3027 hi = (hi << 8) | (lo >> 56);
3028 lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15];
3029 hi = (hi << 8) | (lo >> 56);
3030 lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15];
3031 hi = (hi << 8) | (lo >> 56);
3032 lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15];
3033 hi = (hi << 8) | (lo >> 56);
3034 lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15];
3035 hi = (hi << 8) | (lo >> 56);
3036 lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15];
3037
3038 ULong m0 = -1;
3039 m0 /= 255;
3040 tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp;
3041 tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp;
3042 tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp;
3043 tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp;
3044 tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp;
3045 tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp;
3046 tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp;
3047
3048 return which ? hi : lo;
3049 }
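
/* A quick worked example for the carry-less multiply above (sketch only,
   not compiled): over GF(2), (x + 1) * (x^2 + 1) = x^3 + x^2 + x + 1,
   i.e. 0x3 (*) 0x5 = 0xF, with an all-zero high half. */
#if 0
static void example_pclmul ( void )
{
   vassert(amd64g_calculate_pclmul(0x3, 0x5, 0/*low 64 bits*/ ) == 0xF);
   vassert(amd64g_calculate_pclmul(0x3, 0x5, 1/*high 64 bits*/) == 0x0);
}
#endif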
3050
3051
3052 /* CALLED FROM GENERATED CODE */
3053 /* DIRTY HELPER (non-referentially-transparent) */
3054 /* Horrible hack. On non-amd64 platforms, return 1. */
3055 ULong amd64g_dirtyhelper_RDTSC ( void )
3056 {
3057 # if defined(__x86_64__)
3058 UInt eax, edx;
3059 __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx));
3060 return (((ULong)edx) << 32) | ((ULong)eax);
3061 # else
3062 return 1ULL;
3063 # endif
3064 }
3065
3066 /* CALLED FROM GENERATED CODE */
3067 /* DIRTY HELPER (non-referentially-transparent) */
3068 /* Horrible hack. On non-amd64 platforms, return 1. */
3069 /* This uses a different calling convention from _RDTSC just above
3070 only because of the difficulty of returning 96 bits from a C
3071 function -- RDTSC returns 64 bits and so is simple by comparison,
3072 on amd64. */
3073 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st )
3074 {
3075 # if defined(__x86_64__)
3076 UInt eax, ecx, edx;
3077 __asm__ __volatile__("rdtscp" : "=a" (eax), "=d" (edx), "=c" (ecx));
3078 st->guest_RAX = (ULong)eax;
3079 st->guest_RCX = (ULong)ecx;
3080 st->guest_RDX = (ULong)edx;
3081 # else
3082 /* Do nothing. */
3083 # endif
3084 }
3085
3086 /* CALLED FROM GENERATED CODE */
3087 /* DIRTY HELPER (non-referentially-transparent) */
3088 /* Horrible hack. On non-amd64 platforms, return 0. */
3089 ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ )
3090 {
3091 # if defined(__x86_64__)
3092 ULong r = 0;
3093 portno &= 0xFFFF;
3094 switch (sz) {
3095 case 4:
3096 __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0"
3097 : "=a" (r) : "Nd" (portno));
3098 break;
3099 case 2:
3100 __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0"
3101 : "=a" (r) : "Nd" (portno));
3102 break;
3103 case 1:
3104 __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0"
3105 : "=a" (r) : "Nd" (portno));
3106 break;
3107 default:
3108 break; /* note: no 64-bit version of insn exists */
3109 }
3110 return r;
3111 # else
3112 return 0;
3113 # endif
3114 }
3115
3116
3117 /* CALLED FROM GENERATED CODE */
3118 /* DIRTY HELPER (non-referentially-transparent) */
3119 /* Horrible hack. On non-amd64 platforms, do nothing. */
3120 void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ )
3121 {
3122 # if defined(__x86_64__)
3123 portno &= 0xFFFF;
3124 switch (sz) {
3125 case 4:
3126 __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1"
3127 : : "a" (data), "Nd" (portno));
3128 break;
3129 case 2:
3130 __asm__ __volatile__("outw %w0, %w1"
3131 : : "a" (data), "Nd" (portno));
3132 break;
3133 case 1:
3134 __asm__ __volatile__("outb %b0, %w1"
3135 : : "a" (data), "Nd" (portno));
3136 break;
3137 default:
3138 break; /* note: no 64-bit version of insn exists */
3139 }
3140 # else
3141 /* do nothing */
3142 # endif
3143 }
3144
3145 /* CALLED FROM GENERATED CODE */
3146 /* DIRTY HELPER (non-referentially-transparent) */
3147 /* Horrible hack. On non-amd64 platforms, do nothing. */
3148 /* op = 0: call the native SGDT instruction.
3149 op = 1: call the native SIDT instruction.
3150 */
3151 void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) {
3152 # if defined(__x86_64__)
3153 switch (op) {
3154 case 0:
3155 __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
3156 break;
3157 case 1:
3158 __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
3159 break;
3160 default:
3161 vpanic("amd64g_dirtyhelper_SxDT");
3162 }
3163 # else
3164    /* Can't execute the real insn here; just zero the 10-byte
          descriptor so the caller sees defined data. */
3165 UChar* p = (UChar*)address;
3166 p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
3167 p[6] = p[7] = p[8] = p[9] = 0;
3168 # endif
3169 }
3170
3171 /*---------------------------------------------------------------*/
3172 /*--- Helpers for MMX/SSE/SSE2. ---*/
3173 /*---------------------------------------------------------------*/
3174
3175 static inline UChar abdU8 ( UChar xx, UChar yy ) {
3176 return toUChar(xx>yy ? xx-yy : yy-xx);
3177 }
3178
3179 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
3180 return (((ULong)w1) << 32) | ((ULong)w0);
3181 }
3182
3183 static inline UShort sel16x4_3 ( ULong w64 ) {
3184 UInt hi32 = toUInt(w64 >> 32);
3185 return toUShort(hi32 >> 16);
3186 }
3187 static inline UShort sel16x4_2 ( ULong w64 ) {
3188 UInt hi32 = toUInt(w64 >> 32);
3189 return toUShort(hi32);
3190 }
3191 static inline UShort sel16x4_1 ( ULong w64 ) {
3192 UInt lo32 = toUInt(w64);
3193 return toUShort(lo32 >> 16);
3194 }
3195 static inline UShort sel16x4_0 ( ULong w64 ) {
3196 UInt lo32 = toUInt(w64);
3197 return toUShort(lo32);
3198 }
3199
3200 static inline UChar sel8x8_7 ( ULong w64 ) {
3201 UInt hi32 = toUInt(w64 >> 32);
3202 return toUChar(hi32 >> 24);
3203 }
3204 static inline UChar sel8x8_6 ( ULong w64 ) {
3205 UInt hi32 = toUInt(w64 >> 32);
3206 return toUChar(hi32 >> 16);
3207 }
3208 static inline UChar sel8x8_5 ( ULong w64 ) {
3209 UInt hi32 = toUInt(w64 >> 32);
3210 return toUChar(hi32 >> 8);
3211 }
3212 static inline UChar sel8x8_4 ( ULong w64 ) {
3213 UInt hi32 = toUInt(w64 >> 32);
3214 return toUChar(hi32 >> 0);
3215 }
3216 static inline UChar sel8x8_3 ( ULong w64 ) {
3217 UInt lo32 = toUInt(w64);
3218 return toUChar(lo32 >> 24);
3219 }
3220 static inline UChar sel8x8_2 ( ULong w64 ) {
3221 UInt lo32 = toUInt(w64);
3222 return toUChar(lo32 >> 16);
3223 }
3224 static inline UChar sel8x8_1 ( ULong w64 ) {
3225 UInt lo32 = toUInt(w64);
3226 return toUChar(lo32 >> 8);
3227 }
3228 static inline UChar sel8x8_0 ( ULong w64 ) {
3229 UInt lo32 = toUInt(w64);
3230 return toUChar(lo32 >> 0);
3231 }
3232
3233 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3234 ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
3235 {
3236 return
3237 mk32x2(
3238 (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
3239 + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
3240 (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
3241 + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
3242 );
3243 }
3244
3245 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3246 ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
3247 {
3248 UInt t = 0;
3249 t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
3250 t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
3251 t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
3252 t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
3253 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
3254 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
3255 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
3256 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
3257 t &= 0xFFFF;
3258 return (ULong)t;
3259 }
3260
3261 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3262 ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi )
3263 {
3264 UShort t, min;
3265 UInt idx;
3266 t = sel16x4_0(sLo); if (True) { min = t; idx = 0; }
3267 t = sel16x4_1(sLo); if (t < min) { min = t; idx = 1; }
3268 t = sel16x4_2(sLo); if (t < min) { min = t; idx = 2; }
3269 t = sel16x4_3(sLo); if (t < min) { min = t; idx = 3; }
3270 t = sel16x4_0(sHi); if (t < min) { min = t; idx = 4; }
3271 t = sel16x4_1(sHi); if (t < min) { min = t; idx = 5; }
3272 t = sel16x4_2(sHi); if (t < min) { min = t; idx = 6; }
3273 t = sel16x4_3(sHi); if (t < min) { min = t; idx = 7; }
3274 return ((ULong)(idx << 16)) | ((ULong)min);
3275 }
3276
3277 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3278 ULong amd64g_calc_crc32b ( ULong crcIn, ULong b )
3279 {
3280 UInt i;
3281 ULong crc = (b & 0xFFULL) ^ crcIn;
3282 for (i = 0; i < 8; i++)
3283 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
3284 return crc;
3285 }
3286
3287 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3288 ULong amd64g_calc_crc32w ( ULong crcIn, ULong w )
3289 {
3290 UInt i;
3291 ULong crc = (w & 0xFFFFULL) ^ crcIn;
3292 for (i = 0; i < 16; i++)
3293 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
3294 return crc;
3295 }
3296
3297 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3298 ULong amd64g_calc_crc32l ( ULong crcIn, ULong l )
3299 {
3300 UInt i;
3301 ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn;
3302 for (i = 0; i < 32; i++)
3303 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
3304 return crc;
3305 }
3306
3307 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3308 ULong amd64g_calc_crc32q ( ULong crcIn, ULong q )
3309 {
3310 ULong crc = amd64g_calc_crc32l(crcIn, q);
3311 return amd64g_calc_crc32l(crc, q >> 32);
3312 }
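
/* The four CRC helpers above fold 8/16/32/64 bits at a time into a CRC
   using the Castagnoli polynomial (bit-reflected constant 0x82F63B78), as
   the SSE4.2 CRC32 instruction does.  Below is an illustrative sketch (not
   compiled) of building a conventional CRC-32C digest on top of the byte
   helper; 0xE3069283 is the usual CRC-32C check value for "123456789". */
#if 0
static UInt example_crc32c_digest ( const UChar* buf, ULong len )
{
   ULong i, crc = 0xFFFFFFFFULL;             /* conventional initial value */
   for (i = 0; i < len; i++)
      crc = amd64g_calc_crc32b( crc, buf[i] );
   /* example_crc32c_digest((const UChar*)"123456789", 9) == 0xE3069283 */
   return (UInt)(crc ^ 0xFFFFFFFFULL);       /* final inversion */
}
#endif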
3313
3314
3315 /* .. helper for next fn .. */
3316 static inline ULong sad_8x4 ( ULong xx, ULong yy )
3317 {
3318 UInt t = 0;
3319 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
3320 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
3321 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
3322 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
3323 return (ULong)t;
3324 }
3325
3326 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3327 ULong amd64g_calc_mpsadbw ( ULong sHi, ULong sLo,
3328 ULong dHi, ULong dLo,
3329 ULong imm_and_return_control_bit )
3330 {
3331 UInt imm8 = imm_and_return_control_bit & 7;
3332 Bool calcHi = (imm_and_return_control_bit >> 7) & 1;
3333 UInt srcOffsL = imm8 & 3; /* src offs in 32-bit (L) chunks */
3334 UInt dstOffsL = (imm8 >> 2) & 1; /* dst offs in ditto chunks */
3335 /* For src we only need 32 bits, so get them into the
3336 lower half of a 64 bit word. */
3337 ULong src = ((srcOffsL & 2) ? sHi : sLo) >> (32 * (srcOffsL & 1));
3338 /* For dst we need to get hold of 56 bits (7 bytes) from a total of
3339 11 bytes. If calculating the low part of the result, need bytes
3340 dstOffsL * 4 + (0 .. 6); if calculating the high part,
3341 dstOffsL * 4 + (4 .. 10). */
3342 ULong dst;
3343 /* dstOffL = 0, Lo -> 0 .. 6
3344 dstOffL = 1, Lo -> 4 .. 10
3345 dstOffL = 0, Hi -> 4 .. 10
3346 dstOffL = 1, Hi -> 8 .. 14
3347 */
3348 if (calcHi && dstOffsL) {
3349 /* 8 .. 14 */
3350 dst = dHi & 0x00FFFFFFFFFFFFFFULL;
3351 }
3352 else if (!calcHi && !dstOffsL) {
3353 /* 0 .. 6 */
3354 dst = dLo & 0x00FFFFFFFFFFFFFFULL;
3355 }
3356 else {
3357 /* 4 .. 10 */
3358 dst = (dLo >> 32) | ((dHi & 0x00FFFFFFULL) << 32);
3359 }
3360 ULong r0 = sad_8x4( dst >> 0, src );
3361 ULong r1 = sad_8x4( dst >> 8, src );
3362 ULong r2 = sad_8x4( dst >> 16, src );
3363 ULong r3 = sad_8x4( dst >> 24, src );
3364 ULong res = (r3 << 48) | (r2 << 32) | (r1 << 16) | r0;
3365 return res;
3366 }
3367
3368 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3369 ULong amd64g_calculate_pext ( ULong src_masked, ULong mask )
3370 {
3371 ULong dst = 0;
3372 ULong src_bit;
3373 ULong dst_bit = 1;
3374 for (src_bit = 1; src_bit; src_bit <<= 1) {
3375 if (mask & src_bit) {
3376 if (src_masked & src_bit) dst |= dst_bit;
3377 dst_bit <<= 1;
3378 }
3379 }
3380 return dst;
3381 }
3382
3383 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3384 ULong amd64g_calculate_pdep ( ULong src, ULong mask )
3385 {
3386 ULong dst = 0;
3387 ULong dst_bit;
3388 ULong src_bit = 1;
3389 for (dst_bit = 1; dst_bit; dst_bit <<= 1) {
3390 if (mask & dst_bit) {
3391 if (src & src_bit) dst |= dst_bit;
3392 src_bit <<= 1;
3393 }
3394 }
3395 return dst;
3396 }
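
/* A worked example for the two BMI2 helpers above (sketch only, not
   compiled): with mask 0x0F0F0F0F, PEXT gathers the low nibble of each byte
   into a contiguous value, and PDEP scatters such a value back out. */
#if 0
static void example_pext_pdep ( void )
{
   vassert(amd64g_calculate_pext(0x12345678ULL, 0x0F0F0F0FULL) == 0x2468ULL);
   vassert(amd64g_calculate_pdep(0x2468ULL,     0x0F0F0F0FULL) == 0x02040608ULL);
}
#endif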
3397
3398 /*---------------------------------------------------------------*/
3399 /*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M} ---*/
3400 /*---------------------------------------------------------------*/
3401
3402 static UInt zmask_from_V128 ( V128* arg )
3403 {
3404 UInt i, res = 0;
3405 for (i = 0; i < 16; i++) {
3406 res |= ((arg->w8[i] == 0) ? 1 : 0) << i;
3407 }
3408 return res;
3409 }
3410
3411 static UInt zmask_from_V128_wide ( V128* arg )
3412 {
3413 UInt i, res = 0;
3414 for (i = 0; i < 8; i++) {
3415 res |= ((arg->w16[i] == 0) ? 1 : 0) << i;
3416 }
3417 return res;
3418 }
3419
3420 /* Helps with PCMP{I,E}STR{I,M}.
3421
3422 CALLED FROM GENERATED CODE: DIRTY HELPER(s). (But not really,
3423 actually it could be a clean helper, but for the fact that we can't
3424 pass by value 2 x V128 to a clean helper, nor have one returned.)
3425 Reads guest state, writes to guest state for the xSTRM cases, no
3426 accesses of memory, is a pure function.
3427
3428 opc_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so
3429 the callee knows which I/E and I/M variant it is dealing with and
3430 what the specific operation is. 4th byte of opcode is in the range
3431 0x60 to 0x63:
3432 istri 66 0F 3A 63
3433 istrm 66 0F 3A 62
3434 estri 66 0F 3A 61
3435 estrm 66 0F 3A 60
3436
3437 gstOffL and gstOffR are the guest state offsets for the two XMM
3438 register inputs. We never have to deal with the memory case since
3439 that is handled by pre-loading the relevant value into the fake
3440 XMM16 register.
3441
3442 For ESTRx variants, edxIN and eaxIN hold the values of those two
3443 registers.
3444
3445 In all cases, the bottom 16 bits of the result contain the new
3446 OSZACP %rflags values. For xSTRI variants, bits[31:16] of the
3447 result hold the new %ecx value. For xSTRM variants, the helper
3448 writes the result directly to the guest XMM0.
3449
3450 Declarable side effects: in all cases, reads guest state at
3451 [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes
3452 guest_XMM0.
3453
3454 Is expected to be called with opc_and_imm combinations which have
3455 actually been validated, and will assert if otherwise. The front
3456 end should ensure we're only called with verified values.
3457 */
3458 ULong amd64g_dirtyhelper_PCMPxSTRx (
3459 VexGuestAMD64State* gst,
3460 HWord opc4_and_imm,
3461 HWord gstOffL, HWord gstOffR,
3462 HWord edxIN, HWord eaxIN
3463 )
3464 {
3465 HWord opc4 = (opc4_and_imm >> 8) & 0xFF;
3466 HWord imm8 = opc4_and_imm & 0xFF;
3467 HWord isISTRx = opc4 & 2;
3468 HWord isxSTRM = (opc4 & 1) ^ 1;
3469 vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */
3470 HWord wide = (imm8 & 1);
3471
3472 // where the args are
3473 V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
3474 V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
3475
3476 /* Create the arg validity masks, either from the vectors
3477 themselves or from the supplied edx/eax values. */
3478 // FIXME: this is only right for the 8-bit data cases.
3479 // At least that is asserted above.
3480 UInt zmaskL, zmaskR;
3481
3482 // temp spot for the resulting flags and vector.
3483 V128 resV;
3484 UInt resOSZACP;
3485
3486 // for checking whether case was handled
3487 Bool ok = False;
3488
3489 if (wide) {
3490 if (isISTRx) {
3491 zmaskL = zmask_from_V128_wide(argL);
3492 zmaskR = zmask_from_V128_wide(argR);
3493 } else {
3494 Int tmp;
3495 tmp = edxIN & 0xFFFFFFFF;
3496 if (tmp < -8) tmp = -8;
3497 if (tmp > 8) tmp = 8;
3498 if (tmp < 0) tmp = -tmp;
3499 vassert(tmp >= 0 && tmp <= 8);
3500 zmaskL = (1 << tmp) & 0xFF;
3501 tmp = eaxIN & 0xFFFFFFFF;
3502 if (tmp < -8) tmp = -8;
3503 if (tmp > 8) tmp = 8;
3504 if (tmp < 0) tmp = -tmp;
3505 vassert(tmp >= 0 && tmp <= 8);
3506 zmaskR = (1 << tmp) & 0xFF;
3507 }
3508 // do the computation
3509 ok = compute_PCMPxSTRx_wide (
3510 &resV, &resOSZACP, argL, argR,
3511 zmaskL, zmaskR, imm8, (Bool)isxSTRM
3512 );
3513 } else {
3514 if (isISTRx) {
3515 zmaskL = zmask_from_V128(argL);
3516 zmaskR = zmask_from_V128(argR);
3517 } else {
3518 Int tmp;
3519 tmp = edxIN & 0xFFFFFFFF;
3520 if (tmp < -16) tmp = -16;
3521 if (tmp > 16) tmp = 16;
3522 if (tmp < 0) tmp = -tmp;
3523 vassert(tmp >= 0 && tmp <= 16);
3524 zmaskL = (1 << tmp) & 0xFFFF;
3525 tmp = eaxIN & 0xFFFFFFFF;
3526 if (tmp < -16) tmp = -16;
3527 if (tmp > 16) tmp = 16;
3528 if (tmp < 0) tmp = -tmp;
3529 vassert(tmp >= 0 && tmp <= 16);
3530 zmaskR = (1 << tmp) & 0xFFFF;
3531 }
3532 // do the computation
3533 ok = compute_PCMPxSTRx (
3534 &resV, &resOSZACP, argL, argR,
3535 zmaskL, zmaskR, imm8, (Bool)isxSTRM
3536 );
3537 }
3538
3539 // front end shouldn't pass us any imm8 variants we can't
3540 // handle. Hence:
3541 vassert(ok);
3542
3543 // Finally, get the results back to the caller.
3544 // In all cases, the new OSZACP value is the lowest 16 bits
3545 // of the return value.
3546 if (isxSTRM) {
3547 gst->guest_YMM0[0] = resV.w32[0];
3548 gst->guest_YMM0[1] = resV.w32[1];
3549 gst->guest_YMM0[2] = resV.w32[2];
3550 gst->guest_YMM0[3] = resV.w32[3];
3551 return resOSZACP & 0x8D5;
3552 } else {
3553 UInt newECX = resV.w32[0] & 0xFFFF;
3554 return (newECX << 16) | (resOSZACP & 0x8D5);
3555 }
3556 }
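
/* Illustrative sketch only: how a caller would pack opc4_and_imm and
   unpack the result, following the comment above.  In reality the
   packing is done by the IR emitted from guest_amd64_toIR.c; the
   function name below is hypothetical, and the imm8 must be one of the
   variants the front end has actually validated. */
#if 0
static void example_pcmpistri_call ( VexGuestAMD64State* gst,
                                     HWord offL, HWord offR )
{
   HWord opc4_and_imm = ((HWord)0x63 << 8) | 0x0C;  /* istri, imm8 0x0C */
   ULong res = amd64g_dirtyhelper_PCMPxSTRx(gst, opc4_and_imm,
                                            offL, offR, 0, 0);
   UInt  new_ecx    = (UInt)(res >> 16) & 0xFFFF;   /* bits 31:16 */
   UInt  new_oszacp = (UInt)(res & 0xFFFF);         /* bits 15:0  */
   (void)new_ecx; (void)new_oszacp;
}
#endif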
3557
3558 /*---------------------------------------------------------------*/
3559 /*--- AES primitives and helpers ---*/
3560 /*---------------------------------------------------------------*/
3561 /* a 16 x 16 matrix */
3562 static const UChar sbox[256] = { // row nr
3563 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, // 1
3564 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
3565 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, // 2
3566 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
3567 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, // 3
3568 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
3569 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, // 4
3570 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
3571 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, // 5
3572 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
3573 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, // 6
3574 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
3575 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, // 7
3576 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
3577 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, // 8
3578 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
3579 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, // 9
3580 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
3581 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, //10
3582 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
3583 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, //11
3584 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
3585 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, //12
3586 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
3587 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, //13
3588 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
3589 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, //14
3590 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
3591 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, //15
3592 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
3593 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, //16
3594 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
3595 };
3596 static void SubBytes (V128* v)
3597 {
3598 V128 r;
3599 UInt i;
3600 for (i = 0; i < 16; i++)
3601 r.w8[i] = sbox[v->w8[i]];
3602 *v = r;
3603 }
3604
3605 /* a 16 x 16 matrix */
3606 static const UChar invsbox[256] = { // row nr
3607 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, // 1
3608 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
3609 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, // 2
3610 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
3611 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, // 3
3612 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
3613 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, // 4
3614 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
3615 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, // 5
3616 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
3617 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, // 6
3618 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
3619 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, // 7
3620 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
3621 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, // 8
3622 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
3623 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, // 9
3624 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
3625 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, //10
3626 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
3627 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, //11
3628 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
3629 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, //12
3630 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
3631 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, //13
3632 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
3633 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, //14
3634 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
3635 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, //15
3636 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
3637 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, //16
3638 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
3639 };
3640 static void InvSubBytes (V128* v)
3641 {
3642 V128 r;
3643 UInt i;
3644 for (i = 0; i < 16; i++)
3645 r.w8[i] = invsbox[v->w8[i]];
3646 *v = r;
3647 }
3648
3649 static const UChar ShiftRows_op[16] =
3650 {11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0};
3651 static void ShiftRows (V128* v)
3652 {
3653 V128 r;
3654 UInt i;
3655 for (i = 0; i < 16; i++)
3656 r.w8[i] = v->w8[ShiftRows_op[15-i]];
3657 *v = r;
3658 }
3659
3660 static const UChar InvShiftRows_op[16] =
3661 {3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0};
3662 static void InvShiftRows (V128* v)
3663 {
3664 V128 r;
3665 UInt i;
3666 for (i = 0; i < 16; i++)
3667 r.w8[i] = v->w8[InvShiftRows_op[15-i]];
3668 *v = r;
3669 }
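
/* Illustrative cross-check only (hypothetical, unused): the two
   permutation tables above encode AES ShiftRows/InvShiftRows on a
   state stored column-major (byte i holds state element
   [row i%4, column i/4]), so an equivalent direct formulation is
   out[i] = in[(i + 4*(i & 3)) & 15] for ShiftRows and
   out[i] = in[(i - 4*(i & 3)) & 15] for InvShiftRows. */
#if 0
static void ShiftRows_direct ( V128* v )
{
   V128 r;
   UInt i;
   for (i = 0; i < 16; i++)
      r.w8[i] = v->w8[(i + 4*(i & 3)) & 15];
   *v = r;
}
#endif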
3670
3671 /* Multiplication of the finite field elements of AES.
3672 See "A Specification for The AES Algorithm Rijndael
3673 (by Joan Daemen & Vincent Rijmen)",
3674 Dr. Brian Gladman, v3.1, 3rd March 2001. */
3675 /* N values such that (hex) xy = 0x03^N.
3676 0x00 has no logarithm; we store 0xff in that entry. */
3677 /* a 16 x 16 matrix */
3678 static const UChar Nxy[256] = { // row nr
3679 0xff, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, // 1
3680 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
3681 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, // 2
3682 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
3683 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, // 3
3684 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
3685 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, // 4
3686 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
3687 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, // 5
3688 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
3689 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, // 6
3690 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
3691 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, // 7
3692 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
3693 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, // 8
3694 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
3695 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, // 9
3696 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
3697 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, //10
3698 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
3699 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, //11
3700 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
3701 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, //12
3702 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
3703 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, //13
3704 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
3705 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, //14
3706 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
3707 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, //15
3708 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
3709 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, //16
3710 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07
3711 };
3712
3713 /* E values so that E = 0x03^xy. */
3714 static const UChar Exy[256] = { // row nr
3715 0x01, 0x03, 0x05, 0x0f, 0x11, 0x33, 0x55, 0xff, // 1
3716 0x1a, 0x2e, 0x72, 0x96, 0xa1, 0xf8, 0x13, 0x35,
3717 0x5f, 0xe1, 0x38, 0x48, 0xd8, 0x73, 0x95, 0xa4, // 2
3718 0xf7, 0x02, 0x06, 0x0a, 0x1e, 0x22, 0x66, 0xaa,
3719 0xe5, 0x34, 0x5c, 0xe4, 0x37, 0x59, 0xeb, 0x26, // 3
3720 0x6a, 0xbe, 0xd9, 0x70, 0x90, 0xab, 0xe6, 0x31,
3721 0x53, 0xf5, 0x04, 0x0c, 0x14, 0x3c, 0x44, 0xcc, // 4
3722 0x4f, 0xd1, 0x68, 0xb8, 0xd3, 0x6e, 0xb2, 0xcd,
3723 0x4c, 0xd4, 0x67, 0xa9, 0xe0, 0x3b, 0x4d, 0xd7, // 5
3724 0x62, 0xa6, 0xf1, 0x08, 0x18, 0x28, 0x78, 0x88,
3725 0x83, 0x9e, 0xb9, 0xd0, 0x6b, 0xbd, 0xdc, 0x7f, // 6
3726 0x81, 0x98, 0xb3, 0xce, 0x49, 0xdb, 0x76, 0x9a,
3727 0xb5, 0xc4, 0x57, 0xf9, 0x10, 0x30, 0x50, 0xf0, // 7
3728 0x0b, 0x1d, 0x27, 0x69, 0xbb, 0xd6, 0x61, 0xa3,
3729 0xfe, 0x19, 0x2b, 0x7d, 0x87, 0x92, 0xad, 0xec, // 8
3730 0x2f, 0x71, 0x93, 0xae, 0xe9, 0x20, 0x60, 0xa0,
3731 0xfb, 0x16, 0x3a, 0x4e, 0xd2, 0x6d, 0xb7, 0xc2, // 9
3732 0x5d, 0xe7, 0x32, 0x56, 0xfa, 0x15, 0x3f, 0x41,
3733 0xc3, 0x5e, 0xe2, 0x3d, 0x47, 0xc9, 0x40, 0xc0, //10
3734 0x5b, 0xed, 0x2c, 0x74, 0x9c, 0xbf, 0xda, 0x75,
3735 0x9f, 0xba, 0xd5, 0x64, 0xac, 0xef, 0x2a, 0x7e, //11
3736 0x82, 0x9d, 0xbc, 0xdf, 0x7a, 0x8e, 0x89, 0x80,
3737 0x9b, 0xb6, 0xc1, 0x58, 0xe8, 0x23, 0x65, 0xaf, //12
3738 0xea, 0x25, 0x6f, 0xb1, 0xc8, 0x43, 0xc5, 0x54,
3739 0xfc, 0x1f, 0x21, 0x63, 0xa5, 0xf4, 0x07, 0x09, //13
3740 0x1b, 0x2d, 0x77, 0x99, 0xb0, 0xcb, 0x46, 0xca,
3741 0x45, 0xcf, 0x4a, 0xde, 0x79, 0x8b, 0x86, 0x91, //14
3742 0xa8, 0xe3, 0x3e, 0x42, 0xc6, 0x51, 0xf3, 0x0e,
3743 0x12, 0x36, 0x5a, 0xee, 0x29, 0x7b, 0x8d, 0x8c, //15
3744 0x8f, 0x8a, 0x85, 0x94, 0xa7, 0xf2, 0x0d, 0x17,
3745 0x39, 0x4b, 0xdd, 0x7c, 0x84, 0x97, 0xa2, 0xfd, //16
3746 0x1c, 0x24, 0x6c, 0xb4, 0xc7, 0x52, 0xf6, 0x01};
3747
3748 static inline UChar ff_mul(UChar u1, UChar u2)
3749 {
3750 if ((u1 > 0) && (u2 > 0)) {
3751 UInt ui = Nxy[u1] + Nxy[u2];
3752 if (ui >= 255)
3753 ui = ui - 255;
3754 return Exy[ui];
3755 } else {
3756 return 0;
3757 }
3758 }
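
/* Illustrative cross-check only (hypothetical, unused): the same
   GF(2^8) product can be computed without the log/antilog tables by
   the usual shift-and-xor method, reducing modulo the AES polynomial
   x^8 + x^4 + x^3 + x + 1 (0x11B).  Shown only to make the table-based
   ff_mul above easier to verify. */
#if 0
static UChar ff_mul_slow ( UChar a, UChar b )
{
   UInt acc = 0;
   UInt aa  = a;
   UInt bb  = b;
   while (bb != 0) {
      if (bb & 1) acc ^= aa;        /* add (xor) the current multiple */
      aa <<= 1;                     /* multiply by x */
      if (aa & 0x100) aa ^= 0x11B;  /* reduce mod the AES polynomial */
      bb >>= 1;
   }
   return (UChar)acc;
}
#endif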
3759
3760 static void MixColumns (V128* v)
3761 {
3762 V128 r;
3763 Int j;
3764 #define P(x,row,col) (x)->w8[((row)*4+(col))]
3765 for (j = 0; j < 4; j++) {
3766 P(&r,j,0) = ff_mul(0x02, P(v,j,0)) ^ ff_mul(0x03, P(v,j,1))
3767 ^ P(v,j,2) ^ P(v,j,3);
3768 P(&r,j,1) = P(v,j,0) ^ ff_mul( 0x02, P(v,j,1) )
3769 ^ ff_mul(0x03, P(v,j,2) ) ^ P(v,j,3);
3770 P(&r,j,2) = P(v,j,0) ^ P(v,j,1) ^ ff_mul( 0x02, P(v,j,2) )
3771 ^ ff_mul(0x03, P(v,j,3) );
3772 P(&r,j,3) = ff_mul(0x03, P(v,j,0) ) ^ P(v,j,1) ^ P(v,j,2)
3773 ^ ff_mul( 0x02, P(v,j,3) );
3774 }
3775 *v = r;
3776 #undef P
3777 }
3778
3779 static void InvMixColumns (V128* v)
3780 {
3781 V128 r;
3782 Int j;
3783 #define P(x,row,col) (x)->w8[((row)*4+(col))]
3784 for (j = 0; j < 4; j++) {
3785 P(&r,j,0) = ff_mul(0x0e, P(v,j,0) ) ^ ff_mul(0x0b, P(v,j,1) )
3786 ^ ff_mul(0x0d,P(v,j,2) ) ^ ff_mul(0x09, P(v,j,3) );
3787 P(&r,j,1) = ff_mul(0x09, P(v,j,0) ) ^ ff_mul(0x0e, P(v,j,1) )
3788 ^ ff_mul(0x0b,P(v,j,2) ) ^ ff_mul(0x0d, P(v,j,3) );
3789 P(&r,j,2) = ff_mul(0x0d, P(v,j,0) ) ^ ff_mul(0x09, P(v,j,1) )
3790 ^ ff_mul(0x0e,P(v,j,2) ) ^ ff_mul(0x0b, P(v,j,3) );
3791 P(&r,j,3) = ff_mul(0x0b, P(v,j,0) ) ^ ff_mul(0x0d, P(v,j,1) )
3792 ^ ff_mul(0x09,P(v,j,2) ) ^ ff_mul(0x0e, P(v,j,3) );
3793 }
3794 *v = r;
3795 #undef P
3796
3797 }
3798
3799 /* For description, see definition in guest_amd64_defs.h */
3800 void amd64g_dirtyhelper_AES (
3801 VexGuestAMD64State* gst,
3802 HWord opc4, HWord gstOffD,
3803 HWord gstOffL, HWord gstOffR
3804 )
3805 {
3806 // where the args are
3807 V128* argD = (V128*)( ((UChar*)gst) + gstOffD );
3808 V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
3809 V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
3810 V128 r;
3811
3812 switch (opc4) {
3813 case 0xDC: /* AESENC */
3814 case 0xDD: /* AESENCLAST */
3815 r = *argR;
3816 ShiftRows (&r);
3817 SubBytes (&r);
3818 if (opc4 == 0xDC)
3819 MixColumns (&r);
3820 argD->w64[0] = r.w64[0] ^ argL->w64[0];
3821 argD->w64[1] = r.w64[1] ^ argL->w64[1];
3822 break;
3823
3824 case 0xDE: /* AESDEC */
3825 case 0xDF: /* AESDECLAST */
3826 r = *argR;
3827 InvShiftRows (&r);
3828 InvSubBytes (&r);
3829 if (opc4 == 0xDE)
3830 InvMixColumns (&r);
3831 argD->w64[0] = r.w64[0] ^ argL->w64[0];
3832 argD->w64[1] = r.w64[1] ^ argL->w64[1];
3833 break;
3834
3835 case 0xDB: /* AESIMC */
3836 *argD = *argL;
3837 InvMixColumns (argD);
3838 break;
3839 default: vassert(0);
3840 }
3841 }
3842
3843 static inline UInt RotWord (UInt w32)
3844 {
3845 return ((w32 >> 8) | (w32 << 24));
3846 }
3847
3848 static inline UInt SubWord (UInt w32)
3849 {
3850 UChar *w8;
3851 UChar *r8;
3852 UInt res;
3853 w8 = (UChar*) &w32;
3854 r8 = (UChar*) &res;
3855 r8[0] = sbox[w8[0]];
3856 r8[1] = sbox[w8[1]];
3857 r8[2] = sbox[w8[2]];
3858 r8[3] = sbox[w8[3]];
3859 return res;
3860 }
3861
3862 /* For description, see definition in guest_amd64_defs.h */
3863 extern void amd64g_dirtyhelper_AESKEYGENASSIST (
3864 VexGuestAMD64State* gst,
3865 HWord imm8,
3866 HWord gstOffL, HWord gstOffR
3867 )
3868 {
3869 // where the args are
3870 V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
3871 V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
3872
3873 // We have to create the result in a temporary in the
3874 // case where the src and dst regs are the same. See #341698.
3875 V128 tmp;
3876
3877 tmp.w32[3] = RotWord (SubWord (argL->w32[3])) ^ imm8;
3878 tmp.w32[2] = SubWord (argL->w32[3]);
3879 tmp.w32[1] = RotWord (SubWord (argL->w32[1])) ^ imm8;
3880 tmp.w32[0] = SubWord (argL->w32[1]);
3881
3882 argR->w32[3] = tmp.w32[3];
3883 argR->w32[2] = tmp.w32[2];
3884 argR->w32[1] = tmp.w32[1];
3885 argR->w32[0] = tmp.w32[0];
3886 }
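
/* Illustrative restatement only (hypothetical, unused): with
   X1 = src[63:32] and X3 = src[127:96], the helper above computes the
   usual AESKEYGENASSIST result, where RCON is the zero-extended imm8:
      dst[31:0]   = SubWord(X1)
      dst[63:32]  = RotWord(SubWord(X1)) ^ RCON
      dst[95:64]  = SubWord(X3)
      dst[127:96] = RotWord(SubWord(X3)) ^ RCON */
#if 0
static void example_aeskeygenassist_formula ( const V128* src, UInt rcon,
                                              /*OUT*/V128* dst )
{
   dst->w32[0] = SubWord(src->w32[1]);
   dst->w32[1] = RotWord(SubWord(src->w32[1])) ^ rcon;
   dst->w32[2] = SubWord(src->w32[3]);
   dst->w32[3] = RotWord(SubWord(src->w32[3])) ^ rcon;
}
#endif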
3887
3888
3889
3890 /*---------------------------------------------------------------*/
3891 /*--- Helpers for dealing with, and describing, ---*/
3892 /*--- guest state as a whole. ---*/
3893 /*---------------------------------------------------------------*/
3894
3895 /* Initialise the entire amd64 guest state. */
3896 /* VISIBLE TO LIBVEX CLIENT */
3897 void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state )
3898 {
3899 vex_state->host_EvC_FAILADDR = 0;
3900 vex_state->host_EvC_COUNTER = 0;
3901 vex_state->pad0 = 0;
3902
3903 vex_state->guest_RAX = 0;
3904 vex_state->guest_RCX = 0;
3905 vex_state->guest_RDX = 0;
3906 vex_state->guest_RBX = 0;
3907 vex_state->guest_RSP = 0;
3908 vex_state->guest_RBP = 0;
3909 vex_state->guest_RSI = 0;
3910 vex_state->guest_RDI = 0;
3911 vex_state->guest_R8 = 0;
3912 vex_state->guest_R9 = 0;
3913 vex_state->guest_R10 = 0;
3914 vex_state->guest_R11 = 0;
3915 vex_state->guest_R12 = 0;
3916 vex_state->guest_R13 = 0;
3917 vex_state->guest_R14 = 0;
3918 vex_state->guest_R15 = 0;
3919
3920 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
3921 vex_state->guest_CC_DEP1 = 0;
3922 vex_state->guest_CC_DEP2 = 0;
3923 vex_state->guest_CC_NDEP = 0;
3924
3925 vex_state->guest_DFLAG = 1; /* forwards */
3926 vex_state->guest_IDFLAG = 0;
3927 vex_state->guest_ACFLAG = 0;
3928
3929 /* HACK: represent the offset associated with a constant %fs.
3930 Typically, on Linux, %fs is only ever zero (main thread) or
3931 0x63. */
3932 vex_state->guest_FS_CONST = 0;
3933
3934 vex_state->guest_RIP = 0;
3935
3936 /* Initialise the simulated FPU */
3937 amd64g_dirtyhelper_FINIT( vex_state );
3938
3939 /* Initialise the AVX state. */
3940 # define AVXZERO(_ymm) \
3941 do { _ymm[0]=_ymm[1]=_ymm[2]=_ymm[3] = 0; \
3942 _ymm[4]=_ymm[5]=_ymm[6]=_ymm[7] = 0; \
3943 } while (0)
3944 vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST;
3945 AVXZERO(vex_state->guest_YMM0);
3946 AVXZERO(vex_state->guest_YMM1);
3947 AVXZERO(vex_state->guest_YMM2);
3948 AVXZERO(vex_state->guest_YMM3);
3949 AVXZERO(vex_state->guest_YMM4);
3950 AVXZERO(vex_state->guest_YMM5);
3951 AVXZERO(vex_state->guest_YMM6);
3952 AVXZERO(vex_state->guest_YMM7);
3953 AVXZERO(vex_state->guest_YMM8);
3954 AVXZERO(vex_state->guest_YMM9);
3955 AVXZERO(vex_state->guest_YMM10);
3956 AVXZERO(vex_state->guest_YMM11);
3957 AVXZERO(vex_state->guest_YMM12);
3958 AVXZERO(vex_state->guest_YMM13);
3959 AVXZERO(vex_state->guest_YMM14);
3960 AVXZERO(vex_state->guest_YMM15);
3961 AVXZERO(vex_state->guest_YMM16);
3962
3963 # undef AVXZERO
3964
3965 vex_state->guest_EMNOTE = EmNote_NONE;
3966
3967 /* These should never be read or written, but we initialise them
3968 anyway. */
3969 vex_state->guest_CMSTART = 0;
3970 vex_state->guest_CMLEN = 0;
3971
3972 vex_state->guest_NRADDR = 0;
3973 vex_state->guest_SC_CLASS = 0;
3974 vex_state->guest_GS_CONST = 0;
3975
3976 vex_state->guest_IP_AT_SYSCALL = 0;
3977 vex_state->pad1 = 0;
3978 }
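
/* Illustrative sketch only, for LibVEX clients: typical use is to
   initialise the whole state and then fill in the registers that
   matter for the code about to run.  The function name and values
   below are hypothetical. */
#if 0
static void example_client_setup ( VexGuestAMD64State* st,
                                   ULong initial_rsp, ULong entry_rip )
{
   LibVEX_GuestAMD64_initialise(st);
   st->guest_RSP = initial_rsp;   /* client-chosen stack pointer */
   st->guest_RIP = entry_rip;     /* client-chosen entry point */
}
#endif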
3979
3980
3981 /* Figure out if any part of the guest state contained in minoff
3982 .. maxoff requires precise memory exceptions. If in doubt return
3983 True (but this generates significantly slower code).
3984
3985 By default we enforce precise exns for guest %RSP, %RBP and %RIP
3986 only. These are the minimum needed to extract correct stack
3987 backtraces from amd64 code.
3988
3989 Only %RSP is needed in mode VexRegUpdSpAtMemAccess.
3990 */
3991 Bool guest_amd64_state_requires_precise_mem_exns (
3992 Int minoff, Int maxoff, VexRegisterUpdates pxControl
3993 )
3994 {
3995 Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP);
3996 Int rbp_max = rbp_min + 8 - 1;
3997 Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP);
3998 Int rsp_max = rsp_min + 8 - 1;
3999 Int rip_min = offsetof(VexGuestAMD64State, guest_RIP);
4000 Int rip_max = rip_min + 8 - 1;
4001
4002 if (maxoff < rsp_min || minoff > rsp_max) {
4003 /* no overlap with rsp */
4004 if (pxControl == VexRegUpdSpAtMemAccess)
4005 return False; // We only need to check the stack pointer.
4006 } else {
4007 return True;
4008 }
4009
4010 if (maxoff < rbp_min || minoff > rbp_max) {
4011 /* no overlap with rbp */
4012 } else {
4013 return True;
4014 }
4015
4016 if (maxoff < rip_min || minoff > rip_max) {
4017 /* no overlap with rip */
4018 } else {
4019 return True;
4020 }
4021
4022 return False;
4023 }
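
/* Illustrative example only (hypothetical, unused): a write covering
   just guest_RAX overlaps none of RSP/RBP/RIP, so the function above
   says precise exns are not required for it, whereas any write
   touching guest_RSP does require them (in the default unwind-regs
   mode). */
#if 0
static void example_precise_exn_queries ( void )
{
   Int rax = offsetof(VexGuestAMD64State, guest_RAX);
   Int rsp = offsetof(VexGuestAMD64State, guest_RSP);
   vassert(!guest_amd64_state_requires_precise_mem_exns(
               rax, rax+7, VexRegUpdUnwindregsAtMemAccess));
   vassert( guest_amd64_state_requires_precise_mem_exns(
               rsp, rsp+7, VexRegUpdUnwindregsAtMemAccess));
}
#endif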
4024
4025
4026 #define ALWAYSDEFD(field) \
4027 { offsetof(VexGuestAMD64State, field), \
4028 (sizeof ((VexGuestAMD64State*)0)->field) }
4029
4030 VexGuestLayout
4031 amd64guest_layout
4032 = {
4033 /* Total size of the guest state, in bytes. */
4034 .total_sizeB = sizeof(VexGuestAMD64State),
4035
4036 /* Describe the stack pointer. */
4037 .offset_SP = offsetof(VexGuestAMD64State,guest_RSP),
4038 .sizeof_SP = 8,
4039
4040 /* Describe the frame pointer. */
4041 .offset_FP = offsetof(VexGuestAMD64State,guest_RBP),
4042 .sizeof_FP = 8,
4043
4044 /* Describe the instruction pointer. */
4045 .offset_IP = offsetof(VexGuestAMD64State,guest_RIP),
4046 .sizeof_IP = 8,
4047
4048 /* Describe any sections to be regarded by Memcheck as
4049 'always-defined'. */
4050 .n_alwaysDefd = 16,
4051
4052 /* flags thunk: OP and NDEP are always defd, whereas DEP1
4053 and DEP2 have to be tracked. See detailed comment in
4054 gdefs.h on meaning of thunk fields. */
4055 .alwaysDefd
4056 = { /* 0 */ ALWAYSDEFD(guest_CC_OP),
4057 /* 1 */ ALWAYSDEFD(guest_CC_NDEP),
4058 /* 2 */ ALWAYSDEFD(guest_DFLAG),
4059 /* 3 */ ALWAYSDEFD(guest_IDFLAG),
4060 /* 4 */ ALWAYSDEFD(guest_RIP),
4061 /* 5 */ ALWAYSDEFD(guest_FS_CONST),
4062 /* 6 */ ALWAYSDEFD(guest_FTOP),
4063 /* 7 */ ALWAYSDEFD(guest_FPTAG),
4064 /* 8 */ ALWAYSDEFD(guest_FPROUND),
4065 /* 9 */ ALWAYSDEFD(guest_FC3210),
4066 // /* */ ALWAYSDEFD(guest_CS),
4067 // /* */ ALWAYSDEFD(guest_DS),
4068 // /* */ ALWAYSDEFD(guest_ES),
4069 // /* */ ALWAYSDEFD(guest_FS),
4070 // /* */ ALWAYSDEFD(guest_GS),
4071 // /* */ ALWAYSDEFD(guest_SS),
4072 // /* */ ALWAYSDEFD(guest_LDT),
4073 // /* */ ALWAYSDEFD(guest_GDT),
4074 /* 10 */ ALWAYSDEFD(guest_EMNOTE),
4075 /* 11 */ ALWAYSDEFD(guest_SSEROUND),
4076 /* 12 */ ALWAYSDEFD(guest_CMSTART),
4077 /* 13 */ ALWAYSDEFD(guest_CMLEN),
4078 /* 14 */ ALWAYSDEFD(guest_SC_CLASS),
4079 /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
4080 }
4081 };
4082
4083
4084 /*---------------------------------------------------------------*/
4085 /*--- end guest_amd64_helpers.c ---*/
4086 /*---------------------------------------------------------------*/
4087