
/*---------------------------------------------------------------*/
/*--- begin                                guest_x86_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_emnote.h"
#include "libvex_guest_x86.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_x86_defs.h"
#include "guest_generic_x87.h"


/* This file contains helper functions for x86 guest code.  Calls to
   these functions are generated by the back end.  These calls are of
   course in the host machine code and this file will be compiled to
   host machine code, so that all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change the signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   guest-x86/toIR.c.

   The convention used is that all functions called from generated
   code are named x86g_<something>, and any function whose name lacks
   that prefix is not called from generated code.  Note that some
   LibVEX_* functions can however be called by VEX's client, but that
   is not the same as calling them from VEX-generated code.
*/


/* Set to 1 to get detailed profiling info about use of the flag
   machinery. */
#define PROFILE_EFLAGS 0


/*---------------------------------------------------------------*/
/*--- %eflags run-time helpers.                               ---*/
/*---------------------------------------------------------------*/

static const UChar parity_table[256] = {
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
};
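
/* The table above is exactly "X86G_CC_MASK_P if the byte has even
   parity, else 0", indexed by byte value (x86 PF looks only at the
   low 8 bits of a result).  As a hedged illustration -- not part of
   the build -- a standalone generator along these lines should
   reproduce it:

      #include <stdio.h>
      int main ( void )
      {
         int i, b, ones;
         for (i = 0; i < 256; i++) {
            for (ones = 0, b = 0; b < 8; b++)
               ones += (i >> b) & 1;
            printf("%s, ", (ones & 1) ? "0" : "X86G_CC_MASK_P");
         }
         return 0;
      }
*/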

/* generalised left-shifter */
inline static Int lshift ( Int x, Int n )
{
   if (n >= 0)
      return (UInt)x << n;
   else
      return x >> (-n);
}
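
/* Worked example of the negative-shift convention: the flag helpers
   below call lshift(res, 8 - DATA_BITS) to bring the sign bit of an
   8/16/32-bit result to bit 7, where masking with 0x80 yields SF in
   its EFLAGS position.  For DATA_BITS==32 that is res >> 24 (n ==
   -24); for DATA_BITS==8 it is the identity (n == 0). */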

/* identity on ULong */
static inline ULong idULong ( ULong x )
{
   return x;
}


#define PREAMBLE(__data_bits)                                   \
   /* const */ UInt DATA_MASK                                   \
      = __data_bits==8 ? 0xFF                                   \
                       : (__data_bits==16 ? 0xFFFF              \
                                          : 0xFFFFFFFF);        \
   /* const */ UInt SIGN_MASK = 1u << (__data_bits - 1);        \
   /* const */ UInt CC_DEP1 = cc_dep1_formal;                   \
   /* const */ UInt CC_DEP2 = cc_dep2_formal;                   \
   /* const */ UInt CC_NDEP = cc_ndep_formal;                   \
   /* Four bogus assignments, which hopefully gcc can           \
      optimise away, and which stop it complaining about        \
      unused variables. */                                      \
   SIGN_MASK = SIGN_MASK;                                       \
   DATA_MASK = DATA_MASK;                                       \
   CC_DEP2 = CC_DEP2;                                           \
   CC_NDEP = CC_NDEP;

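/* How the thunk scheme plays out: rather than computing EFLAGS
   eagerly, toIR.c records an operation tag in CC_OP and its operands
   in CC_DEP1/CC_DEP2 (plus CC_NDEP for stray dependencies such as an
   old carry), and the ACTIONS_* bodies below rebuild the six flags on
   demand.  A hedged, illustrative evaluation for a 32-bit add:

      cc_op   = X86G_CC_OP_ADDL
      cc_dep1 = 0xFFFFFFFF   (argL)
      cc_dep2 = 0x00000001   (argR)
      =>  res == 0, so CF=1 (carry out), ZF=1, AF=1, PF=1, SF=0, OF=0
*/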

/*-------------------------------------------------------------*/

#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL + argR;                                        \
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                   \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
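
/* Why the OF expression in ACTIONS_ADD works: signed overflow on
   addition occurs exactly when both operands have the same sign and
   the result's sign differs, i.e. when ~(argL^argR) & (argL^res) has
   bit DATA_BITS-1 set; (argL ^ argR ^ -1) is just ~(argL ^ argR), and
   lshift(..., 12 - DATA_BITS) moves that bit to bit 11, OF's position
   in EFLAGS.  E.g. for the 8-bit add 0x7F + 0x01 = 0x80,
   ~(0x7F^0x01) & (0x7F^0x80) has bit 7 set, and (0x81 << 4)
   & X86G_CC_MASK_O gives OF = 1. */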

/*-------------------------------------------------------------*/

#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL - argR;                                        \
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;                  \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, oldC, res;                                \
     oldC = CC_NDEP & X86G_CC_MASK_C;                           \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL + argR) + oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;               \
     else                                                       \
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
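
/* Note on "argR = CC_DEP2 ^ oldC" in ACTIONS_ADC (and ACTIONS_SBB
   below): toIR.c stores the second operand xor'd with the old carry
   bit in CC_DEP2, presumably so that the thunk's stated dependencies
   stay honest for definedness-tracking tools like Memcheck; the
   helper undoes that xor here to recover the real argR before redoing
   the operation with carry-in. */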

/*-------------------------------------------------------------*/

#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, oldC, res;                                \
     oldC = CC_NDEP & X86G_CC_MASK_C;                           \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL - argR) - oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;              \
     else                                                       \
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;               \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = 0;                                                    \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res - 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & X86G_CC_MASK_C;                             \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;               \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res + 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & X86G_CC_MASK_C;                             \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK)                                    \
           == ((UInt)SIGN_MASK - 1)) << 11;                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C;        \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = CC_DEP2 & 1;                                          \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt fl                                                    \
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))        \
          | (X86G_CC_MASK_C & CC_DEP1)                          \
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,                  \
                                      11-(DATA_BITS-1))         \
                               ^ lshift(CC_DEP1, 11)));         \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt fl                                                    \
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))        \
          | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))       \
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,                  \
                                      11-(DATA_BITS-1))         \
                               ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU,          \
                     DATA_U2TYPE, NARROWto2U)                   \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_UTYPE hi;                                             \
     DATA_UTYPE lo                                              \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS,          \
                     DATA_S2TYPE, NARROWto2S)                   \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_STYPE hi;                                             \
     DATA_STYPE lo                                              \
        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}


#if PROFILE_EFLAGS

static Bool initted = False;

/* C flag, fast route */
static UInt tabc_fast[X86G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[X86G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[X86G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))


static void showCounts ( void )
{
   Int op, co;
   HChar ch;
   vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c);

   vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE"
              " S NS P NP L NL LE NLE\n");
   vex_printf(" -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {

      ch = ' ';
      if (op > 0 && (op-1) % 3 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 3 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 3 == 2)
         ch = 'L';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else
         if (n >= 0) {
            vex_printf(" %3d ", n );
         } else {
            vex_printf("     ");
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}

static void initCounts ( void )
{
   Int op, co;
   initted = True;
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
      tabc_fast[op] = tabc_slow[op] = 0;
      for (co = 0; co < 16; co++)
         tab_cond[op][co] = 0;
   }
}

#endif /* PROFILE_EFLAGS */


/* Calculate all 6 flags from the supplied thunk parameters.  Worker
   function, not called directly from generated code; the clean-helper
   wrappers below are. */
static
UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
                                     UInt cc_dep1_formal,
                                     UInt cc_dep2_formal,
                                     UInt cc_ndep_formal )
{
   switch (cc_op) {
      case X86G_CC_OP_COPY:
         return cc_dep1_formal
                & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                   | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);

      case X86G_CC_OP_ADDB:   ACTIONS_ADD(  8, UChar  );
      case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );

      case X86G_CC_OP_ADCB:   ACTIONS_ADC(  8, UChar  );
      case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );

      case X86G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );

      case X86G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );

      case X86G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );

      case X86G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );

      case X86G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );

      case X86G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );

      case X86G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );

      case X86G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );

      case X86G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );

      case X86G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                UShort, toUShort );
      case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                UInt,   toUInt );
      case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                ULong,  idULong );

      case X86G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,  toUChar,
                                                Short, toUShort );
      case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short, toUShort,
                                                Int,   toUInt );
      case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,   toUInt,
                                                Long,  idULong );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_eflags_all_WRK(X86)"
                    "( %u, 0x%x, 0x%x, 0x%x )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("x86g_calculate_eflags_all_WRK(X86)");
   }
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all 6 flags from the supplied thunk parameters. */
UInt x86g_calculate_eflags_all ( UInt cc_op,
                                 UInt cc_dep1,
                                 UInt cc_dep2,
                                 UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_all++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif
   return
      x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters. */
VEX_REGPARM(3)
UInt x86g_calculate_eflags_c ( UInt cc_op,
                               UInt cc_dep1,
                               UInt cc_dep2,
                               UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_c++;
   tabc_fast[cc_op]++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   /* Fast-case some common ones. */
   switch (cc_op) {
      case X86G_CC_OP_LOGICL:
      case X86G_CC_OP_LOGICW:
      case X86G_CC_OP_LOGICB:
         return 0;
      case X86G_CC_OP_SUBL:
         return ((UInt)cc_dep1) < ((UInt)cc_dep2)
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBW:
         return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBB:
         return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_INCL:
      case X86G_CC_OP_DECL:
         return cc_ndep & X86G_CC_MASK_C;
      default:
         break;
   }

#  if PROFILE_EFLAGS
   tabc_fast[cc_op]--;
   tabc_slow[cc_op]++;
#  endif

   return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
          & X86G_CC_MASK_C;
}
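
/* The fast cases above matter a great deal: "cmp ; jcc" style
   sequences make generated code query the carry flag constantly, and
   each fast case is a branch-free expression which iropt can also
   fold away at translation time via guest_x86_spechelper below.  The
   shape of the IR call that toIR.c constructs is, schematically (a
   hedged sketch, not the literal construction):

      t = CALL x86g_calculate_eflags_c [ CC_OP, CC_DEP1,
                                         CC_DEP2, CC_NDEP ]
*/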


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* returns 1 or 0 */
UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
                                UInt cc_op,
                                UInt cc_dep1,
                                UInt cc_dep2,
                                UInt cc_ndep )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
                                               cc_dep2, cc_ndep);
   UInt of,sf,zf,cf,pf;
   UInt inv = cond & 1;

#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   tab_cond[cc_op][cond]++;
   n_calc_cond++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   switch (cond) {
      case X86CondNO:
      case X86CondO: /* OF == 1 */
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ of);

      case X86CondNZ:
      case X86CondZ: /* ZF == 1 */
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ zf);

      case X86CondNB:
      case X86CondB: /* CF == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         return 1 & (inv ^ cf);

      case X86CondNBE:
      case X86CondBE: /* (CF or ZF) == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ (cf | zf));

      case X86CondNS:
      case X86CondS: /* SF == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         return 1 & (inv ^ sf);

      case X86CondNP:
      case X86CondP: /* PF == 1 */
         pf = eflags >> X86G_CC_SHIFT_P;
         return 1 & (inv ^ pf);

      case X86CondNL:
      case X86CondL: /* (SF xor OF) == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ (sf ^ of));

      case X86CondNLE:
      case X86CondLE: /* ((SF xor OF) or ZF) == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ ((sf ^ of) | zf));

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
         vpanic("x86g_calculate_condition");
   }
}


/* VISIBLE TO LIBVEX CLIENT */
UInt LibVEX_GuestX86_get_eflags ( /*IN*/const VexGuestX86State* vex_state )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   UInt dflag = vex_state->guest_DFLAG;
   vassert(dflag == 1 || dflag == 0xFFFFFFFF);
   if (dflag == 0xFFFFFFFF)
      eflags |= (1<<10);
   if (vex_state->guest_IDFLAG == 1)
      eflags |= (1<<21);
   if (vex_state->guest_ACFLAG == 1)
      eflags |= (1<<18);

   return eflags;
}

/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
                              /*MOD*/VexGuestX86State* vex_state )
{
   UInt oszacp = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   if (new_carry_flag & 1) {
      oszacp |= X86G_CC_MASK_C;
   } else {
      oszacp &= ~X86G_CC_MASK_C;
   }
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = oszacp;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}


/*---------------------------------------------------------------*/
/*--- %eflags translation-time function specialisers.         ---*/
/*--- These help iropt specialise calls to the above run-time ---*/
/*--- %eflags functions.                                      ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */

static inline Bool isU32 ( IRExpr* e, UInt n )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == n );
}
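
/* Typical use: isU32(cc_op, X86G_CC_OP_SUBL) asks "is this argument
   the literal constant X86G_CC_OP_SUBL?".  Every pattern match below
   keys on the thunk's operation and condition this way, so a call is
   only specialised when both are known at translation time. */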

IRExpr* guest_x86_spechelper ( const HChar* function_name,
                               IRExpr** args,
                               IRStmt** precedingStmts,
                               Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "x86g_calculate_condition" --------- */

   if (vex_streq(function_name, "x86g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
         /* long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }

      /*---------------- SUBL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, cc_dep1, cc_dep2));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
         /* long sub/cmp, then NL (signed greater than or equal)
            --> test !(dst <s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
         /* long sub/cmp, then NLE (signed not less than or equal)
            --> test dst >s src
            --> test !(dst <=s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test !(dst <=u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
         /* long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
         /* long sub/cmp, then NB (unsigned greater than or equal)
            --> test !(dst <u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S,
                           binop(Iop_Sub32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
         /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S,
                                 binop(Iop_Sub32, cc_dep1, cc_dep2),
                                 mkU32(0))),
                      mkU32(1));
      }

      /*---------------- SUBW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }

      /*---------------- SUBB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
         /* byte sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep2,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep1,mkU32(0xFF))));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
          && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (UInt)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU32(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U32s. */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
          && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (UInt) !dst[7]
         */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                      mkU32(1));
      }

      /*---------------- LOGICL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
         /* long and/or/xor, then BE
            LOGIC sets ZF according to the result and makes CF be zero.
            BE computes (CF | ZF), but CF is zero, so this reduces to ZF
            -- which will be 1 iff the result is zero.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* long and/or/xor, then S --> (UInt)result[31] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
         /* see comment below for (LOGICB, CondNS) */
         /* long and/or/xor, then NS --> (UInt) ~ result[31] */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(31)),
                            mkU32(1)),
                      mkU32(1));
      }

      /*---------------- LOGICW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
         /* word and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
                           mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* word and/or/xor, then S --> (UInt)result[15] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(15)),
                      mkU32(1));
      }

      /*---------------- LOGICB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         /* b9ac9: 84 c0    test %al,%al
            b9acb: 75 0d    jne  b9ada */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
                           mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
         /* ditto, for negation-of-S. */
         /* byte and/or/xor, then NS --> (UInt) ~ result[7] */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                      mkU32(1));
      }

      /*---------------- DECL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
         /* dec L, then S --> compare DST <s 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
      }

      /*---------------- DECW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
         /* dec W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- INCW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
         /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
         /* inc W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- SHRL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
         /* SHRL, then Z --> test dep1 == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      /*---------------- COPY ----------------*/
      /* This can happen, as a result of x87 FP compares: "fcom ... ;
         fnstsw %ax ; sahf ; jbe" for example. */

      if (isU32(cc_op, X86G_CC_OP_COPY) &&
          (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test
            (C or Z) == 1. */
         /* COPY, then NBE --> extract C and Z from dep1, and test
            (C or Z) == 0. */
         UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(
                        Iop_Or32,
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
                     ),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
         /* COPY, then NB --> extract C from dep1, and test (C == 0). */
         UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         /* COPY, then NP --> extract P from dep1, and test (P == 0). */
         UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_c" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
      /* specialise calls to above "calculate_eflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, X86G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep1,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep2,mkU32(0xFF))));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL)
          || isU32(cc_op, X86G_CC_OP_LOGICW)
          || isU32(cc_op, X86G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU32(0);
      }
      if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return cc_ndep;
      }
      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* cflag after COPY is stored in DEP1. */
         return
            binop(
               Iop_And32,
               binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
               mkU32(1)
            );
      }
      if (isU32(cc_op, X86G_CC_OP_ADDL)) {
         /* C after add denotes sum <u either arg */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           cc_dep1));
      }
      // ATC, requires verification, no test case known
      //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
      //   /* C after signed widening multiply denotes the case where
      //      the top half of the result isn't simply the sign extension
      //      of the bottom half (iow the result doesn't fit completely
      //      in the bottom half). Hence:
      //        C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
      //      where 'x' denotes signed widening multiply.*/
      //   return
      //      unop(Iop_1Uto32,
      //           binop(Iop_CmpNE32,
      //                 unop(Iop_64HIto32,
      //                      binop(Iop_MullS32, cc_dep1, cc_dep2)),
      //                 binop(Iop_Sar32,
      //                       binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
      //}
#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_all" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
      /* specialise calls to above "calculate_eflags_all" function */
      IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      /* cc_dep2 = args[2]; */
      /* cc_ndep = args[3]; */

      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* eflags after COPY are stored in DEP1. */
         return
            binop(
               Iop_And32,
               cc_dep1,
               mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                     | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
            );
      }
      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU32
#  undef mkU8

   return NULL;
}


/*---------------------------------------------------------------*/
/*--- Supporting functions for x87 FPU activities.            ---*/
/*---------------------------------------------------------------*/

static inline Bool host_is_little_endian ( void )
{
   UInt x = 0x76543210;
   UChar* p = (UChar*)(&x);
   return toBool(*p == 0x10);
}

/* 80 and 64-bit floating point formats:

   80-bit:

    S  0       0-------0      zero
    S  0       0X------X      denormals
    S  1-7FFE  1X------X      normals (all normals have leading 1)
    S  7FFF    10------0      infinity
    S  7FFF    10X-----X      snan
    S  7FFF    11X-----X      qnan

   S is the sign bit.  For runs X----X, at least one of the Xs must be
   nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
   there is an explicitly represented leading 1, and a sign bit,
   giving 80 in total.

   64-bit avoids the confusion of an explicitly represented leading 1
   and so is simpler:

    S  0      0------0   zero
    S  0      X------X   denormals
    S  1-7FE  any        normals
    S  7FF    0------0   infinity
    S  7FF    0X-----X   snan
    S  7FF    1X-----X   qnan

   Exponent is 11 bits, fractional part is 52 bits, and there is a
   sign bit, giving 64 in total.
*/
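/* Worked example, the value 1.0 in both formats (the exponent biases
   are 16383 and 1023 respectively, so both encode 2^0 * 1.0):

    80-bit:  S=0  exp=0x3FFF  frac=0x8000000000000000
             (leading 1 explicit, in the top fraction bit)
    64-bit:  S=0  exp=0x3FF   frac=0 (leading 1 implicit)

   The general conversions are convert_f80le_to_f64le and
   convert_f64le_to_f80le, declared in guest_generic_x87.h. */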

/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
{
   Bool   mantissaIsZero;
   Int    bexp;
   UChar  sign;
   UChar* f64;

   vassert(host_is_little_endian());

   /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */

   f64  = (UChar*)(&dbl);
   sign = toUChar( (f64[7] >> 7) & 1 );

   /* First off, if the tag indicates the register was empty,
      return 1,0,sign,1 */
   if (tag == 0) {
      /* vex_printf("Empty\n"); */
      return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
                             | X86G_FC_MASK_C0;
   }

   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   bexp &= 0x7FF;

   mantissaIsZero
      = toBool(
           (f64[6] & 0x0F) == 0
           && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
        );

   /* If both exponent and mantissa are zero, the value is zero.
      Return 1,0,sign,0. */
   if (bexp == 0 && mantissaIsZero) {
      /* vex_printf("Zero\n"); */
      return X86G_FC_MASK_C3 | 0
             | (sign << X86G_FC_SHIFT_C1) | 0;
   }

   /* If exponent is zero but mantissa isn't, it's a denormal.
      Return 1,1,sign,0. */
   if (bexp == 0 && !mantissaIsZero) {
      /* vex_printf("Denormal\n"); */
      return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
             | (sign << X86G_FC_SHIFT_C1) | 0;
   }

   /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
      Return 0,1,sign,1. */
   if (bexp == 0x7FF && mantissaIsZero) {
      /* vex_printf("Inf\n"); */
      return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
               | X86G_FC_MASK_C0;
   }

   /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
      Return 0,0,sign,1. */
   if (bexp == 0x7FF && !mantissaIsZero) {
      /* vex_printf("NaN\n"); */
      return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
   }

   /* Uh, ok, we give up.  It must be a normal finite number.
      Return 0,1,sign,0.
   */
   /* vex_printf("normal\n"); */
   return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest memory) */
ULong x86g_dirtyhelper_loadF80le ( Addr addrU )
{
   ULong f64;
   convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
   return f64;
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest memory) */
void x86g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
{
   convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
}


/*----------------------------------------------*/
/*--- The exported fns ..                    ---*/
/*----------------------------------------------*/

/* Layout of the real x87 state. */
/* 13 June 05: Fpu_State and auxiliary constants were moved to
   guest_generic_x87.h */


/* CLEAN HELPER */
/* fpucw[15:0] contains an x87 native format FPU control word.
   Extract from it the required FPROUND value and any resulting
   emulation warning, and return (warn << 32) | fpround value.
*/
ULong x86g_check_fldcw ( UInt fpucw )
{
   /* Decide on a rounding mode.  fpucw[11:10] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   UInt rmode = (fpucw >> 10) & 3;

   /* Detect any required emulation warnings. */
   VexEmNote ew = EmNote_NONE;

   if ((fpucw & 0x3F) != 0x3F) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_x87exns;
   }
   else
   if (((fpucw >> 8) & 3) != 3) {
      /* unsupported precision */
      ew = EmWarn_X86_x87precision;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}
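
/* Example: the x87 power-up control word 0x037F (all exceptions
   masked, 64-bit precision, round-to-nearest) passes both checks, so
   x86g_check_fldcw(0x037F) returns 0, i.e. no warning and rmode
   Irrm_NEAREST.  An unmasked-exception word such as 0x0370 instead
   yields EmWarn_X86_x87exns in the top 32 bits. */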

/* CLEAN HELPER */
/* Given fpround as an IRRoundingMode value, create a suitable x87
   native format FPU control word. */
UInt x86g_create_fpucw ( UInt fpround )
{
   fpround &= 3;
   return 0x037F | (fpround << 10);
}


/* CLEAN HELPER */
/* mxcsr[15:0] contains an SSE native format MXCSR value.
   Extract from it the required SSEROUND value and any resulting
   emulation warning, and return (warn << 32) | sseround value.
*/
ULong x86g_check_ldmxcsr ( UInt mxcsr )
{
   /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   UInt rmode = (mxcsr >> 13) & 3;

   /* Detect any required emulation warnings. */
   VexEmNote ew = EmNote_NONE;

   if ((mxcsr & 0x1F80) != 0x1F80) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_sseExns;
   }
   else
   if (mxcsr & (1<<15)) {
      /* FZ is set */
      ew = EmWarn_X86_fz;
   }
   else
   if (mxcsr & (1<<6)) {
      /* DAZ is set */
      ew = EmWarn_X86_daz;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}


/* CLEAN HELPER */
/* Given sseround as an IRRoundingMode value, create a suitable SSE
   native format MXCSR value. */
UInt x86g_create_mxcsr ( UInt sseround )
{
   sseround &= 3;
   return 0x1F80 | (sseround << 13);
}
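
/* Example round trip: x86g_create_mxcsr(2) == 0x5F80 (exceptions
   masked, round-up), and x86g_check_ldmxcsr(0x5F80) hands back rmode 2
   with no warning.  So the guest's rounding mode survives an
   FXSAVE/FXRSTOR pair even though the rest of MXCSR is synthesised. */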


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state) */
/* Initialise the x87 FPU state as per 'finit'. */
void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
{
   Int i;
   gst->guest_FTOP = 0;
   for (i = 0; i < 8; i++) {
      gst->guest_FPTAG[i] = 0; /* empty */
      gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
   }
   gst->guest_FPROUND = (UInt)Irrm_NEAREST;
   gst->guest_FC3210  = 0;
}


/* This is used to implement both 'frstor' and 'fldenv'.  The latter
   appears to differ from the former only in that the 8 FP registers
   themselves are not transferred into the guest state. */
static
VexEmNote do_put_x87 ( Bool moveRegs,
                       /*IN*/UChar* x87_state,
                       /*OUT*/VexGuestX86State* vex_state )
{
   Int        stno, preg;
   UInt       tag;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
   UInt       tagw    = x87->env[FP_ENV_TAG];
   UInt       fpucw   = x87->env[FP_ENV_CTRL];
   UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;
   VexEmNote  ew;
   UInt       fpround;
   ULong      pair;

   /* Copy registers and tags */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?  Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         if (moveRegs)
            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         if (moveRegs)
            convert_f80le_to_f64le( &x87->reg[10*stno],
                                    (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings. */
   pair    = x86g_check_fldcw ( (UInt)fpucw );
   fpround = (UInt)pair;
   ew      = (VexEmNote)(pair >> 32);

   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}
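
/* Aside on tag encodings: the native x87 tag word has two bits per
   register (00 = valid, 01 = zero, 10 = special, 11 = empty), whereas
   guest_FPTAG keeps just one bit per register (0 = empty, 1 = full).
   So the loop above collapses every non-empty native tag to "full",
   and do_get_x87 below can only ever emit the 00 and 11 patterns when
   rebuilding a native tag word. */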


/* Create an x87 FPU state from the guest state, as close as
   we can approximate it. */
static
void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
                  /*OUT*/UChar* x87_state )
{
   Int        i, stno, preg;
   UInt       tagw;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = vex_state->guest_FTOP;
   UInt       c3210   = vex_state->guest_FC3210;

   for (i = 0; i < 14; i++)
      x87->env[i] = 0;

   x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
   x87->env[FP_ENV_STAT]
      = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   x87->env[FP_ENV_CTRL]
      = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));

   /* Dump the register stack in ST order. */
   tagw = 0;
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      if (vexTags[preg] == 0) {
         /* register is empty */
         tagw |= (3 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87->reg[10*stno] );
      } else {
         /* register is full. */
         tagw |= (0 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87->reg[10*stno] );
      }
   }
   x87->env[FP_ENV_TAG] = toUShort(tagw);
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
{
   /* Somewhat roundabout, but at least it's simple. */
   Fpu_State tmp;
   UShort*   addrS = (UShort*)addr;
   UChar*    addrC = (UChar*)addr;
   U128*     xmm   = (U128*)(addr + 160);
   UInt      mxcsr;
   UShort    fp_tags;
   UInt      summary_tags;
   Int       r, stno;
   UShort    *srcS, *dstS;

   do_get_x87( gst, (UChar*)&tmp );
   mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );

   /* Now build the proper fxsave image from the x87 image we just
      made. */

   addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */

   /* set addrS[2] in an endian-independent way */
   summary_tags = 0;
   fp_tags = tmp.env[FP_ENV_TAG];
   for (r = 0; r < 8; r++) {
      if ( ((fp_tags >> (2*r)) & 3) != 3 )
         summary_tags |= (1 << r);
   }
   addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
   addrC[5]  = 0; /* pad */

   addrS[3]  = 0; /* FOP: fpu opcode (bogus) */
   addrS[4]  = 0;
   addrS[5]  = 0; /* FPU IP (bogus) */
   addrS[6]  = 0; /* FPU IP's segment selector (bogus) (although we
                     could conceivably dump %CS here) */

   addrS[7]  = 0; /* Intel reserved */

   addrS[8]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[9]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[10] = 0; /* segment selector for above operand pointer; %DS
                     perhaps? */
   addrS[11] = 0; /* Intel reserved */

   addrS[12] = toUShort(mxcsr); /* MXCSR */
   addrS[13] = toUShort(mxcsr >> 16);

   addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
   addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */

   /* Copy in the FP registers, in ST order. */
   for (stno = 0; stno < 8; stno++) {
      srcS = (UShort*)(&tmp.reg[10*stno]);
      dstS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
      dstS[5] = 0;
      dstS[6] = 0;
      dstS[7] = 0;
   }

   /* That's the first 160 bytes of the image done.  Now only %xmm0
      .. %xmm7 remain to be copied.  If the host is big-endian, these
      need to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( xmm[0], gst->guest_XMM0 );
   COPY_U128( xmm[1], gst->guest_XMM1 );
   COPY_U128( xmm[2], gst->guest_XMM2 );
   COPY_U128( xmm[3], gst->guest_XMM3 );
   COPY_U128( xmm[4], gst->guest_XMM4 );
   COPY_U128( xmm[5], gst->guest_XMM5 );
   COPY_U128( xmm[6], gst->guest_XMM6 );
   COPY_U128( xmm[7], gst->guest_XMM7 );

#  undef COPY_U128
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
{
   Fpu_State tmp;
   VexEmNote warnX87 = EmNote_NONE;
   VexEmNote warnXMM = EmNote_NONE;
   UShort*   addrS   = (UShort*)addr;
   UChar*    addrC   = (UChar*)addr;
   U128*     xmm     = (U128*)(addr + 160);
   UShort    fp_tags;
   Int       r, stno, i;

   /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
      to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( gst->guest_XMM0, xmm[0] );
   COPY_U128( gst->guest_XMM1, xmm[1] );
   COPY_U128( gst->guest_XMM2, xmm[2] );
   COPY_U128( gst->guest_XMM3, xmm[3] );
   COPY_U128( gst->guest_XMM4, xmm[4] );
   COPY_U128( gst->guest_XMM5, xmm[5] );
   COPY_U128( gst->guest_XMM6, xmm[6] );
   COPY_U128( gst->guest_XMM7, xmm[7] );

#  undef COPY_U128

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */

   /* LLVM on Darwin turns the following loop into a movaps plus a
      handful of scalar stores.  This would work fine except for the
      fact that VEX doesn't keep the stack correctly (16-) aligned for
      the call, so it segfaults.  Hence, split the loop into two
      pieces (and pray LLVM doesn't merely glue them back together) so
      it's composed only of scalar stores and so is alignment
      insensitive.  Of course this is a kludge of the lamest kind --
      VEX should be fixed properly. */
   /* Code that seems to trigger the problem:
      for (i = 0; i < 14; i++) tmp.env[i] = 0; */
   for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
   for (i = 0; i < 7; i++) tmp.env[i+7] = 0;

   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..7] */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */

   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
      else
         fp_tags |= (3 << (2*r)); /* EMPTY */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state. */
   warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );

   { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
                | ((((UInt)addrS[13]) & 0xFFFF) << 16);
     ULong w64 = x86g_check_ldmxcsr( w32 );

     warnXMM = (VexEmNote)(w64 >> 32);

     gst->guest_SSEROUND = (UInt)w64;
   }

   /* Prefer an X87 emwarn over an XMM one, if both exist. */
   if (warnX87 != EmNote_NONE)
      return warnX87;
   else
      return warnXMM;
}
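
/* Illustrative sketch (hypothetical harness, not part of VEX): a
   round trip through the two helpers above.  'buf' stands in for the
   512-byte guest memory area an fxsave-style image occupies; a real
   caller must guarantee that much writable space. */
static void fxsave_roundtrip_SKETCH ( VexGuestX86State* gst )
{
   UChar buf[512];
   x86g_dirtyhelper_FXSAVE ( gst, (HWord)buf );
   /* ... guest FP/SSE state may be modified here ... */
   (void)x86g_dirtyhelper_FXRSTOR ( gst, (HWord)buf );
}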


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
{
   do_get_x87( gst, (UChar*)addr );
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
{
   return do_put_x87( True/*regs too*/, (UChar*)addr, gst );
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
{
   /* Somewhat roundabout, but at least it's simple. */
   Int       i;
   UShort*   addrP = (UShort*)addr;
   Fpu_State tmp;
   do_get_x87( gst, (UChar*)&tmp );
   for (i = 0; i < 14; i++)
      addrP[i] = tmp.env[i];
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
{
   return do_put_x87( False/*don't move regs*/, (UChar*)addr, gst);
}
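
/* Illustrative sketch (hypothetical helper, not part of VEX): pull
   out the three words of the 14-UShort FSTENV/FLDENV environment
   image that carry simulated state.  The remaining words are
   instruction and operand pointer fields which, as the FXSAVE code
   above notes, this model does not track. */
static void fpu_env_words_SKETCH ( const UShort* env /* 14 UShorts */,
                                   UShort* fcw, UShort* fsw, UShort* ftw )
{
   *fcw = env[FP_ENV_CTRL];
   *fsw = env[FP_ENV_STAT];
   *ftw = env[FP_ENV_TAG];
}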


/*---------------------------------------------------------------*/
/*--- Misc integer helpers, including rotates and CPUID.      ---*/
/*---------------------------------------------------------------*/

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for rotate right
   through the carry bit.  Result in low 32 bits,
   new flags (OSZACP) in high 32 bits.
*/
ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;

   switch (sz) {
      case 4:
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of = ((arg >> 31) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = (arg >> 1) | (cf << 31);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 2:
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of = ((arg >> 15) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7FFF) | (cf << 15);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 1:
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of = ((arg >> 7) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7F) | (cf << 7);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      default:
         vpanic("calculate_RCR: invalid size");
   }

   cf &= 1;
   of &= 1;
   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);

   return (((ULong)eflags_in) << 32) | ((ULong)arg);
}
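
/* Illustrative sketch (hypothetical, not used by VEX): unpacking the
   64-bit result of the RCR/RCL helpers -- rotated value in the low 32
   bits, updated eflags in the high 32 bits.  RCL below uses exactly
   the same convention. */
static void rcr_unpack_SKETCH ( void )
{
   /* 8-bit RCR by 1 of 0x01 with CF set rotates the old CF into the
      top bit: value 0x80, and the shifted-out bit becomes the new CF. */
   ULong r     = x86g_calculate_RCR ( 0x01, 1, X86G_CC_MASK_C, 1 );
   UInt  value = (UInt)r;
   UInt  flags = (UInt)(r >> 32);
   vassert((value & 0xFF) == 0x80);
   vassert((flags & X86G_CC_MASK_C) != 0);
}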


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for rotate left
   through the carry bit.  Result in low 32 bits,
   new flags (OSZACP) in high 32 bits.
*/
ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;

   switch (sz) {
      case 4:
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 31) & 1;
            arg    = (arg << 1) | (cf & 1);
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 31) ^ cf) & 1;
         break;
      case 2:
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 15) & 1;
            arg    = 0xFFFF & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 15) ^ cf) & 1;
         break;
      case 1:
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 7) & 1;
            arg    = 0xFF & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 7) ^ cf) & 1;
         break;
      default:
         vpanic("calculate_RCL: invalid size");
   }

   cf &= 1;
   of &= 1;
   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);

   return (((ULong)eflags_in) << 32) | ((ULong)arg);
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for DAA/DAS/AAA/AAS.
   AX value in low half of arg, OSZACP in upper half.
   See guest-x86/toIR.c usage point for details.
*/
static UInt calc_parity_8bit ( UInt w32 ) {
   UInt i;
   UInt p = 1;
   for (i = 0; i < 8; i++)
      p ^= (1 & (w32 >> i));
   return p;
}
UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
{
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   UInt result = 0;

   switch (opcode) {
      case 0x27: { /* DAA */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_C  = old_C;
            if (r_AL >= 0x100) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL + 0x60;
            r_C  = 1;
         } else {
            r_C = 0;
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x2F: { /* DAS */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            Bool borrow = r_AL < 6;
            r_AL = r_AL - 6;
            r_C  = old_C;
            if (borrow) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL - 0x60;
            r_C  = 1;
         } else {
            /* Intel docs are wrong: r_C = 0; */
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x37: { /* AAA */
         Bool nudge = r_AL > 0xF9;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_AH = r_AH + 1 + (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      case 0x3F: { /* AAS */
         Bool nudge = r_AL < 0x06;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL - 6;
            r_AH = r_AH - 1 - (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      default:
         vassert(0);
   }
   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
            | ( (r_AH & 0xFF) << 8 )
            | ( (r_AL & 0xFF) << 0 );
   return result;
}
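
/* Illustrative sketch (hypothetical, not used by VEX): the packing
   convention of the helper above -- AL in bits 0..7, AH in bits 8..15,
   OSZACP in bits 16 upward, with the opcode byte selecting
   DAA/DAS/AAA/AAS. */
static void daa_pack_SKETCH ( void )
{
   /* DAA on AL=0x0F with all flags clear: low nibble > 9, so AL
      becomes 0x15 and AF is set. */
   UInt out = x86g_calculate_daa_das_aaa_aas ( 0x0F, 0x27/*DAA*/ );
   vassert((out & 0xFF) == 0x15);
   vassert((out & (X86G_CC_MASK_A << 16)) != 0);
}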

UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
{
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   UInt result = 0;

   switch (opcode) {
      case 0xD4: { /* AAM */
         r_AH = r_AL / 10;
         r_AL = r_AL % 10;
         break;
      }
      case 0xD5: { /* AAD */
         r_AL = ((r_AH * 10) + r_AL) & 0xff;
         r_AH = 0;
         break;
      }
      default:
         vassert(0);
   }

   r_O = 0; /* let's say (undefined) */
   r_C = 0; /* let's say (undefined) */
   r_A = 0; /* let's say (undefined) */
   r_S = (r_AL & 0x80) ? 1 : 0;
   r_Z = (r_AL == 0) ? 1 : 0;
   r_P = calc_parity_8bit( r_AL );

   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
            | ( (r_AH & 0xFF) << 8 )
            | ( (r_AL & 0xFF) << 0 );
   return result;
}
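
/* Illustrative sketch (hypothetical, not used by VEX): AAM divides AL
   by ten (this helper hardcodes base ten; the immediate-base forms of
   AAM/AAD are not handled here).  E.g. AL=123 yields AH=12, AL=3. */
static void aam_SKETCH ( void )
{
   UInt out = x86g_calculate_aad_aam ( 123, 0xD4/*AAM*/ );
   vassert(((out >> 8) & 0xFF) == 12);
   vassert((out & 0xFF) == 3);
}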


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, return 1. */
ULong x86g_dirtyhelper_RDTSC ( void )
{
#  if defined(__i386__)
   ULong res;
   __asm__ __volatile__("rdtsc" : "=A" (res));
   return res;
#  else
   return 1ULL;
#  endif
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be a P55C (Intel Pentium/MMX) */
void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         st->guest_EAX = 0x1;
         st->guest_EBX = 0x756e6547;
         st->guest_ECX = 0x6c65746e;
         st->guest_EDX = 0x49656e69;
         break;
      default:
         st->guest_EAX = 0x543;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x8001bf;
         break;
   }
}
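
/* Illustrative sketch (hypothetical, not used by VEX): leaf 0 spreads
   the vendor string over EBX, EDX, ECX, in that order, four
   little-endian bytes per register -- "GenuineIntel" for the helper
   above. */
static void cpuid_vendor_SKETCH ( VexGuestX86State* st )
{
   HChar buf[13];
   UInt  ws[3];
   Int   i;
   st->guest_EAX = 0;
   x86g_dirtyhelper_CPUID_sse0 ( st );
   ws[0] = st->guest_EBX;
   ws[1] = st->guest_EDX;
   ws[2] = st->guest_ECX;
   for (i = 0; i < 12; i++)
      buf[i] = (HChar)((ws[i/4] >> (8*(i%4))) & 0xFF);
   buf[12] = 0;   /* buf now holds "GenuineIntel" */
}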

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be an Athlon "Classic" (Model 2, K75 "Pluto/Orion") */
/* But without 3DNow support (weird, but we really don't support it). */
void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      /* vendor ID */
      case 0:
         st->guest_EAX = 0x1;
         st->guest_EBX = 0x68747541;
         st->guest_ECX = 0x444d4163;
         st->guest_EDX = 0x69746e65;
         break;
      /* feature bits */
      case 1:
         st->guest_EAX = 0x621;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x183f9ff;
         break;
      /* Highest Extended Function Supported (0x80000004 brand string) */
      case 0x80000000:
         st->guest_EAX = 0x80000004;
         st->guest_EBX = 0x68747541;
         st->guest_ECX = 0x444d4163;
         st->guest_EDX = 0x69746e65;
         break;
      /* Extended Processor Info and Feature Bits */
      case 0x80000001:
         st->guest_EAX = 0x721;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
         break;
      /* Processor Brand String "AMD Athlon(tm) Processor" */
      case 0x80000002:
         st->guest_EAX = 0x20444d41;
         st->guest_EBX = 0x6c687441;
         st->guest_ECX = 0x74286e6f;
         st->guest_EDX = 0x5020296d;
         break;
      case 0x80000003:
         st->guest_EAX = 0x65636f72;
         st->guest_EBX = 0x726f7373;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x0;
         break;
      default:
         st->guest_EAX = 0x0;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x0;
         break;
   }
}
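
/* Illustrative sketch (hypothetical, not used by VEX): unlike the
   vendor string, the brand string leaves return their 16 bytes in
   EAX, EBX, ECX, EDX order.  For the helper above, leaves 0x80000002
   and 0x80000003 together yield "AMD Athlon(tm) Processor". */
static void cpuid_brand_SKETCH ( VexGuestX86State* st )
{
   HChar buf[33];
   Int   leaf, i;
   for (leaf = 0; leaf < 2; leaf++) {
      UInt ws[4];
      st->guest_EAX = 0x80000002 + leaf;
      x86g_dirtyhelper_CPUID_mmxext ( st );
      ws[0] = st->guest_EAX; ws[1] = st->guest_EBX;
      ws[2] = st->guest_ECX; ws[3] = st->guest_EDX;
      for (i = 0; i < 16; i++)
         buf[16*leaf + i] = (HChar)((ws[i/4] >> (8*(i%4))) & 0xFF);
   }
   buf[32] = 0;   /* buf now begins "AMD Athlon(tm) Processor" */
}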

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be the following SSE1-capable CPU:
   vendor_id  : GenuineIntel
   cpu family : 6
   model      : 11
   model name : Intel(R) Pentium(R) III CPU family 1133MHz
   stepping   : 1
   cpu MHz    : 1131.013
   cache size : 512 KB
*/
void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         st->guest_EAX = 0x00000002;
         st->guest_EBX = 0x756e6547;
         st->guest_ECX = 0x6c65746e;
         st->guest_EDX = 0x49656e69;
         break;
      case 1:
         st->guest_EAX = 0x000006b1;
         st->guest_EBX = 0x00000004;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0383fbff;
         break;
      default:
         st->guest_EAX = 0x03020101;
         st->guest_EBX = 0x00000000;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0c040883;
         break;
   }
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be the following SSSE3-capable CPU (2 x ...):
   vendor_id       : GenuineIntel
   cpu family      : 6
   model           : 15
   model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
   stepping        : 6
   cpu MHz         : 2394.000
   cache size      : 4096 KB
   physical id     : 0
   siblings        : 2
   core id         : 0
   cpu cores       : 2
   fpu             : yes
   fpu_exception   : yes
   cpuid level     : 10
   wp              : yes
   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
                     mtrr pge mca cmov pat pse36 clflush dts acpi
                     mmx fxsr sse sse2 ss ht tm syscall nx lm
                     constant_tsc pni monitor ds_cpl vmx est tm2
                     cx16 xtpr lahf_lm
   bogomips        : 4798.78
   clflush size    : 64
   cache_alignment : 64
   address sizes   : 36 bits physical, 48 bits virtual
   power management:
*/
void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
{
#  define SET_ABCD(_a,_b,_c,_d)                \
      do { st->guest_EAX = (UInt)(_a);         \
           st->guest_EBX = (UInt)(_b);         \
           st->guest_ECX = (UInt)(_c);         \
           st->guest_EDX = (UInt)(_d);         \
      } while (0)

   switch (st->guest_EAX) {
      case 0x00000000:
         SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
         break;
      case 0x00000001:
         SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
         break;
      case 0x00000002:
         SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
         break;
      case 0x00000003:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000004: {
         switch (st->guest_ECX) {
            case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
                                      0x00000fff, 0x00000001); break;
            default:         SET_ABCD(0x00000000, 0x00000000,
                                      0x00000000, 0x00000000); break;
         }
         break;
      }
      case 0x00000005:
         SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
         break;
      case 0x00000006:
         SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
         break;
      case 0x00000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000008:
         SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000009:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x0000000a:
      unhandled_eax_value:
         SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000000:
         SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000001:
         SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
         break;
      case 0x80000002:
         SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
         break;
      case 0x80000003:
         SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
         break;
      case 0x80000004:
         SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
         break;
      case 0x80000005:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000006:
         SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
         break;
      case 0x80000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000008:
         SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
         break;
      default:
         goto unhandled_eax_value;
   }
#  undef SET_ABCD
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, return 0. */
UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
{
#  if defined(__i386__)
   UInt r = 0;
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
                              : "=a" (r) : "Nd" (portno));
         break;
      default:
         break;
   }
   return r;
#  else
   return 0;
#  endif
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, do nothing. */
void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
{
#  if defined(__i386__)
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("outl %0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("outw %w0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("outb %b0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      default:
         break;
   }
#  else
   /* do nothing */
#  endif
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, write zeroes. */
/* op = 0: call the native SGDT instruction.
   op = 1: call the native SIDT instruction.
*/
void x86g_dirtyhelper_SxDT ( void* address, UInt op ) {
#  if defined(__i386__)
   switch (op) {
      case 0:
         __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
         break;
      case 1:
         __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
         break;
      default:
         vpanic("x86g_dirtyhelper_SxDT");
   }
#  else
   /* Not on x86; zero the 6-byte result so the caller at least sees
      defined values. */
   UChar* p = (UChar*)address;
   p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
#  endif
}

/*---------------------------------------------------------------*/
/*--- Helpers for MMX/SSE/SSE2.                               ---*/
/*---------------------------------------------------------------*/

static inline UChar abdU8 ( UChar xx, UChar yy ) {
   return toUChar(xx>yy ? xx-yy : yy-xx);
}

static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
   return (((ULong)w1) << 32) | ((ULong)w0);
}

static inline UShort sel16x4_3 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(hi32 >> 16);
}
static inline UShort sel16x4_2 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(hi32);
}
static inline UShort sel16x4_1 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUShort(lo32 >> 16);
}
static inline UShort sel16x4_0 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUShort(lo32);
}

static inline UChar sel8x8_7 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 24);
}
static inline UChar sel8x8_6 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 16);
}
static inline UChar sel8x8_5 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 8);
}
static inline UChar sel8x8_4 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 0);
}
static inline UChar sel8x8_3 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 24);
}
static inline UChar sel8x8_2 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 16);
}
static inline UChar sel8x8_1 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 8);
}
static inline UChar sel8x8_0 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 0);
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
{
   return
      mk32x2(
         (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
            + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
         (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
            + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
      );
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
{
   UInt t = 0;
   t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
   t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
   t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
   t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
   t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
   t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
   t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
   t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
   t &= 0xFFFF;
   return (ULong)t;
}
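
/* Illustrative sketch (hypothetical, not used by VEX): PSADBW sums
   the absolute differences of the eight byte lanes into one 16-bit
   total.  Eight lanes each differing by 1 sum to 8. */
static void psadbw_SKETCH ( void )
{
   vassert( x86g_calculate_mmx_psadbw( 0x0101010101010101ULL,
                                       0x0202020202020202ULL ) == 8 );
}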


/*---------------------------------------------------------------*/
/*--- Helpers for dealing with segment overrides.             ---*/
/*---------------------------------------------------------------*/

static inline
UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
{
   UInt lo  = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
   UInt mid =   0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
   UInt hi  =   0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
   return (hi << 24) | (mid << 16) | lo;
}

static inline
UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
{
   UInt lo    = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
   UInt hi    =    0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
   UInt limit = (hi << 16) | lo;
   if (ent->LdtEnt.Bits.Granularity)
      limit = (limit << 12) | 0xFFF;
   return limit;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
                              UInt seg_selector, UInt virtual_addr )
{
   UInt tiBit, base, limit;
   VexGuestX86SegDescr* the_descrs;

   Bool verboze = False;

   /* If this isn't true, we're in Big Trouble. */
   vassert(8 == sizeof(VexGuestX86SegDescr));

   if (verboze)
      vex_printf("x86g_use_seg_selector: "
                 "seg_selector = 0x%x, vaddr = 0x%x\n",
                 seg_selector, virtual_addr);

   /* Check for wildly invalid selector. */
   if (seg_selector & ~0xFFFF)
      goto bad;

   seg_selector &= 0x0000FFFF;

   /* Sanity check the segment selector.  Ensure that RPL=11b (least
      privilege).  This forms the bottom 2 bits of the selector. */
   if ((seg_selector & 3) != 3)
      goto bad;

   /* Extract the TI bit (0 means GDT, 1 means LDT) */
   tiBit = (seg_selector >> 2) & 1;

   /* Convert the segment selector onto a table index */
   seg_selector >>= 3;
   vassert(seg_selector < 8192);

   if (tiBit == 0) {

      /* GDT access. */
      /* Do we actually have a GDT to look at? */
      if (gdt == 0)
         goto bad;

      /* Check for access to non-existent entry. */
      if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
         goto bad;

      the_descrs = (VexGuestX86SegDescr*)gdt;
      base  = get_segdescr_base (&the_descrs[seg_selector]);
      limit = get_segdescr_limit(&the_descrs[seg_selector]);

   } else {

      /* All the same stuff, except for the LDT. */
      if (ldt == 0)
         goto bad;

      if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
         goto bad;

      the_descrs = (VexGuestX86SegDescr*)ldt;
      base  = get_segdescr_base (&the_descrs[seg_selector]);
      limit = get_segdescr_limit(&the_descrs[seg_selector]);

   }

   /* Do the limit check.  Note, this check is just slightly too
      slack.  Really it should be "if (virtual_addr + size - 1 >=
      limit)," but we don't have the size info to hand.  Getting it
      could be significantly complex. */
   if (virtual_addr >= limit)
      goto bad;

   if (verboze)
      vex_printf("x86g_use_seg_selector: "
                 "base = 0x%x, addr = 0x%x\n",
                 base, base + virtual_addr);

   /* High 32 bits are zero, indicating success. */
   return (ULong)( ((UInt)virtual_addr) + base );

  bad:
   return 1ULL << 32;
}
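
/* Illustrative sketch (hypothetical, not used by VEX): building a
   selector for the helper above and testing the failure sentinel.
   Bits 0..1 are the RPL (must be 3), bit 2 is TI (0=GDT, 1=LDT), and
   bits 3..15 index the table. */
static Bool seg_translate_SKETCH ( HWord ldt, HWord gdt, UInt vaddr,
                                   /*OUT*/UInt* linear )
{
   UInt  sel = (1 << 3) | 3;   /* GDT entry 1, RPL=3 */
   ULong r   = x86g_use_seg_selector ( ldt, gdt, sel, vaddr );
   if ((r >> 32) != 0)
      return False;            /* bad selector or limit fault */
   *linear = (UInt)r;
   return True;
}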


/*---------------------------------------------------------------*/
/*--- Helpers for dealing with, and describing,               ---*/
/*--- guest state as a whole.                                 ---*/
/*---------------------------------------------------------------*/

/* Initialise the entire x86 guest state. */
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
{
   vex_state->host_EvC_FAILADDR = 0;
   vex_state->host_EvC_COUNTER  = 0;

   vex_state->guest_EAX = 0;
   vex_state->guest_ECX = 0;
   vex_state->guest_EDX = 0;
   vex_state->guest_EBX = 0;
   vex_state->guest_ESP = 0;
   vex_state->guest_EBP = 0;
   vex_state->guest_ESI = 0;
   vex_state->guest_EDI = 0;

   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = 0;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
   vex_state->guest_DFLAG   = 1; /* forwards */
   vex_state->guest_IDFLAG  = 0;
   vex_state->guest_ACFLAG  = 0;

   vex_state->guest_EIP = 0;

   /* Initialise the simulated FPU */
   x86g_dirtyhelper_FINIT( vex_state );

   /* Initialise the SSE state. */
#  define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;

   vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
   SSEZERO(vex_state->guest_XMM0);
   SSEZERO(vex_state->guest_XMM1);
   SSEZERO(vex_state->guest_XMM2);
   SSEZERO(vex_state->guest_XMM3);
   SSEZERO(vex_state->guest_XMM4);
   SSEZERO(vex_state->guest_XMM5);
   SSEZERO(vex_state->guest_XMM6);
   SSEZERO(vex_state->guest_XMM7);

#  undef SSEZERO

   vex_state->guest_CS  = 0;
   vex_state->guest_DS  = 0;
   vex_state->guest_ES  = 0;
   vex_state->guest_FS  = 0;
   vex_state->guest_GS  = 0;
   vex_state->guest_SS  = 0;
   vex_state->guest_LDT = 0;
   vex_state->guest_GDT = 0;

   vex_state->guest_EMNOTE = EmNote_NONE;

   /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
   vex_state->guest_CMSTART = 0;
   vex_state->guest_CMLEN   = 0;

   vex_state->guest_NRADDR   = 0;
   vex_state->guest_SC_CLASS = 0;
   vex_state->guest_IP_AT_SYSCALL = 0;

   vex_state->padding1 = 0;
}


/* Figure out if any part of the guest state contained in minoff
   .. maxoff requires precise memory exceptions.  If in doubt return
   True (but this generates significantly slower code).

   By default we enforce precise exns for guest %ESP, %EBP and %EIP
   only.  These are the minimum needed to extract correct stack
   backtraces from x86 code.

   Only %ESP is needed in mode VexRegUpdSpAtMemAccess.
*/
Bool guest_x86_state_requires_precise_mem_exns (
        Int minoff, Int maxoff, VexRegisterUpdates pxControl
     )
{
   Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
   Int ebp_max = ebp_min + 4 - 1;
   Int esp_min = offsetof(VexGuestX86State, guest_ESP);
   Int esp_max = esp_min + 4 - 1;
   Int eip_min = offsetof(VexGuestX86State, guest_EIP);
   Int eip_max = eip_min + 4 - 1;

   if (maxoff < esp_min || minoff > esp_max) {
      /* no overlap with esp */
      if (pxControl == VexRegUpdSpAtMemAccess)
         return False; // We only need to check stack pointer.
   } else {
      return True;
   }

   if (maxoff < ebp_min || minoff > ebp_max) {
      /* no overlap with ebp */
   } else {
      return True;
   }

   if (maxoff < eip_min || minoff > eip_max) {
      /* no overlap with eip */
   } else {
      return True;
   }

   return False;
}
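
/* Illustrative sketch (hypothetical, not used by VEX): the test
   repeated above is the usual closed-interval overlap check --
   [minoff,maxoff] misses [lo,hi] exactly when maxoff < lo or
   minoff > hi. */
static inline Bool overlaps_SKETCH ( Int minoff, Int maxoff,
                                     Int lo, Int hi )
{
   return toBool( !(maxoff < lo || minoff > hi) );
}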


#define ALWAYSDEFD(field)                            \
    { offsetof(VexGuestX86State, field),             \
      (sizeof ((VexGuestX86State*)0)->field) }

VexGuestLayout
   x86guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestX86State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestX86State,guest_ESP),
          .sizeof_SP = 4,

          /* Describe the frame pointer. */
          .offset_FP = offsetof(VexGuestX86State,guest_EBP),
          .sizeof_FP = 4,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestX86State,guest_EIP),
          .sizeof_IP = 4,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'. */
          .n_alwaysDefd = 24,

          /* flags thunk: OP and NDEP are always defd, whereas DEP1
             and DEP2 have to be tracked.  See detailed comment in
             gdefs.h on meaning of thunk fields. */
          .alwaysDefd
             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
                 /*  4 */ ALWAYSDEFD(guest_ACFLAG),
                 /*  5 */ ALWAYSDEFD(guest_EIP),
                 /*  6 */ ALWAYSDEFD(guest_FTOP),
                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
                 /*  9 */ ALWAYSDEFD(guest_FC3210),
                 /* 10 */ ALWAYSDEFD(guest_CS),
                 /* 11 */ ALWAYSDEFD(guest_DS),
                 /* 12 */ ALWAYSDEFD(guest_ES),
                 /* 13 */ ALWAYSDEFD(guest_FS),
                 /* 14 */ ALWAYSDEFD(guest_GS),
                 /* 15 */ ALWAYSDEFD(guest_SS),
                 /* 16 */ ALWAYSDEFD(guest_LDT),
                 /* 17 */ ALWAYSDEFD(guest_GDT),
                 /* 18 */ ALWAYSDEFD(guest_EMNOTE),
                 /* 19 */ ALWAYSDEFD(guest_SSEROUND),
                 /* 20 */ ALWAYSDEFD(guest_CMSTART),
                 /* 21 */ ALWAYSDEFD(guest_CMLEN),
                 /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
                 /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
               }
        };


/*---------------------------------------------------------------*/
/*--- end                                 guest_x86_helpers.c ---*/
/*---------------------------------------------------------------*/