
/*---------------------------------------------------------------*/
/*--- begin                                guest_x86_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2015 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_emnote.h"
#include "libvex_guest_x86.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_x86_defs.h"
#include "guest_generic_x87.h"


/* This file contains helper functions for x86 guest code.
   Calls to these functions are generated by the back end.
   These calls are of course in the host machine code and
   this file will be compiled to host machine code, so that
   all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change the signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   guest-x86/toIR.c.

   The convention used is that all functions called from generated
   code are named x86g_<something>, and any function whose name lacks
   that prefix is not called from generated code.  Note that some
   LibVEX_* functions can however be called by VEX's client, but that
   is not the same as calling them from VEX-generated code.
*/


/* Set to 1 to get detailed profiling info about use of the flag
   machinery. */
#define PROFILE_EFLAGS 0


/*---------------------------------------------------------------*/
/*--- %eflags run-time helpers.                               ---*/
/*---------------------------------------------------------------*/

static const UChar parity_table[256] = {
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
};
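
/* Side note: the table above is just PF for every byte value: entry i
   is X86G_CC_MASK_P iff i has an even number of 1 bits.  As a sketch
   (hypothetical, not part of the build), a generator along these
   lines reproduces it: */
#if 0
static void gen_parity_table ( void )
{
   Int i, j, nbits;
   for (i = 0; i < 256; i++) {
      nbits = 0;
      for (j = 0; j < 8; j++)
         nbits += (i >> j) & 1;
      /* entry is X86G_CC_MASK_P iff popcount(i) is even */
      vex_printf("%s, ", (nbits & 1) ? "0" : "X86G_CC_MASK_P");
   }
}
#endif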

/* generalised left-shifter */
inline static Int lshift ( Int x, Int n )
{
   if (n >= 0)
      return (UInt)x << n;
   else
      return x >> (-n);
}
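
/* Sanity sketch (not compiled): how the negative-count case is used
   by the flag computations below.  With DATA_BITS == 32,
   lshift(res, 8 - 32) right-shifts by 24, bringing the sign bit
   (bit 31) down to bit 7, where '& 0x80' extracts SF. */
#if 0
vassert( (lshift((Int)0x80000000, 8 - 32) & 0x80) == 0x80 );
vassert( (lshift((Int)0x00008000, 8 - 16) & 0x80) == 0x80 );
vassert( (lshift((Int)0x00000080, 8 -  8) & 0x80) == 0x80 );
#endif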

/* identity on ULong */
static inline ULong idULong ( ULong x )
{
   return x;
}


#define PREAMBLE(__data_bits)                                   \
   /* const */ UInt DATA_MASK                                   \
      = __data_bits==8 ? 0xFF                                   \
                       : (__data_bits==16 ? 0xFFFF              \
                                          : 0xFFFFFFFF);        \
   /* const */ UInt SIGN_MASK = 1u << (__data_bits - 1);        \
   /* const */ UInt CC_DEP1 = cc_dep1_formal;                   \
   /* const */ UInt CC_DEP2 = cc_dep2_formal;                   \
   /* const */ UInt CC_NDEP = cc_ndep_formal;                   \
   /* Four bogus assignments, which hopefully gcc can */        \
   /* optimise away, and which stop it complaining about */     \
   /* unused variables. */                                      \
   SIGN_MASK = SIGN_MASK;                                       \
   DATA_MASK = DATA_MASK;                                       \
   CC_DEP2 = CC_DEP2;                                           \
   CC_NDEP = CC_NDEP;


/*-------------------------------------------------------------*/

#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL + argR;                                        \
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                   \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
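
/* The OF term above encodes the usual rule for signed overflow on an
   add: it happens iff the arguments have the same sign and the result
   has the opposite sign.  (argL ^ argR ^ -1) has its top data bit set
   iff argL and argR agree in sign; (argL ^ res) has it set iff res
   disagrees with argL; lshift then moves that bit to bit 11
   (X86G_CC_SHIFT_O).  A worked 8-bit case, as a sketch (not
   compiled): */
#if 0
/* 0x70 + 0x70 = 0xE0: positive + positive gives negative, so OF. */
UInt argL = 0x70, argR = 0x70, res = argL + argR;
vassert( (lshift((argL ^ argR ^ -1) & (argL ^ res), 12 - 8)
          & X86G_CC_MASK_O) == X86G_CC_MASK_O );
#endif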

/*-------------------------------------------------------------*/

#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL - argR;                                        \
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;                  \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, oldC, res;                                \
     oldC = CC_NDEP & X86G_CC_MASK_C;                           \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL + argR) + oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;               \
     else                                                       \
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, oldC, res;                                \
     oldC = CC_NDEP & X86G_CC_MASK_C;                           \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL - argR) - oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;              \
     else                                                       \
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;               \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = 0;                                                    \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res - 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & X86G_CC_MASK_C;                             \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;               \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res + 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & X86G_CC_MASK_C;                             \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK)                                    \
           == ((UInt)SIGN_MASK - 1)) << 11;                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C;        \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = CC_DEP2 & 1;                                          \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt fl                                                    \
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))        \
          | (X86G_CC_MASK_C & CC_DEP1)                          \
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,                  \
                                      11-(DATA_BITS-1))         \
                               ^ lshift(CC_DEP1, 11)));         \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt fl                                                    \
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))        \
          | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))       \
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,                  \
                                      11-(DATA_BITS-1))         \
                               ^ lshift(CC_DEP1,                \
                                        11-(DATA_BITS-1)+1)));  \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}


#if PROFILE_EFLAGS

static Bool initted = False;

/* C flag, fast route */
static UInt tabc_fast[X86G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[X86G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[X86G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))


static void showCounts ( void )
{
   Int op, co;
   HChar ch;
   vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c);

   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
              "    S   NS    P   NP    L   NL   LE  NLE\n");
   vex_printf("     -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {

      ch = ' ';
      if (op > 0 && (op-1) % 3 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 3 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 3 == 2)
         ch = 'L';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else
         if (n >= 0) {
            vex_printf(" %3d ", n );
         } else {
            vex_printf("     ");
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}

static void initCounts ( void )
{
   Int op, co;
   initted = True;
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
      tabc_fast[op] = tabc_slow[op] = 0;
      for (co = 0; co < 16; co++)
         tab_cond[op][co] = 0;
   }
}

#endif /* PROFILE_EFLAGS */


/* Calculate all 6 flags from the supplied thunk parameters.
   Worker function, not called directly from generated code. */
static
UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
                                     UInt cc_dep1_formal,
                                     UInt cc_dep2_formal,
                                     UInt cc_ndep_formal )
{
   switch (cc_op) {
      case X86G_CC_OP_COPY:
         return cc_dep1_formal
                & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                   | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);

      case X86G_CC_OP_ADDB:   ACTIONS_ADD(  8, UChar  );
      case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );

      case X86G_CC_OP_ADCB:   ACTIONS_ADC(  8, UChar  );
      case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );

      case X86G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );

      case X86G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );

      case X86G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );

      case X86G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );

      case X86G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );

      case X86G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );

      case X86G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );

      case X86G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );

      case X86G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );

      case X86G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                UShort, toUShort );
      case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                UInt,   toUInt );
      case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                ULong,  idULong );

      case X86G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                Short,  toUShort );
      case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                Int,    toUInt );
      case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                Long,   idULong );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_eflags_all_WRK(X86)"
                    "( %u, 0x%x, 0x%x, 0x%x )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("x86g_calculate_eflags_all_WRK(X86)");
   }
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all 6 flags from the supplied thunk parameters. */
UInt x86g_calculate_eflags_all ( UInt cc_op,
                                 UInt cc_dep1,
                                 UInt cc_dep2,
                                 UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_all++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif
   return
      x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters. */
VEX_REGPARM(3)
UInt x86g_calculate_eflags_c ( UInt cc_op,
                               UInt cc_dep1,
                               UInt cc_dep2,
                               UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_c++;
   tabc_fast[cc_op]++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   /* Fast-case some common ones. */
   switch (cc_op) {
      case X86G_CC_OP_LOGICL:
      case X86G_CC_OP_LOGICW:
      case X86G_CC_OP_LOGICB:
         return 0;
      case X86G_CC_OP_SUBL:
         return ((UInt)cc_dep1) < ((UInt)cc_dep2)
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBW:
         return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBB:
         return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_INCL:
      case X86G_CC_OP_DECL:
         return cc_ndep & X86G_CC_MASK_C;
      default:
         break;
   }

#  if PROFILE_EFLAGS
   tabc_fast[cc_op]--;
   tabc_slow[cc_op]++;
#  endif

   return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
          & X86G_CC_MASK_C;
}
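
/* Usage sketch (not compiled): computing just C for a 32-bit sub/cmp
   thunk hits the SUBL fast case above.  E.g. after 'cmpl $5, %eax'
   with %eax == 3, the thunk is (SUBL, dep1=3, dep2=5), and C is set
   because 3 <u 5: */
#if 0
vassert( x86g_calculate_eflags_c(X86G_CC_OP_SUBL, 3, 5, 0)
         == X86G_CC_MASK_C );
#endif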


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* returns 1 or 0 */
UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
                                UInt cc_op,
                                UInt cc_dep1,
                                UInt cc_dep2,
                                UInt cc_ndep )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
                                               cc_dep2, cc_ndep);
   UInt of,sf,zf,cf,pf;
   UInt inv = cond & 1;

#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   tab_cond[cc_op][cond]++;
   n_calc_cond++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   switch (cond) {
      case X86CondNO:
      case X86CondO: /* OF == 1 */
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ of);

      case X86CondNZ:
      case X86CondZ: /* ZF == 1 */
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ zf);

      case X86CondNB:
      case X86CondB: /* CF == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         return 1 & (inv ^ cf);

      case X86CondNBE:
      case X86CondBE: /* (CF or ZF) == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ (cf | zf));

      case X86CondNS:
      case X86CondS: /* SF == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         return 1 & (inv ^ sf);

      case X86CondNP:
      case X86CondP: /* PF == 1 */
         pf = eflags >> X86G_CC_SHIFT_P;
         return 1 & (inv ^ pf);

      case X86CondNL:
      case X86CondL: /* (SF xor OF) == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ (sf ^ of));

      case X86CondNLE:
      case X86CondLE: /* ((SF xor OF) or ZF) == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ ((sf ^ of) | zf));

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
         vpanic("x86g_calculate_condition");
   }
}
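
/* A note on the 'inv = cond & 1' trick above: X86Condcode values come
   in even/odd pairs in which the odd member is the negation of the
   even one (O/NO, B/NB, Z/NZ, ...), mirroring the x86 condition-code
   encoding.  Each case therefore computes the un-negated flag
   expression and XORs in the bottom bit of 'cond'.  Sketch (not
   compiled): after a sub/cmp of equal values, Z holds and NZ does
   not: */
#if 0
vassert( x86g_calculate_condition(X86CondZ,  X86G_CC_OP_SUBL, 7, 7, 0) == 1 );
vassert( x86g_calculate_condition(X86CondNZ, X86G_CC_OP_SUBL, 7, 7, 0) == 0 );
#endif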


/* VISIBLE TO LIBVEX CLIENT */
UInt LibVEX_GuestX86_get_eflags ( /*IN*/const VexGuestX86State* vex_state )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   UInt dflag = vex_state->guest_DFLAG;
   vassert(dflag == 1 || dflag == 0xFFFFFFFF);
   if (dflag == 0xFFFFFFFF)
      eflags |= X86G_CC_MASK_D;
   if (vex_state->guest_IDFLAG == 1)
      eflags |= X86G_CC_MASK_ID;
   if (vex_state->guest_ACFLAG == 1)
      eflags |= X86G_CC_MASK_AC;

   return eflags;
}

/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestX86_put_eflags ( UInt eflags,
                             /*MOD*/VexGuestX86State* vex_state )
{
   /* D flag */
   if (eflags & X86G_CC_MASK_D) {
      vex_state->guest_DFLAG = 0xFFFFFFFF;
      eflags &= ~X86G_CC_MASK_D;
   }
   else
      vex_state->guest_DFLAG = 1;

   /* ID flag */
   if (eflags & X86G_CC_MASK_ID) {
      vex_state->guest_IDFLAG = 1;
      eflags &= ~X86G_CC_MASK_ID;
   }
   else
      vex_state->guest_IDFLAG = 0;

   /* AC flag */
   if (eflags & X86G_CC_MASK_AC) {
      vex_state->guest_ACFLAG = 1;
      eflags &= ~X86G_CC_MASK_AC;
   }
   else
      vex_state->guest_ACFLAG = 0;

   UInt cc_mask = X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z |
                  X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P;
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = eflags & cc_mask;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}

/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
                              /*MOD*/VexGuestX86State* vex_state )
{
   UInt oszacp = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   if (new_carry_flag & 1) {
      oszacp |= X86G_CC_MASK_C;
   } else {
      oszacp &= ~X86G_CC_MASK_C;
   }
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = oszacp;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}
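
/* Client-side usage sketch (not compiled; 'gst' is assumed to be a
   populated VexGuestX86State).  Reading the flags forces evaluation
   of the lazy thunk; writing them collapses the thunk to OP_COPY: */
#if 0
UInt fl = LibVEX_GuestX86_get_eflags(&gst);
LibVEX_GuestX86_put_eflags(fl | X86G_CC_MASK_D, &gst); /* set DF */
#endif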


/*---------------------------------------------------------------*/
/*--- %eflags translation-time function specialisers.         ---*/
/*--- These help iropt specialise calls to the above run-time ---*/
/*--- %eflags functions.                                      ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */

static inline Bool isU32 ( IRExpr* e, UInt n )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == n );
}

IRExpr* guest_x86_spechelper ( const HChar* function_name,
                               IRExpr** args,
                               IRStmt** precedingStmts,
                               Int      n_precedingStmts )
{
#  define unop(_op,_a1)         IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2)    IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU32(_n)             IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)              IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "x86g_calculate_condition" --------- */

   if (vex_streq(function_name, "x86g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
         /* long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }

      /*---------------- SUBL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, cc_dep1, cc_dep2));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
         /* long sub/cmp, then NL (signed greater than or equal)
            --> test !(dst <s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
         /* long sub/cmp, then NLE (signed not less than or equal)
            --> test dst >s src
            --> test !(dst <=s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test !(dst <=u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
         /* long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
         /* long sub/cmp, then NB (unsigned greater than or equal)
            --> test !(dst <u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S,
                           binop(Iop_Sub32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
         /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S,
                                 binop(Iop_Sub32, cc_dep1, cc_dep2),
                                 mkU32(0))),
                      mkU32(1));
      }

      /*---------------- SUBW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }

      /*---------------- SUBB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
         /* byte sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep2,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep1,mkU32(0xFF))));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
                                        && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (UInt)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU32(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U32s. */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
                                        && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (UInt) !dst[7]
         */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                      mkU32(1));
      }

      /*---------------- LOGICL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
         /* long and/or/xor, then BE
            LOGIC sets ZF according to the result and makes CF be zero.
            BE computes (CF | ZF), but CF is zero, so this reduces to ZF
            -- which will be 1 iff the result is zero.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* long and/or/xor, then S --> (UInt)result[31] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
         /* see comment below for (LOGICB, CondNS) */
         /* long and/or/xor, then NS --> (UInt) ~ result[31] */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(31)),
                            mkU32(1)),
                      mkU32(1));
      }

      /*---------------- LOGICW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
         /* word and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
                           mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* word and/or/xor, then S --> (UInt)result[15] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(15)),
                      mkU32(1));
      }

      /*---------------- LOGICB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_And32,cc_dep1,mkU32(255)),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         /*   b9ac9:   84 c0   test   %al,%al
              b9acb:   75 0d   jne    b9ada */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32,
                           binop(Iop_And32,cc_dep1,mkU32(255)),
                           mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
         /* ditto, for negation-of-S. */
         /* byte and/or/xor, then NS --> (UInt) ~ result[7] */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                      mkU32(1));
      }

      /*---------------- DECL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
         /* dec L, then S --> compare DST <s 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
      }

      /*---------------- DECW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
         /* dec W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- INCW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
         /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
         /* inc W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- SHRL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
         /* SHRL, then Z --> test dep1 == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      /*---------------- COPY ----------------*/
      /* This can happen, as a result of x87 FP compares: "fcom ... ;
         fnstsw %ax ; sahf ; jbe" for example. */

      if (isU32(cc_op, X86G_CC_OP_COPY) &&
          (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test
            (C or Z) == 1. */
         /* COPY, then NBE --> extract C and Z from dep1, and test
            (C or Z) == 0. */
         UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(
                        Iop_Or32,
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
                     ),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
         /* COPY, then NB --> extract C from dep1, and test (C == 0). */
         UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         /* COPY, then NP --> extract P from dep1, and test (P == 0). */
         UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_c" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
      /* specialise calls to above "calculate_eflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, X86G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep1,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep2,mkU32(0xFF))));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL)
          || isU32(cc_op, X86G_CC_OP_LOGICW)
          || isU32(cc_op, X86G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU32(0);
      }
      if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return cc_ndep;
      }
      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* cflag after COPY is stored in DEP1. */
         return
            binop(
               Iop_And32,
               binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
               mkU32(1)
            );
      }
      if (isU32(cc_op, X86G_CC_OP_ADDL)) {
         /* C after add denotes sum <u either arg */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           cc_dep1));
      }
      // ATC, requires verification, no test case known
      //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
      //   /* C after signed widening multiply denotes the case where
      //      the top half of the result isn't simply the sign extension
      //      of the bottom half (iow the result doesn't fit completely
      //      in the bottom half).  Hence:
      //        C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
      //      where 'x' denotes signed widening multiply.*/
      //   return
      //      unop(Iop_1Uto32,
      //           binop(Iop_CmpNE32,
      //                 unop(Iop_64HIto32,
      //                      binop(Iop_MullS32, cc_dep1, cc_dep2)),
      //                 binop(Iop_Sar32,
      //                       binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
      //}
#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_all" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
      /* specialise calls to above "calculate_eflags_all" function */
      IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      /* cc_dep2 = args[2]; */
      /* cc_ndep = args[3]; */

      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* eflags after COPY are stored in DEP1. */
         return
            binop(
               Iop_And32,
               cc_dep1,
               mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                     | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
            );
      }
      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU32
#  undef mkU8

   return NULL;
}


/*---------------------------------------------------------------*/
/*--- Supporting functions for x87 FPU activities.            ---*/
/*---------------------------------------------------------------*/

static inline Bool host_is_little_endian ( void )
{
   UInt x = 0x76543210;
   UChar* p = (UChar*)(&x);
   return toBool(*p == 0x10);
}

/* 80 and 64-bit floating point formats:

   80-bit:

    S  0       0-------0      zero
    S  0       0X------X      denormals
    S  1-7FFE  1X------X      normals (all normals have leading 1)
    S  7FFF    10------0      infinity
    S  7FFF    10X-----X      snan
    S  7FFF    11X-----X      qnan

   S is the sign bit.  For runs X----X, at least one of the Xs must be
   nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
   there is an explicitly represented leading 1, and a sign bit,
   giving 80 in total.

   64-bit avoids the confusion of an explicitly represented leading 1
   and so is simpler:

    S  0      0------0   zero
    S  0      X------X   denormals
    S  1-7FE  any        normals
    S  7FF    0------0   infinity
    S  7FF    0X-----X   snan
    S  7FF    1X-----X   qnan

   Exponent is 11 bits, fractional part is 52 bits, and there is a
   sign bit, giving 64 in total.
*/
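
/* Sketch (not compiled) of pulling a 64-bit value apart along the
   lines the FXAM helper below does byte-by-byte: sign is bit 63, the
   biased exponent is bits 62:52, the fraction is bits 51:0. */
#if 0
ULong d    = 0x7FF0000000000000ULL;  /* +infinity */
UInt  sign = (UInt)((d >> 63) & 1);                       /* 0     */
UInt  bexp = (UInt)((d >> 52) & 0x7FF);                   /* 0x7FF */
Bool  fracIsZero = toBool((d & 0xFFFFFFFFFFFFFULL) == 0); /* True  */
/* bexp == 0x7FF with a zero fraction: infinity, per the table above. */
#endif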

/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
{
   Bool   mantissaIsZero;
   Int    bexp;
   UChar  sign;
   UChar* f64;

   vassert(host_is_little_endian());

   /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */

   f64  = (UChar*)(&dbl);
   sign = toUChar( (f64[7] >> 7) & 1 );

   /* First off, if the tag indicates the register was empty,
      return 1,0,sign,1 */
   if (tag == 0) {
      /* vex_printf("Empty\n"); */
      return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
                             | X86G_FC_MASK_C0;
   }

   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   bexp &= 0x7FF;

   mantissaIsZero
      = toBool(
           (f64[6] & 0x0F) == 0
           && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
        );

   /* If both exponent and mantissa are zero, the value is zero.
      Return 1,0,sign,0. */
   if (bexp == 0 && mantissaIsZero) {
      /* vex_printf("Zero\n"); */
      return X86G_FC_MASK_C3 | 0
                             | (sign << X86G_FC_SHIFT_C1) | 0;
   }

   /* If exponent is zero but mantissa isn't, it's a denormal.
      Return 1,1,sign,0. */
   if (bexp == 0 && !mantissaIsZero) {
      /* vex_printf("Denormal\n"); */
      return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
                             | (sign << X86G_FC_SHIFT_C1) | 0;
   }

   /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
      Return 0,1,sign,1. */
   if (bexp == 0x7FF && mantissaIsZero) {
      /* vex_printf("Inf\n"); */
      return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
               | X86G_FC_MASK_C0;
   }

   /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
      Return 0,0,sign,1. */
   if (bexp == 0x7FF && !mantissaIsZero) {
      /* vex_printf("NaN\n"); */
      return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
   }

   /* Uh, ok, we give up.  It must be a normal finite number.
      Return 0,1,sign,0.
   */
   /* vex_printf("normal\n"); */
   return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest memory) */
ULong x86g_dirtyhelper_loadF80le ( Addr addrU )
{
   ULong f64;
   convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
   return f64;
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest memory) */
void x86g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
{
   convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
}


/*----------------------------------------------*/
/*--- The exported fns ..                    ---*/
/*----------------------------------------------*/

/* Layout of the real x87 state. */
/* 13 June 05: Fpu_State and auxiliary constants were moved to
   guest_generic_x87.h */


/* CLEAN HELPER */
/* fpucw[15:0] contains an x87 native format FPU control word.
   Extract from it the required FPROUND value and any resulting
   emulation warning, and return (warn << 32) | fpround value.
*/
ULong x86g_check_fldcw ( UInt fpucw )
{
   /* Decide on a rounding mode.  fpucw[11:10] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   UInt rmode = (fpucw >> 10) & 3;

   /* Detect any required emulation warnings. */
   VexEmNote ew = EmNote_NONE;

   if ((fpucw & 0x3F) != 0x3F) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_x87exns;
   }
   else
   if (((fpucw >> 8) & 3) != 3) {
      /* unsupported precision */
      ew = EmWarn_X86_x87precision;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}
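
/* Example (not compiled): the x87 power-up control word 0x037F masks
   all exceptions and selects 64-bit precision with round-to-nearest,
   so it yields rmode 0 (Irrm_NEAREST) and no warning; it is also
   exactly what x86g_create_fpucw below builds for that mode: */
#if 0
vassert( x86g_check_fldcw(0x037F) == 0 /* (EmNote_NONE << 32) | 0 */ );
vassert( x86g_create_fpucw((UInt)Irrm_NEAREST) == 0x037F );
#endif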

/* CLEAN HELPER */
/* Given fpround as an IRRoundingMode value, create a suitable x87
   native format FPU control word. */
UInt x86g_create_fpucw ( UInt fpround )
{
   fpround &= 3;
   return 0x037F | (fpround << 10);
}


/* CLEAN HELPER */
/* mxcsr[15:0] contains an SSE native format MXCSR value.
   Extract from it the required SSEROUND value and any resulting
   emulation warning, and return (warn << 32) | sseround value.
*/
ULong x86g_check_ldmxcsr ( UInt mxcsr )
{
   /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   UInt rmode = (mxcsr >> 13) & 3;

   /* Detect any required emulation warnings. */
   VexEmNote ew = EmNote_NONE;

   if ((mxcsr & 0x1F80) != 0x1F80) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_sseExns;
   }
   else
   if (mxcsr & (1<<15)) {
      /* FZ is set */
      ew = EmWarn_X86_fz;
   }
   else
   if (mxcsr & (1<<6)) {
      /* DAZ is set */
      ew = EmWarn_X86_daz;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}
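
/* Example (not compiled): the MXCSR reset value 0x1F80 masks all
   exceptions and clears FZ/DAZ, selecting round-to-nearest, so it
   yields rmode 0 and no warning, matching what x86g_create_mxcsr
   below builds: */
#if 0
vassert( x86g_check_ldmxcsr(0x1F80) == 0 );
#endif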
1569
1570
1571 /* CLEAN HELPER */
1572 /* Given sseround as an IRRoundingMode value, create a suitable SSE
1573 native format MXCSR value. */
x86g_create_mxcsr(UInt sseround)1574 UInt x86g_create_mxcsr ( UInt sseround )
1575 {
1576 sseround &= 3;
1577 return 0x1F80 | (sseround << 13);
1578 }
1579
1580
1581 /* CALLED FROM GENERATED CODE */
1582 /* DIRTY HELPER (writes guest state) */
1583 /* Initialise the x87 FPU state as per 'finit'. */
x86g_dirtyhelper_FINIT(VexGuestX86State * gst)1584 void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
1585 {
1586 Int i;
1587 gst->guest_FTOP = 0;
1588 for (i = 0; i < 8; i++) {
1589 gst->guest_FPTAG[i] = 0; /* empty */
1590 gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
1591 }
1592 gst->guest_FPROUND = (UInt)Irrm_NEAREST;
1593 gst->guest_FC3210 = 0;
1594 }
1595
1596
1597 /* This is used to implement both 'frstor' and 'fldenv'. The latter
1598 appears to differ from the former only in that the 8 FP registers
1599 themselves are not transferred into the guest state. */
1600 static
do_put_x87(Bool moveRegs,UChar * x87_state,VexGuestX86State * vex_state)1601 VexEmNote do_put_x87 ( Bool moveRegs,
1602 /*IN*/UChar* x87_state,
1603 /*OUT*/VexGuestX86State* vex_state )
1604 {
1605 Int stno, preg;
1606 UInt tag;
1607 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1608 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1609 Fpu_State* x87 = (Fpu_State*)x87_state;
1610 UInt ftop = (x87->env[FP_ENV_STAT] >> 11) & 7;
1611 UInt tagw = x87->env[FP_ENV_TAG];
1612 UInt fpucw = x87->env[FP_ENV_CTRL];
1613 UInt c3210 = x87->env[FP_ENV_STAT] & 0x4700;
1614 VexEmNote ew;
1615 UInt fpround;
1616 ULong pair;
1617
1618 /* Copy registers and tags */
1619 for (stno = 0; stno < 8; stno++) {
1620 preg = (stno + ftop) & 7;
1621 tag = (tagw >> (2*preg)) & 3;
1622 if (tag == 3) {
1623 /* register is empty */
1624 /* hmm, if it's empty, does it still get written? Probably
1625 safer to say it does. If we don't, memcheck could get out
1626 of sync, in that it thinks all FP registers are defined by
1627 this helper, but in reality some have not been updated. */
1628 if (moveRegs)
1629 vexRegs[preg] = 0; /* IEEE754 64-bit zero */
1630 vexTags[preg] = 0;
1631 } else {
1632 /* register is non-empty */
1633 if (moveRegs)
1634 convert_f80le_to_f64le( &x87->reg[10*stno],
1635 (UChar*)&vexRegs[preg] );
1636 vexTags[preg] = 1;
1637 }
1638 }
1639
1640 /* stack pointer */
1641 vex_state->guest_FTOP = ftop;
1642
1643 /* status word */
1644 vex_state->guest_FC3210 = c3210;
1645
1646 /* handle the control word, setting FPROUND and detecting any
1647 emulation warnings. */
1648 pair = x86g_check_fldcw ( (UInt)fpucw );
1649 fpround = (UInt)pair;
1650 ew = (VexEmNote)(pair >> 32);
1651
1652 vex_state->guest_FPROUND = fpround & 3;
1653
1654 /* emulation warnings --> caller */
1655 return ew;
1656 }
1657
1658
1659 /* Create an x87 FPU state from the guest state, as close as
1660 we can approximate it. */
1661 static
do_get_x87(VexGuestX86State * vex_state,UChar * x87_state)1662 void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
1663 /*OUT*/UChar* x87_state )
1664 {
1665 Int i, stno, preg;
1666 UInt tagw;
1667 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1668 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1669 Fpu_State* x87 = (Fpu_State*)x87_state;
1670 UInt ftop = vex_state->guest_FTOP;
1671 UInt c3210 = vex_state->guest_FC3210;
1672
1673 for (i = 0; i < 14; i++)
1674 x87->env[i] = 0;
1675
1676 x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
1677 x87->env[FP_ENV_STAT]
1678 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
1679 x87->env[FP_ENV_CTRL]
1680 = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));
1681
1682 /* Dump the register stack in ST order. */
1683 tagw = 0;
1684 for (stno = 0; stno < 8; stno++) {
1685 preg = (stno + ftop) & 7;
1686 if (vexTags[preg] == 0) {
1687 /* register is empty */
1688 tagw |= (3 << (2*preg));
1689 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1690 &x87->reg[10*stno] );
1691 } else {
1692 /* register is full. */
1693 tagw |= (0 << (2*preg));
1694 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1695 &x87->reg[10*stno] );
1696 }
1697 }
1698 x87->env[FP_ENV_TAG] = toUShort(tagw);
1699 }


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
{
   /* Somewhat roundabout, but at least it's simple. */
   Fpu_State tmp;
   UShort*   addrS = (UShort*)addr;
   UChar*    addrC = (UChar*)addr;
   U128*     xmm   = (U128*)(addr + 160);
   UInt      mxcsr;
   UShort    fp_tags;
   UInt      summary_tags;
   Int       r, stno;
   UShort    *srcS, *dstS;

   do_get_x87( gst, (UChar*)&tmp );
   mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );

   /* Now build the proper fxsave image from the x87 image we just
      made. */

   addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */

   /* set addrS[2] in an endian-independent way */
   summary_tags = 0;
   fp_tags = tmp.env[FP_ENV_TAG];
   for (r = 0; r < 8; r++) {
      if ( ((fp_tags >> (2*r)) & 3) != 3 )
         summary_tags |= (1 << r);
   }
   addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
   addrC[5]  = 0; /* pad */

   addrS[3]  = 0; /* FOP: fpu opcode (bogus) */
   addrS[4]  = 0;
   addrS[5]  = 0; /* FPU IP (bogus) */
   addrS[6]  = 0; /* FPU IP's segment selector (bogus) (although we
                     could conceivably dump %CS here) */

   addrS[7]  = 0; /* Intel reserved */

   addrS[8]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[9]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[10] = 0; /* segment selector for above operand pointer; %DS
                     perhaps? */
   addrS[11] = 0; /* Intel reserved */

   addrS[12] = toUShort(mxcsr);  /* MXCSR */
   addrS[13] = toUShort(mxcsr >> 16);

   addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
   addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */

   /* Copy in the FP registers, in ST order. */
   for (stno = 0; stno < 8; stno++) {
      srcS = (UShort*)(&tmp.reg[10*stno]);
      dstS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
      dstS[5] = 0;
      dstS[6] = 0;
      dstS[7] = 0;
   }

   /* That's the first 160 bytes of the image done.  Now only %xmm0
      .. %xmm7 remain to be copied.  If the host is big-endian, these
      need to be byte-swapped. */
   vassert(host_is_little_endian());

# define COPY_U128(_dst,_src)                       \
     do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
          _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
     while (0)

   COPY_U128( xmm[0], gst->guest_XMM0 );
   COPY_U128( xmm[1], gst->guest_XMM1 );
   COPY_U128( xmm[2], gst->guest_XMM2 );
   COPY_U128( xmm[3], gst->guest_XMM3 );
   COPY_U128( xmm[4], gst->guest_XMM4 );
   COPY_U128( xmm[5], gst->guest_XMM5 );
   COPY_U128( xmm[6], gst->guest_XMM6 );
   COPY_U128( xmm[7], gst->guest_XMM7 );

# undef COPY_U128
}
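
/* Editor's note: a sketch (guarded out, illustrative only) of how the
   helper lays out the 512-byte fxsave image.  'gst' is assumed to be
   an initialised guest state supplied by the caller. */
#if 0
static void example_fxsave ( VexGuestX86State* gst )
{
   /* The fxsave image is 512 bytes and architecturally 16-aligned. */
   UChar image[512] __attribute__((aligned(16)));
   x86g_dirtyhelper_FXSAVE( gst, (HWord)&image[0] );
   /* image[0..1]     = FCW,  image[2..3] = FSW,
      image[4]        = FTW tag summary byte,
      image[24..27]   = MXCSR,
      image[32..159]  = %st0..%st7 (16 bytes per register, 10 used),
      image[160..287] = %xmm0..%xmm7 */
}
#endif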


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
{
   Fpu_State tmp;
   VexEmNote warnX87 = EmNote_NONE;
   VexEmNote warnXMM = EmNote_NONE;
   UShort*   addrS   = (UShort*)addr;
   UChar*    addrC   = (UChar*)addr;
   U128*     xmm     = (U128*)(addr + 160);
   UShort    fp_tags;
   Int       r, stno, i;

   /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
      to be byte-swapped. */
   vassert(host_is_little_endian());

# define COPY_U128(_dst,_src)                       \
     do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
          _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
     while (0)

   COPY_U128( gst->guest_XMM0, xmm[0] );
   COPY_U128( gst->guest_XMM1, xmm[1] );
   COPY_U128( gst->guest_XMM2, xmm[2] );
   COPY_U128( gst->guest_XMM3, xmm[3] );
   COPY_U128( gst->guest_XMM4, xmm[4] );
   COPY_U128( gst->guest_XMM5, xmm[5] );
   COPY_U128( gst->guest_XMM6, xmm[6] );
   COPY_U128( gst->guest_XMM7, xmm[7] );

# undef COPY_U128

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */

   /* LLVM on Darwin turns the following loop into a movaps plus a
      handful of scalar stores.  This would work fine except for the
      fact that VEX doesn't keep the stack correctly (16-) aligned for
      the call, so it segfaults.  Hence, split the loop into two
      pieces (and pray LLVM doesn't merely glue them back together) so
      it's composed only of scalar stores and so is alignment
      insensitive.  Of course this is a kludge of the lamest kind --
      VEX should be fixed properly. */
   /* Code that seems to trigger the problem:
      for (i = 0; i < 14; i++) tmp.env[i] = 0; */
   for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
   __asm__ __volatile__("" ::: "memory");
   for (i = 0; i < 7; i++) tmp.env[i+7] = 0;

   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..7] */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */

   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
      else
         fp_tags |= (3 << (2*r)); /* EMPTY */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state. */
   warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );

   { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
                | ((((UInt)addrS[13]) & 0xFFFF) << 16);
     ULong w64 = x86g_check_ldmxcsr( w32 );

     warnXMM = (VexEmNote)(w64 >> 32);

     gst->guest_SSEROUND = w64 & 0xFFFFFFFF;
   }

   /* Prefer an X87 emwarn over an XMM one, if both exist. */
   if (warnX87 != EmNote_NONE)
      return warnX87;
   else
      return warnXMM;
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
{
   do_get_x87( gst, (UChar*)addr );
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
{
   return do_put_x87( True/*regs too*/, (UChar*)addr, gst );
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
{
   /* Somewhat roundabout, but at least it's simple. */
   Int i;
   UShort* addrP = (UShort*)addr;
   Fpu_State tmp;
   do_get_x87( gst, (UChar*)&tmp );
   for (i = 0; i < 14; i++)
      addrP[i] = tmp.env[i];
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
{
   return do_put_x87( False/*don't move regs*/, (UChar*)addr, gst);
}

/* VISIBLE TO LIBVEX CLIENT */
/* Do x87 save from the supplied VexGuestX86State structure and store the
   result at the given address which represents a buffer of at least 108
   bytes. */
void LibVEX_GuestX86_get_x87 ( /*IN*/VexGuestX86State* vex_state,
                               /*OUT*/UChar* x87_state )
{
   do_get_x87 ( vex_state, x87_state );
}

/* VISIBLE TO LIBVEX CLIENT */
/* Do x87 restore from the supplied address and store read values to the given
   VexGuestX86State structure. */
VexEmNote LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state,
                                    /*MOD*/VexGuestX86State* vex_state )
{
   return do_put_x87 ( True/*moveRegs*/, x87_state, vex_state );
}
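
/* Editor's note: a sketch (guarded out, illustrative only) of a
   client round-tripping the x87 state through the 108-byte
   FSAVE-format buffer.  'st' is assumed to be a live guest state. */
#if 0
static void example_x87_roundtrip ( VexGuestX86State* st )
{
   /* 108 bytes = 14-word environment + 8 x 10-byte registers. */
   UChar buf[108];
   VexEmNote ew;
   LibVEX_GuestX86_get_x87( st, buf );
   /* ... client inspects or modifies buf here ... */
   ew = LibVEX_GuestX86_put_x87( buf, st );
   if (ew != EmNote_NONE) {
      /* emulation warning to report to the client */
   }
}
#endif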

/* VISIBLE TO LIBVEX CLIENT */
/* Return mxcsr from the supplied VexGuestX86State structure. */
UInt LibVEX_GuestX86_get_mxcsr ( /*IN*/VexGuestX86State* vex_state )
{
   return x86g_create_mxcsr ( vex_state->guest_SSEROUND );
}

/* VISIBLE TO LIBVEX CLIENT */
/* Modify the given VexGuestX86State structure according to the passed mxcsr
   value. */
VexEmNote LibVEX_GuestX86_put_mxcsr ( /*IN*/UInt mxcsr,
                                      /*MOD*/VexGuestX86State* vex_state)
{
   ULong w64 = x86g_check_ldmxcsr( mxcsr );
   vex_state->guest_SSEROUND = w64 & 0xFFFFFFFF;
   return (VexEmNote)(w64 >> 32);
}

/*---------------------------------------------------------------*/
/*--- Misc integer helpers, including rotates and CPUID.      ---*/
/*---------------------------------------------------------------*/

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for rotate right
   through the carry bit.  Result in low 32 bits,
   new flags (OSZACP) in high 32 bits.
*/
ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;

   switch (sz) {
      case 4:
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of = ((arg >> 31) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = (arg >> 1) | (cf << 31);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 2:
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of = ((arg >> 15) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7FFF) | (cf << 15);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 1:
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of = ((arg >> 7) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7F) | (cf << 7);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      default:
         vpanic("calculate_RCR: invalid size");
   }

   cf &= 1;
   of &= 1;
   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);

   return (((ULong)eflags_in) << 32) | ((ULong)arg);
}
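
/* Editor's note: a sketch (guarded out, illustrative only) of how the
   packed return value is unpacked.  Rotating 0x00000001 right by one
   with CF clear moves the old bit 0 into CF.  x86g_calculate_RCL
   below packs its result the same way. */
#if 0
static void example_rcr_unpack ( void )
{
   ULong r     = x86g_calculate_RCR( 0x00000001, 1, 0/*eflags_in*/, 4 );
   UInt  value = (UInt)r;           /* 0x00000000 */
   UInt  flags = (UInt)(r >> 32);   /* X86G_CC_MASK_C is set */
}
#endif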


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for rotate left
   through the carry bit.  Result in low 32 bits,
   new flags (OSZACP) in high 32 bits.
*/
ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;

   switch (sz) {
      case 4:
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 31) & 1;
            arg    = (arg << 1) | (cf & 1);
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 31) ^ cf) & 1;
         break;
      case 2:
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 15) & 1;
            arg    = 0xFFFF & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 15) ^ cf) & 1;
         break;
      case 1:
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 7) & 1;
            arg    = 0xFF & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 7) ^ cf) & 1;
         break;
      default:
         vpanic("calculate_RCL: invalid size");
   }

   cf &= 1;
   of &= 1;
   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);

   return (((ULong)eflags_in) << 32) | ((ULong)arg);
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for DAA/DAS/AAA/AAS.
   AX value in low half of arg, OSZACP in upper half.
   See guest-x86/toIR.c usage point for details.
*/
/* Returns 1 iff the low 8 bits of w32 have even parity, which is the
   sense of the x86 PF flag. */
static UInt calc_parity_8bit ( UInt w32 ) {
   UInt i;
   UInt p = 1;
   for (i = 0; i < 8; i++)
      p ^= (1 & (w32 >> i));
   return p;
}
UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
{
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   UInt result = 0;

   switch (opcode) {
      case 0x27: { /* DAA */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_C  = old_C;
            if (r_AL >= 0x100) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL + 0x60;
            r_C  = 1;
         } else {
            r_C = 0;
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x2F: { /* DAS */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            Bool borrow = r_AL < 6;
            r_AL = r_AL - 6;
            r_C  = old_C;
            if (borrow) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL - 0x60;
            r_C  = 1;
         } else {
            /* Intel docs say to do r_C = 0 here; that appears to be
               wrong, so C is left unchanged. */
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x37: { /* AAA */
         Bool nudge = r_AL > 0xF9;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_AH = r_AH + 1 + (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      case 0x3F: { /* AAS */
         Bool nudge = r_AL < 0x06;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL - 6;
            r_AH = r_AH - 1 - (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      default:
         vassert(0);
   }
   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
            | ( (r_AH & 0xFF) << 8 )
            | ( (r_AL & 0xFF) << 0 );
   return result;
}
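
/* Editor's note: a worked example (guarded out, illustrative only) of
   the argument/result packing.  DAA on AL=0x0F with all flags clear:
   the low nibble exceeds 9, so 6 is added. */
#if 0
static void example_daa ( void )
{
   UInt in     = 0x0000000F; /* AH=0, AL=0x0F, OSZACP (bits 16..31) clear */
   UInt out    = x86g_calculate_daa_das_aaa_aas( in, 0x27/*DAA*/ );
   UInt new_AL = out & 0xFF;                          /* 0x15 */
   UInt new_AF = (out >> (16 + X86G_CC_SHIFT_A)) & 1; /* 1 */
}
#endif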

UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
{
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   UInt result = 0;

   switch (opcode) {
      case 0xD4: { /* AAM */
         r_AH = r_AL / 10;
         r_AL = r_AL % 10;
         break;
      }
      case 0xD5: { /* AAD */
         r_AL = ((r_AH * 10) + r_AL) & 0xff;
         r_AH = 0;
         break;
      }
      default:
         vassert(0);
   }

   r_O = 0; /* let's say (undefined) */
   r_C = 0; /* let's say (undefined) */
   r_A = 0; /* let's say (undefined) */
   r_S = (r_AL & 0x80) ? 1 : 0;
   r_Z = (r_AL == 0) ? 1 : 0;
   r_P = calc_parity_8bit( r_AL );

   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
            | ( (r_AH & 0xFF) << 8 )
            | ( (r_AL & 0xFF) << 0 );
   return result;
}
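
/* Editor's note: a worked example (guarded out, illustrative only).
   AAM splits binary AL into decimal digits. */
#if 0
static void example_aam ( void )
{
   UInt out    = x86g_calculate_aad_aam( 43, 0xD4/*AAM*/ );
   UInt new_AH = (out >> 8) & 0xFF; /* 4 */
   UInt new_AL = out & 0xFF;        /* 3 */
}
#endif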


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, return 1. */
ULong x86g_dirtyhelper_RDTSC ( void )
{
# if defined(__i386__)
   ULong res;
   __asm__ __volatile__("rdtsc" : "=A" (res));
   return res;
# else
   return 1ULL;
# endif
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be a P55C (Intel Pentium/MMX) */
void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         st->guest_EAX = 0x1;
         st->guest_EBX = 0x756e6547;
         st->guest_ECX = 0x6c65746e;
         st->guest_EDX = 0x49656e69;
         break;
      default:
         st->guest_EAX = 0x543;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x8001bf;
         break;
   }
}
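
/* Editor's note: a sketch (guarded out, illustrative only) of how the
   leaf-0 vendor string is recovered.  The 12 ASCII bytes live in EBX,
   EDX, ECX, in that order: 0x756e6547 / 0x49656e69 / 0x6c65746e
   spells "Genu" "ineI" "ntel". */
#if 0
static void example_cpuid_vendor ( void )
{
   VexGuestX86State st;
   HChar vendor[13];
   Int i;
   st.guest_EAX = 0;
   x86g_dirtyhelper_CPUID_sse0( &st );
   for (i = 0; i < 4; i++) {
      vendor[i+0] = (HChar)(st.guest_EBX >> (8*i));
      vendor[i+4] = (HChar)(st.guest_EDX >> (8*i));
      vendor[i+8] = (HChar)(st.guest_ECX >> (8*i));
   }
   vendor[12] = 0; /* "GenuineIntel"; max supported leaf is in EAX */
}
#endif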

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be an Athlon "Classic" (Model 2, K75 "Pluto/Orion") */
/* But without 3DNow support (weird, but we really don't support it). */
void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      /* vendor ID */
      case 0:
         st->guest_EAX = 0x1;
         st->guest_EBX = 0x68747541;
         st->guest_ECX = 0x444d4163;
         st->guest_EDX = 0x69746e65;
         break;
      /* feature bits */
      case 1:
         st->guest_EAX = 0x621;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x183f9ff;
         break;
      /* Highest Extended Function Supported (0x80000004 brand string) */
      case 0x80000000:
         st->guest_EAX = 0x80000004;
         st->guest_EBX = 0x68747541;
         st->guest_ECX = 0x444d4163;
         st->guest_EDX = 0x69746e65;
         break;
      /* Extended Processor Info and Feature Bits */
      case 0x80000001:
         st->guest_EAX = 0x721;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
         break;
      /* Processor Brand String "AMD Athlon(tm) Processor" */
      case 0x80000002:
         st->guest_EAX = 0x20444d41;
         st->guest_EBX = 0x6c687441;
         st->guest_ECX = 0x74286e6f;
         st->guest_EDX = 0x5020296d;
         break;
      case 0x80000003:
         st->guest_EAX = 0x65636f72;
         st->guest_EBX = 0x726f7373;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x0;
         break;
      default:
         st->guest_EAX = 0x0;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x0;
         break;
   }
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be the following SSE1-capable CPU:
   vendor_id  : GenuineIntel
   cpu family : 6
   model      : 11
   model name : Intel(R) Pentium(R) III CPU family 1133MHz
   stepping   : 1
   cpu MHz    : 1131.013
   cache size : 512 KB
*/
void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         st->guest_EAX = 0x00000002;
         st->guest_EBX = 0x756e6547;
         st->guest_ECX = 0x6c65746e;
         st->guest_EDX = 0x49656e69;
         break;
      case 1:
         st->guest_EAX = 0x000006b1;
         st->guest_EBX = 0x00000004;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0383fbff;
         break;
      default:
         st->guest_EAX = 0x03020101;
         st->guest_EBX = 0x00000000;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0c040883;
         break;
   }
}

/* Claim to be the following SSE2-capable CPU:
   vendor_id       : GenuineIntel
   cpu family      : 15
   model           : 2
   model name      : Intel(R) Pentium(R) 4 CPU 3.00GHz
   stepping        : 9
   microcode       : 0x17
   cpu MHz         : 2992.577
   cache size      : 512 KB
   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca
                     cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht
                     tm pbe pebs bts cid xtpr
   clflush size    : 64
   cache_alignment : 128
   address sizes   : 36 bits physical, 32 bits virtual
*/
void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         st->guest_EAX = 0x00000002;
         st->guest_EBX = 0x756e6547;
         st->guest_ECX = 0x6c65746e;
         st->guest_EDX = 0x49656e69;
         break;
      case 1:
         st->guest_EAX = 0x00000f29;
         st->guest_EBX = 0x01020809;
         st->guest_ECX = 0x00004400;
         st->guest_EDX = 0xbfebfbff;
         break;
      default:
         st->guest_EAX = 0x03020101;
         st->guest_EBX = 0x00000000;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0c040883;
         break;
   }
}

/* Claim to be the following SSSE3-capable CPU (2 x ...):
   vendor_id       : GenuineIntel
   cpu family      : 6
   model           : 15
   model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
   stepping        : 6
   cpu MHz         : 2394.000
   cache size      : 4096 KB
   physical id     : 0
   siblings        : 2
   core id         : 0
   cpu cores       : 2
   fpu             : yes
   fpu_exception   : yes
   cpuid level     : 10
   wp              : yes
   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
                     mtrr pge mca cmov pat pse36 clflush dts acpi
                     mmx fxsr sse sse2 ss ht tm syscall nx lm
                     constant_tsc pni monitor ds_cpl vmx est tm2
                     cx16 xtpr lahf_lm
   bogomips        : 4798.78
   clflush size    : 64
   cache_alignment : 64
   address sizes   : 36 bits physical, 48 bits virtual
   power management:
*/
void x86g_dirtyhelper_CPUID_sse3 ( VexGuestX86State* st )
{
# define SET_ABCD(_a,_b,_c,_d)               \
     do { st->guest_EAX = (UInt)(_a);        \
          st->guest_EBX = (UInt)(_b);        \
          st->guest_ECX = (UInt)(_c);        \
          st->guest_EDX = (UInt)(_d);        \
     } while (0)

   switch (st->guest_EAX) {
      case 0x00000000:
         SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
         break;
      case 0x00000001:
         SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
         break;
      case 0x00000002:
         SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
         break;
      case 0x00000003:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000004: {
         switch (st->guest_ECX) {
            case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
                                      0x00000fff, 0x00000001); break;
            default:         SET_ABCD(0x00000000, 0x00000000,
                                      0x00000000, 0x00000000); break;
         }
         break;
      }
      case 0x00000005:
         SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
         break;
      case 0x00000006:
         SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
         break;
      case 0x00000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000008:
         SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000009:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x0000000a:
      unhandled_eax_value:
         SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000000:
         SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000001:
         SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
         break;
      case 0x80000002:
         SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
         break;
      case 0x80000003:
         SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
         break;
      case 0x80000004:
         SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
         break;
      case 0x80000005:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000006:
         SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
         break;
      case 0x80000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000008:
         SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
         break;
      default:
         goto unhandled_eax_value;
   }
# undef SET_ABCD
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, return 0. */
UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
{
# if defined(__i386__)
   UInt r = 0;
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
                              : "=a" (r) : "Nd" (portno));
         break;
      default:
         break;
   }
   return r;
# else
   return 0;
# endif
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, do nothing. */
void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
{
# if defined(__i386__)
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("outl %0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("outw %w0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("outb %b0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      default:
         break;
   }
# else
   /* do nothing */
# endif
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, write zeroes to the
   destination instead. */
/* op = 0: call the native SGDT instruction.
   op = 1: call the native SIDT instruction.
*/
void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
# if defined(__i386__)
   switch (op) {
      case 0:
         __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
         break;
      case 1:
         __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
         break;
      default:
         vpanic("x86g_dirtyhelper_SxDT");
   }
# else
   /* Fake a result: zero the 6-byte (16-bit limit + 32-bit base)
      destination buffer. */
   UChar* p = (UChar*)address;
   p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
# endif
}

/*---------------------------------------------------------------*/
/*--- Helpers for MMX/SSE/SSE2.                               ---*/
/*---------------------------------------------------------------*/

static inline UChar abdU8 ( UChar xx, UChar yy ) {
   return toUChar(xx>yy ? xx-yy : yy-xx);
}

static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
   return (((ULong)w1) << 32) | ((ULong)w0);
}

static inline UShort sel16x4_3 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(hi32 >> 16);
}
static inline UShort sel16x4_2 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(hi32);
}
static inline UShort sel16x4_1 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUShort(lo32 >> 16);
}
static inline UShort sel16x4_0 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUShort(lo32);
}

static inline UChar sel8x8_7 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 24);
}
static inline UChar sel8x8_6 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 16);
}
static inline UChar sel8x8_5 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 8);
}
static inline UChar sel8x8_4 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 0);
}
static inline UChar sel8x8_3 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 24);
}
static inline UChar sel8x8_2 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 16);
}
static inline UChar sel8x8_1 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 8);
}
static inline UChar sel8x8_0 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 0);
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
{
   return
      mk32x2(
         (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
            + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
         (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
            + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
      );
}
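
/* Editor's note: a worked example (guarded out, illustrative only).
   Corresponding signed 16-bit lanes are multiplied and adjacent
   products summed into two 32-bit lanes. */
#if 0
static void example_pmaddwd ( void )
{
   ULong r = x86g_calculate_mmx_pmaddwd( 0x0001000200030004ULL,
                                         0x0001000100010001ULL );
   /* r == 0x0000000300000007ULL: hi32 = 1*1 + 2*1, lo32 = 3*1 + 4*1 */
}
#endif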

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
{
   UInt t = 0;
   t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
   t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
   t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
   t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
   t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
   t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
   t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
   t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
   t &= 0xFFFF;
   return (ULong)t;
}
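
/* Editor's note: a worked example (guarded out, illustrative only).
   psadbw sums the absolute differences of the eight byte lanes into
   the low 16 bits of the result. */
#if 0
static void example_psadbw ( void )
{
   ULong r = x86g_calculate_mmx_psadbw( 0x0000000000000000ULL,
                                        0x0101010101010101ULL );
   /* r == 8: eight lanes, each differing by 1 */
}
#endif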


/*---------------------------------------------------------------*/
/*--- Helpers for dealing with segment overrides.             ---*/
/*---------------------------------------------------------------*/

static inline
UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
{
   UInt lo  = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
   UInt mid =   0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
   UInt hi  =   0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
   return (hi << 24) | (mid << 16) | lo;
}

static inline
UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
{
   UInt lo    = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
   UInt hi    =    0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
   UInt limit = (hi << 16) | lo;
   if (ent->LdtEnt.Bits.Granularity)
      limit = (limit << 12) | 0xFFF;
   return limit;
}
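
/* Editor's note: a sketch (guarded out, illustrative only) of the
   granularity scaling.  With the granularity bit set, the 20-bit raw
   limit is in 4K pages, so 0xFFFFF scales to the full 4GB space. */
#if 0
static void example_segdescr_limit ( void )
{
   VexGuestX86SegDescr d;
   UInt limit;
   d.LdtEnt.Bits.LimitLow    = 0xFFFF;
   d.LdtEnt.Bits.LimitHi     = 0xF;
   d.LdtEnt.Bits.Granularity = 1;
   limit = get_segdescr_limit(&d); /* (0xFFFFF << 12) | 0xFFF == 0xFFFFFFFF */
}
#endif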

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
                              UInt seg_selector, UInt virtual_addr )
{
   UInt tiBit, base, limit;
   VexGuestX86SegDescr* the_descrs;

   Bool verboze = False;

   /* If this isn't true, we're in Big Trouble. */
   vassert(8 == sizeof(VexGuestX86SegDescr));

   if (verboze)
      vex_printf("x86g_use_seg_selector: "
                 "seg_selector = 0x%x, vaddr = 0x%x\n",
                 seg_selector, virtual_addr);

   /* Check for wildly invalid selector. */
   if (seg_selector & ~0xFFFF)
      goto bad;

   seg_selector &= 0x0000FFFF;

   /* Sanity check the segment selector.  Ensure that RPL=11b (least
      privilege).  This forms the bottom 2 bits of the selector. */
   if ((seg_selector & 3) != 3)
      goto bad;

   /* Extract the TI bit (0 means GDT, 1 means LDT) */
   tiBit = (seg_selector >> 2) & 1;

   /* Convert the segment selector into a table index */
   seg_selector >>= 3;
   vassert(seg_selector < 8192);

   if (tiBit == 0) {

      /* GDT access. */
      /* Do we actually have a GDT to look at? */
      if (gdt == 0)
         goto bad;

      /* Check for access to non-existent entry. */
      if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
         goto bad;

      the_descrs = (VexGuestX86SegDescr*)gdt;
      base  = get_segdescr_base (&the_descrs[seg_selector]);
      limit = get_segdescr_limit(&the_descrs[seg_selector]);

   } else {

      /* All the same stuff, except for the LDT. */
      if (ldt == 0)
         goto bad;

      if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
         goto bad;

      the_descrs = (VexGuestX86SegDescr*)ldt;
      base  = get_segdescr_base (&the_descrs[seg_selector]);
      limit = get_segdescr_limit(&the_descrs[seg_selector]);

   }

   /* Do the limit check.  Note, this check is just slightly too
      slack.  Really it should be "if (virtual_addr + size - 1 >=
      limit)," but we don't have the size info to hand.  Getting it
      could be significantly complex. */
   if (virtual_addr >= limit)
      goto bad;

   if (verboze)
      vex_printf("x86g_use_seg_selector: "
                 "base = 0x%x, addr = 0x%x\n",
                 base, base + virtual_addr);

   /* High 32 bits are zero, indicating success. */
   return (ULong)( ((UInt)virtual_addr) + base );

  bad:
   return 1ULL << 32;
}
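
/* Editor's note: a usage sketch (guarded out, illustrative only).
   A selector is (index << 3) | (TI << 2) | RPL; 'my_gdt' is a
   hypothetical caller-supplied GDT pointer. */
#if 0
static void example_use_seg_selector ( HWord my_gdt )
{
   /* GDT entry 1 with TI=0 and RPL=3 is selector 0x0B. */
   ULong r = x86g_use_seg_selector( 0/*no LDT*/, my_gdt,
                                    0x0B, 0x1000 );
   if ((r >> 32) != 0) {
      /* bad selector, missing table, or limit exceeded */
   }
}
#endif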


/*---------------------------------------------------------------*/
/*--- Helpers for dealing with, and describing,               ---*/
/*--- guest state as a whole.                                 ---*/
/*---------------------------------------------------------------*/

/* Initialise the entire x86 guest state. */
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
{
   vex_state->host_EvC_FAILADDR = 0;
   vex_state->host_EvC_COUNTER = 0;

   vex_state->guest_EAX = 0;
   vex_state->guest_ECX = 0;
   vex_state->guest_EDX = 0;
   vex_state->guest_EBX = 0;
   vex_state->guest_ESP = 0;
   vex_state->guest_EBP = 0;
   vex_state->guest_ESI = 0;
   vex_state->guest_EDI = 0;

   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = 0;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
   vex_state->guest_DFLAG   = 1; /* forwards */
   vex_state->guest_IDFLAG  = 0;
   vex_state->guest_ACFLAG  = 0;

   vex_state->guest_EIP = 0;

   /* Initialise the simulated FPU */
   x86g_dirtyhelper_FINIT( vex_state );

   /* Initialise the SSE state. */
# define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;

   vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
   SSEZERO(vex_state->guest_XMM0);
   SSEZERO(vex_state->guest_XMM1);
   SSEZERO(vex_state->guest_XMM2);
   SSEZERO(vex_state->guest_XMM3);
   SSEZERO(vex_state->guest_XMM4);
   SSEZERO(vex_state->guest_XMM5);
   SSEZERO(vex_state->guest_XMM6);
   SSEZERO(vex_state->guest_XMM7);

# undef SSEZERO

   vex_state->guest_CS  = 0;
   vex_state->guest_DS  = 0;
   vex_state->guest_ES  = 0;
   vex_state->guest_FS  = 0;
   vex_state->guest_GS  = 0;
   vex_state->guest_SS  = 0;
   vex_state->guest_LDT = 0;
   vex_state->guest_GDT = 0;

   vex_state->guest_EMNOTE = EmNote_NONE;

   /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
   vex_state->guest_CMSTART = 0;
   vex_state->guest_CMLEN   = 0;

   vex_state->guest_NRADDR   = 0;
   vex_state->guest_SC_CLASS = 0;
   vex_state->guest_IP_AT_SYSCALL = 0;

   vex_state->padding1 = 0;
}
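
/* Editor's note: a sketch (guarded out, illustrative only) of typical
   client-side setup.  'initial_esp' and 'initial_eip' are hypothetical
   values supplied by the client's loader. */
#if 0
static void example_client_setup ( UInt initial_esp, UInt initial_eip )
{
   VexGuestX86State st;
   LibVEX_GuestX86_initialise( &st );
   /* The client then fills in whatever nonzero state it needs. */
   st.guest_ESP = initial_esp;
   st.guest_EIP = initial_eip;
}
#endif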


/* Figure out if any part of the guest state contained in minoff
   .. maxoff requires precise memory exceptions.  If in doubt return
   True (but this generates significantly slower code).

   By default we enforce precise exns for guest %ESP, %EBP and %EIP
   only.  These are the minimum needed to extract correct stack
   backtraces from x86 code.

   Only %ESP is needed in mode VexRegUpdSpAtMemAccess.
*/
Bool guest_x86_state_requires_precise_mem_exns (
        Int minoff, Int maxoff, VexRegisterUpdates pxControl
     )
{
   Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
   Int ebp_max = ebp_min + 4 - 1;
   Int esp_min = offsetof(VexGuestX86State, guest_ESP);
   Int esp_max = esp_min + 4 - 1;
   Int eip_min = offsetof(VexGuestX86State, guest_EIP);
   Int eip_max = eip_min + 4 - 1;

   if (maxoff < esp_min || minoff > esp_max) {
      /* no overlap with esp */
      if (pxControl == VexRegUpdSpAtMemAccess)
         return False; // We only need to check stack pointer.
   } else {
      return True;
   }

   if (maxoff < ebp_min || minoff > ebp_max) {
      /* no overlap with ebp */
   } else {
      return True;
   }

   if (maxoff < eip_min || minoff > eip_max) {
      /* no overlap with eip */
   } else {
      return True;
   }

   return False;
}


#define ALWAYSDEFD(field)                            \
    { offsetof(VexGuestX86State, field),             \
      (sizeof ((VexGuestX86State*)0)->field) }

VexGuestLayout
   x86guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestX86State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestX86State,guest_ESP),
          .sizeof_SP = 4,

          /* Describe the frame pointer. */
          .offset_FP = offsetof(VexGuestX86State,guest_EBP),
          .sizeof_FP = 4,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestX86State,guest_EIP),
          .sizeof_IP = 4,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'. */
          .n_alwaysDefd = 24,

          /* flags thunk: OP and NDEP are always defd, whereas DEP1
             and DEP2 have to be tracked.  See detailed comment in
             gdefs.h on meaning of thunk fields. */
          .alwaysDefd
             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
                 /*  4 */ ALWAYSDEFD(guest_ACFLAG),
                 /*  5 */ ALWAYSDEFD(guest_EIP),
                 /*  6 */ ALWAYSDEFD(guest_FTOP),
                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
                 /*  9 */ ALWAYSDEFD(guest_FC3210),
                 /* 10 */ ALWAYSDEFD(guest_CS),
                 /* 11 */ ALWAYSDEFD(guest_DS),
                 /* 12 */ ALWAYSDEFD(guest_ES),
                 /* 13 */ ALWAYSDEFD(guest_FS),
                 /* 14 */ ALWAYSDEFD(guest_GS),
                 /* 15 */ ALWAYSDEFD(guest_SS),
                 /* 16 */ ALWAYSDEFD(guest_LDT),
                 /* 17 */ ALWAYSDEFD(guest_GDT),
                 /* 18 */ ALWAYSDEFD(guest_EMNOTE),
                 /* 19 */ ALWAYSDEFD(guest_SSEROUND),
                 /* 20 */ ALWAYSDEFD(guest_CMSTART),
                 /* 21 */ ALWAYSDEFD(guest_CMLEN),
                 /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
                 /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
               }
        };


/*---------------------------------------------------------------*/
/*--- end                                 guest_x86_helpers.c ---*/
/*---------------------------------------------------------------*/