1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                               guest_x86_helpers.c ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2013 OpenWorks LLP
11       info@open-works.net
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26    02110-1301, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 
30    Neither the names of the U.S. Department of Energy nor the
31    University of California nor the names of its contributors may be
32    used to endorse or promote products derived from this software
33    without prior written permission.
34 */
35 
36 #include "libvex_basictypes.h"
37 #include "libvex_emnote.h"
38 #include "libvex_guest_x86.h"
39 #include "libvex_ir.h"
40 #include "libvex.h"
41 
42 #include "main_util.h"
43 #include "main_globals.h"
44 #include "guest_generic_bb_to_IR.h"
45 #include "guest_x86_defs.h"
46 #include "guest_generic_x87.h"
47 
48 
49 /* This file contains helper functions for x86 guest code.
50    Calls to these functions are generated by the back end.
51    These calls are of course in the host machine code and
52    this file will be compiled to host machine code, so that
53    all makes sense.
54 
55    Only change the signatures of these helper functions very
56    carefully.  If you change the signature here, you'll have to change
57    the parameters passed to it in the IR calls constructed by
58    guest-x86/toIR.c.
59 
60    The convention used is that all functions called from generated
61    code are named x86g_<something>, and any function whose name lacks
62    that prefix is not called from generated code.  Note that some
63    LibVEX_* functions can however be called by VEX's client, but that
64    is not the same as calling them from VEX-generated code.
65 */
66 
67 
/* Set to 1 to get detailed profiling info about use of the flag
   machinery.  When non-zero, the counters, showCounts() and
   initCounts() guarded by "#if PROFILE_EFLAGS" further down are
   compiled in, and the x86g_calculate_* helpers update them on
   every call.  When 0, all of that is compiled out. */
#define PROFILE_EFLAGS 0
71 
72 
73 /*---------------------------------------------------------------*/
74 /*--- %eflags run-time helpers.                               ---*/
75 /*---------------------------------------------------------------*/
76 
/* Parity-flag lookup, indexed by a byte value.  An entry is
   X86G_CC_MASK_P when the index has an even number of 1 bits and 0
   when it has an odd number.  The ACTIONS_* macros below index it
   with the low 8 bits of the result, as parity_table[(UChar)res]. */
static const UChar parity_table[256] = {
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
};
111 
112 /* generalised left-shifter */
lshift(Int x,Int n)113 inline static Int lshift ( Int x, Int n )
114 {
115    if (n >= 0)
116       return (UInt)x << n;
117    else
118       return x >> (-n);
119 }
120 
121 /* identity on ULong */
idULong(ULong x)122 static inline ULong idULong ( ULong x )
123 {
124    return x;
125 }
126 
127 
/* Common prologue for every ACTIONS_* macro below.  It must be
   expanded inside a scope where cc_dep1_formal, cc_dep2_formal and
   cc_ndep_formal are visible, and it declares these locals:

      DATA_MASK  all-ones mask for the operand width: 0xFF for 8,
                 0xFFFF for 16, and 0xFFFFFFFF for anything else
      SIGN_MASK  the single bit (data_bits - 1)
      CC_DEP1, CC_DEP2, CC_NDEP
                 copies of the three formals

   The argument is parenthesised at every use so that an expression
   argument (for example "w ? 8 : 16") cannot re-associate with the
   neighbouring == and - operators.  The parameter is also no longer
   spelled with a leading double underscore, which C reserves for
   the implementation. */
#define PREAMBLE(data_bits)                                     \
   /* const */ UInt DATA_MASK                                   \
      = (data_bits)==8 ? 0xFF                                   \
                       : ((data_bits)==16 ? 0xFFFF              \
                                          : 0xFFFFFFFF);        \
   /* const */ UInt SIGN_MASK = 1u << ((data_bits) - 1);        \
   /* const */ UInt CC_DEP1 = cc_dep1_formal;                   \
   /* const */ UInt CC_DEP2 = cc_dep2_formal;                   \
   /* const */ UInt CC_NDEP = cc_ndep_formal;                   \
   /* Four bogus self-assignments, which hopefully the     */   \
   /* compiler can optimise away, and which stop it        */   \
   /* complaining about locals a given ACTIONS_* body      */   \
   /* never reads.                                         */   \
   SIGN_MASK = SIGN_MASK;                                       \
   DATA_MASK = DATA_MASK;                                       \
   CC_DEP2 = CC_DEP2;                                           \
   CC_NDEP = CC_NDEP;
144 
145 
146 /*-------------------------------------------------------------*/
147 
/* Flag evaluation for ADD at width NBITS (instantiated with 8, 16
   and 32), UTYPE being the unsigned C type of that width.  CC_DEP1
   is taken as the left operand and CC_DEP2 as the right operand.
   Expands to a block that returns the C, P, A, Z, S and O bits
   OR-ed together. */
#define ACTIONS_ADD(NBITS,UTYPE)                                \
{                                                               \
   PREAMBLE(NBITS);                                             \
   { UInt lhs   = CC_DEP1;                                      \
     UInt rhs   = CC_DEP2;                                      \
     UInt sum   = lhs + rhs;                                    \
     /* carry: the width-truncated sum fell below lhs */        \
     UInt c_bit = (UTYPE)sum < (UTYPE)lhs;                      \
     UInt p_bit = parity_table[(UChar)sum];                     \
     /* bit 4 of the xor is the carry out of bit 3 */           \
     UInt a_bit = (sum ^ lhs ^ rhs) & 0x10;                     \
     UInt z_bit = ((UTYPE)sum == 0) << 6;                       \
     /* bring bit NBITS-1 of the result down/up to bit 7 */     \
     UInt s_bit = lshift(sum, 8 - NBITS) & 0x80;                \
     /* operands agree in their top bit but the result    */    \
     /* differs; bit NBITS-1 is moved to bit 11 and masked */   \
     UInt o_bit = lshift((lhs ^ rhs ^ -1) & (lhs ^ sum),        \
                         12 - NBITS) & X86G_CC_MASK_O;          \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
166 
167 /*-------------------------------------------------------------*/
168 
/* Flag evaluation for SUB at width NBITS, UTYPE being the unsigned
   C type of that width.  CC_DEP1 is taken as the left operand
   (minuend) and CC_DEP2 as the right operand (subtrahend).  Expands
   to a block that returns the C, P, A, Z, S and O bits OR-ed
   together. */
#define ACTIONS_SUB(NBITS,UTYPE)                                \
{                                                               \
   PREAMBLE(NBITS);                                             \
   { UInt lhs   = CC_DEP1;                                      \
     UInt rhs   = CC_DEP2;                                      \
     UInt diff  = lhs - rhs;                                    \
     /* borrow: unsigned lhs is smaller than unsigned rhs */    \
     UInt c_bit = (UTYPE)lhs < (UTYPE)rhs;                      \
     UInt p_bit = parity_table[(UChar)diff];                    \
     UInt a_bit = (diff ^ lhs ^ rhs) & 0x10;                    \
     UInt z_bit = ((UTYPE)diff == 0) << 6;                      \
     UInt s_bit = lshift(diff, 8 - NBITS) & 0x80;               \
     /* operands differ in their top bit and the result   */    \
     /* differs from lhs; bit NBITS-1 is moved to bit 11  */    \
     UInt o_bit = lshift((lhs ^ rhs) & (lhs ^ diff),            \
                         12 - NBITS) & X86G_CC_MASK_O;          \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
187 
188 /*-------------------------------------------------------------*/
189 
/* Flag evaluation for ADC (add with carry) at width NBITS.  The
   incoming carry is the X86G_CC_MASK_C bit of CC_NDEP; the left
   operand is CC_DEP1 and the right operand is obtained as
   CC_DEP2 ^ incoming-carry.  Expands to a block that returns the
   C, P, A, Z, S and O bits OR-ed together. */
#define ACTIONS_ADC(NBITS,UTYPE)                                \
{                                                               \
   PREAMBLE(NBITS);                                             \
   { UInt carry_in = CC_NDEP & X86G_CC_MASK_C;                  \
     UInt lhs      = CC_DEP1;                                   \
     UInt rhs      = CC_DEP2 ^ carry_in;                        \
     UInt sum      = (lhs + rhs) + carry_in;                    \
     /* with a carry coming in, a truncated result equal */     \
     /* to lhs also signals a carry out, hence <= vs <   */     \
     UInt c_bit    = carry_in                                   \
                        ? ((UTYPE)sum <= (UTYPE)lhs)            \
                        : ((UTYPE)sum <  (UTYPE)lhs);           \
     UInt p_bit    = parity_table[(UChar)sum];                  \
     UInt a_bit    = (sum ^ lhs ^ rhs) & 0x10;                  \
     UInt z_bit    = ((UTYPE)sum == 0) << 6;                    \
     UInt s_bit    = lshift(sum, 8 - NBITS) & 0x80;             \
     UInt o_bit    = lshift((lhs ^ rhs ^ -1) & (lhs ^ sum),     \
                            12 - NBITS) & X86G_CC_MASK_O;       \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
212 
213 /*-------------------------------------------------------------*/
214 
/* Flag evaluation for SBB (subtract with borrow) at width NBITS.
   The incoming borrow is the X86G_CC_MASK_C bit of CC_NDEP; the
   left operand is CC_DEP1 and the right operand is obtained as
   CC_DEP2 ^ incoming-borrow.  Expands to a block that returns the
   C, P, A, Z, S and O bits OR-ed together. */
#define ACTIONS_SBB(NBITS,UTYPE)                                \
{                                                               \
   PREAMBLE(NBITS);                                             \
   { UInt borrow_in = CC_NDEP & X86G_CC_MASK_C;                 \
     UInt lhs       = CC_DEP1;                                  \
     UInt rhs       = CC_DEP2 ^ borrow_in;                      \
     UInt diff      = (lhs - rhs) - borrow_in;                  \
     /* with a borrow coming in, equal operands also */         \
     /* borrow out, hence <= rather than <           */         \
     UInt c_bit     = borrow_in                                 \
                         ? ((UTYPE)lhs <= (UTYPE)rhs)           \
                         : ((UTYPE)lhs <  (UTYPE)rhs);          \
     UInt p_bit     = parity_table[(UChar)diff];                \
     UInt a_bit     = (diff ^ lhs ^ rhs) & 0x10;                \
     UInt z_bit     = ((UTYPE)diff == 0) << 6;                  \
     UInt s_bit     = lshift(diff, 8 - NBITS) & 0x80;           \
     UInt o_bit     = lshift((lhs ^ rhs) & (lhs ^ diff),        \
                             12 - NBITS) & X86G_CC_MASK_O;      \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
237 
238 /*-------------------------------------------------------------*/
239 
/* Flag evaluation for the logical operations at width NBITS.
   CC_DEP1 is the value the flags are derived from.  C, A and O are
   always zero here, so only P, Z and S contribute to the returned
   word. */
#define ACTIONS_LOGIC(NBITS,UTYPE)                              \
{                                                               \
   PREAMBLE(NBITS);                                             \
   { UInt p_bit = parity_table[(UChar)CC_DEP1];                 \
     UInt z_bit = ((UTYPE)CC_DEP1 == 0) << 6;                   \
     UInt s_bit = lshift(CC_DEP1, 8 - NBITS) & 0x80;            \
     /* cf, af and of are all 0 and drop out of the OR */       \
     return p_bit | z_bit | s_bit;                              \
   }                                                            \
}
253 
254 /*-------------------------------------------------------------*/
255 
/* Flag evaluation for INC at width NBITS.  CC_DEP1 is the result of
   the increment; the value before it is reconstructed as result - 1.
   The carry bit is not recomputed: it is copied from the
   X86G_CC_MASK_C bit of CC_NDEP.  Expands to a block that returns
   the C, P, A, Z, S and O bits OR-ed together. */
#define ACTIONS_INC(NBITS,UTYPE)                                \
{                                                               \
   PREAMBLE(NBITS);                                             \
   { UInt after  = CC_DEP1;                                     \
     UInt before = after - 1;                                   \
     UInt c_bit  = CC_NDEP & X86G_CC_MASK_C;                    \
     UInt p_bit  = parity_table[(UChar)after];                  \
     UInt a_bit  = (after ^ before ^ 1u) & 0x10;                \
     UInt z_bit  = ((UTYPE)after == 0) << 6;                    \
     UInt s_bit  = lshift(after, 8 - NBITS) & 0x80;             \
     /* overflow exactly when the result is SIGN_MASK */        \
     UInt o_bit  = ((after & DATA_MASK) == SIGN_MASK) << 11;    \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
273 
274 /*-------------------------------------------------------------*/
275 
/* Flag evaluation for DEC at width NBITS.  CC_DEP1 is the result of
   the decrement; the value before it is reconstructed as result + 1.
   The carry bit is not recomputed: it is copied from the
   X86G_CC_MASK_C bit of CC_NDEP.  Expands to a block that returns
   the C, P, A, Z, S and O bits OR-ed together. */
#define ACTIONS_DEC(NBITS,UTYPE)                                \
{                                                               \
   PREAMBLE(NBITS);                                             \
   { UInt after  = CC_DEP1;                                     \
     UInt before = after + 1;                                   \
     UInt c_bit  = CC_NDEP & X86G_CC_MASK_C;                    \
     UInt p_bit  = parity_table[(UChar)after];                  \
     UInt a_bit  = (after ^ before ^ 1u) & 0x10;                \
     UInt z_bit  = ((UTYPE)after == 0) << 6;                    \
     UInt s_bit  = lshift(after, 8 - NBITS) & 0x80;             \
     /* overflow exactly when the result is SIGN_MASK - 1 */    \
     UInt o_bit  = ((after & DATA_MASK)                         \
                    == ((UInt)SIGN_MASK - 1)) << 11;            \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
294 
295 /*-------------------------------------------------------------*/
296 
/* Flag evaluation for SHL at width NBITS.  P, Z and S come from
   CC_DEP1.  The carry is bit NBITS-1 of CC_DEP2, and the overflow
   bit is bit NBITS-1 of (CC_DEP2 ^ CC_DEP1) moved to bit 11.
   NOTE(review): CC_DEP2 is presumably the value shifted by one place
   fewer than CC_DEP1 -- confirm against the IR generator. */
#define ACTIONS_SHL(NBITS,UTYPE)                                \
{                                                               \
   PREAMBLE(NBITS);                                             \
   { UInt c_bit = (CC_DEP2 >> (NBITS - 1)) & X86G_CC_MASK_C;    \
     UInt p_bit = parity_table[(UChar)CC_DEP1];                 \
     UInt a_bit = 0; /* undefined */                            \
     UInt z_bit = ((UTYPE)CC_DEP1 == 0) << 6;                   \
     UInt s_bit = lshift(CC_DEP1, 8 - NBITS) & 0x80;            \
     /* of is defined if shift count == 1 */                    \
     UInt o_bit = lshift(CC_DEP2 ^ CC_DEP1, 12 - NBITS)         \
                  & X86G_CC_MASK_O;                             \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
312 
313 /*-------------------------------------------------------------*/
314 
/* Flag evaluation for SHR at width NBITS.  P, Z and S come from
   CC_DEP1.  The carry is bit 0 of CC_DEP2, and the overflow bit is
   bit NBITS-1 of (CC_DEP2 ^ CC_DEP1) moved to bit 11.
   NOTE(review): CC_DEP2 is presumably the value shifted by one place
   fewer than CC_DEP1 -- confirm against the IR generator. */
#define ACTIONS_SHR(NBITS,UTYPE)                                \
{                                                               \
   PREAMBLE(NBITS);                                             \
   { UInt c_bit = CC_DEP2 & 1;                                  \
     UInt p_bit = parity_table[(UChar)CC_DEP1];                 \
     UInt a_bit = 0; /* undefined */                            \
     UInt z_bit = ((UTYPE)CC_DEP1 == 0) << 6;                   \
     UInt s_bit = lshift(CC_DEP1, 8 - NBITS) & 0x80;            \
     /* of is defined if shift count == 1 */                    \
     UInt o_bit = lshift(CC_DEP2 ^ CC_DEP1, 12 - NBITS)         \
                  & X86G_CC_MASK_O;                             \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
330 
331 /*-------------------------------------------------------------*/
332 
/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags.  Every flag other than C and O
   is passed through unchanged from CC_NDEP.  For O, the result's
   bit NBITS-1 and its bit 0 are each moved to bit 11 and xor-ed. */
#define ACTIONS_ROL(NBITS,UTYPE)                                \
{                                                               \
   PREAMBLE(NBITS);                                             \
   { UInt kept  = CC_NDEP                                       \
                  & ~(X86G_CC_MASK_O | X86G_CC_MASK_C);         \
     UInt c_bit = X86G_CC_MASK_C & CC_DEP1;                     \
     UInt o_bit = X86G_CC_MASK_O                                \
                  & (lshift(CC_DEP1, 11-(NBITS-1))              \
                     ^ lshift(CC_DEP1, 11));                    \
     return kept | c_bit | o_bit;                               \
   }                                                            \
}
347 
348 /*-------------------------------------------------------------*/
349 
/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags.  Every flag other than C and O
   is passed through unchanged from CC_NDEP.  For O, the result's
   bit NBITS-1 and its bit NBITS-2 are each moved to bit 11 and
   xor-ed. */
#define ACTIONS_ROR(NBITS,UTYPE)                                \
{                                                               \
   PREAMBLE(NBITS);                                             \
   { UInt kept  = CC_NDEP                                       \
                  & ~(X86G_CC_MASK_O | X86G_CC_MASK_C);         \
     UInt c_bit = X86G_CC_MASK_C & (CC_DEP1 >> (NBITS-1));      \
     UInt o_bit = X86G_CC_MASK_O                                \
                  & (lshift(CC_DEP1, 11-(NBITS-1))              \
                     ^ lshift(CC_DEP1, 11-(NBITS-1)+1));        \
     return kept | c_bit | o_bit;                               \
   }                                                            \
}
364 
365 /*-------------------------------------------------------------*/
366 
/* Flag evaluation for unsigned multiply at width NBITS.
      UTYPE     unsigned C type of the operand width
      NARROW1   conversion applied to produce a UTYPE value
      U2TYPE    unsigned C type of twice the operand width
      NARROW2   conversion applied to produce a U2TYPE value
   CC_DEP1 and CC_DEP2 are the two factors.  C and O are both set
   exactly when the upper half of the double-width product is
   non-zero; P, Z and S are derived from the lower half. */
#define ACTIONS_UMUL(NBITS, UTYPE,  NARROW1,                    \
                            U2TYPE, NARROW2)                    \
{                                                               \
   PREAMBLE(NBITS);                                             \
   { UTYPE  lo                                                  \
        = NARROW1( ((UTYPE)CC_DEP1)                             \
                   * ((UTYPE)CC_DEP2) );                        \
     U2TYPE wide                                                \
        = NARROW2(                                              \
             ((U2TYPE)((UTYPE)CC_DEP1))                         \
             * ((U2TYPE)((UTYPE)CC_DEP2)) );                    \
     UTYPE  hi    = NARROW1(wide >>/*u*/ NBITS);                \
     UInt   c_bit = (hi != 0);                                  \
     UInt   p_bit = parity_table[(UChar)lo];                    \
     UInt   a_bit = 0; /* undefined */                          \
     UInt   z_bit = (lo == 0) << 6;                             \
     UInt   s_bit = lshift(lo, 8 - NBITS) & 0x80;               \
     UInt   o_bit = c_bit << 11;                                \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
390 
391 /*-------------------------------------------------------------*/
392 
/* Flag evaluation for signed multiply at width NBITS.
      STYPE     signed C type of the operand width
      NARROW_S  conversion applied to produce an STYPE value
      S2TYPE    signed C type of twice the operand width
      NARROW_2S conversion applied to produce an S2TYPE value
   CC_DEP1 and CC_DEP2 are the two factors.  C and O are both set
   exactly when the upper half of the double-width product differs
   from the lower half shifted right (signed) by NBITS-1 places;
   P, Z and S are derived from the lower half. */
#define ACTIONS_SMUL(NBITS, STYPE,  NARROW_S,                   \
                            S2TYPE, NARROW_2S)                  \
{                                                               \
   PREAMBLE(NBITS);                                             \
   { STYPE  lo                                                  \
        = NARROW_S( ((S2TYPE)(STYPE)CC_DEP1)                    \
                    * ((S2TYPE)(STYPE)CC_DEP2) );               \
     S2TYPE wide                                                \
        = NARROW_2S(                                            \
             ((S2TYPE)((STYPE)CC_DEP1))                         \
             * ((S2TYPE)((STYPE)CC_DEP2)) );                    \
     STYPE  hi    = NARROW_S(wide >>/*s*/ NBITS);               \
     UInt   c_bit = (hi != (lo >>/*s*/ (NBITS-1)));             \
     UInt   p_bit = parity_table[(UChar)lo];                    \
     UInt   a_bit = 0; /* undefined */                          \
     UInt   z_bit = (lo == 0) << 6;                             \
     UInt   s_bit = lshift(lo, 8 - NBITS) & 0x80;               \
     UInt   o_bit = c_bit << 11;                                \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
416 
417 
418 #if PROFILE_EFLAGS
419 
/* Set to True by initCounts() once the tables below have been
   zeroed; each profiled helper calls initCounts() while this is
   still False. */
static Bool initted     = False;

/* C flag, fast route: per-cc_op count of x86g_calculate_eflags_c
   calls answered by its fast-case switch. */
static UInt tabc_fast[X86G_CC_OP_NUMBER];
/* C flag, slow route: per-cc_op count of x86g_calculate_eflags_c
   calls that fell through to the full flag computation. */
static UInt tabc_slow[X86G_CC_OP_NUMBER];
/* table for calculate_cond, indexed [cc_op][cond] */
static UInt tab_cond[X86G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

/* True whenever the combined call count has its low 22 bits clear,
   i.e. is a multiple of 0x400000; used to trigger showCounts(). */
#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
434 
435 
showCounts(void)436 static void showCounts ( void )
437 {
438    Int op, co;
439    HChar ch;
440    vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
441               n_calc_all, n_calc_cond, n_calc_c);
442 
443    vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
444               "    S   NS    P   NP    L   NL   LE  NLE\n");
445    vex_printf("     -----------------------------------------------------"
446               "----------------------------------------\n");
447    for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
448 
449       ch = ' ';
450       if (op > 0 && (op-1) % 3 == 0)
451          ch = 'B';
452       if (op > 0 && (op-1) % 3 == 1)
453          ch = 'W';
454       if (op > 0 && (op-1) % 3 == 2)
455          ch = 'L';
456 
457       vex_printf("%2d%c: ", op, ch);
458       vex_printf("%6u ", tabc_slow[op]);
459       vex_printf("%6u ", tabc_fast[op]);
460       for (co = 0; co < 16; co++) {
461          Int n = tab_cond[op][co];
462          if (n >= 1000) {
463             vex_printf(" %3dK", n / 1000);
464          } else
465          if (n >= 0) {
466             vex_printf(" %3d ", n );
467          } else {
468             vex_printf("     ");
469          }
470       }
471       vex_printf("\n");
472    }
473    vex_printf("\n");
474 }
475 
initCounts(void)476 static void initCounts ( void )
477 {
478    Int op, co;
479    initted = True;
480    for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
481       tabc_fast[op] = tabc_slow[op] = 0;
482       for (co = 0; co < 16; co++)
483          tab_cond[op][co] = 0;
484    }
485 }
486 
487 #endif /* PROFILE_EFLAGS */
488 
489 
/* NOT CALLED DIRECTLY FROM GENERATED CODE */
/* Calculate all the 6 flags from the supplied thunk parameters.
   Worker function shared by the x86g_calculate_* helpers and the
   LibVEX_GuestX86_* accessors in this file.

   cc_op selects how the three operands are interpreted.  Each
   ACTIONS_* macro expands to a block that ends in a return, so no
   case falls through into the next.  The parameter names
   cc_dep1_formal, cc_dep2_formal and cc_ndep_formal are referenced
   by name from PREAMBLE and must not be changed.

   An unrecognised cc_op is printed and then reported via vpanic;
   there is no return statement after that call. */
static
UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
                                     UInt cc_dep1_formal,
                                     UInt cc_dep2_formal,
                                     UInt cc_ndep_formal )
{
   switch (cc_op) {
      /* COPY: the flags are already present in DEP1; keep only the
         six condition-code bits. */
      case X86G_CC_OP_COPY:
         return cc_dep1_formal
                & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                   | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);

      case X86G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
      case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );

      case X86G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
      case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );

      case X86G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );

      case X86G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );

      case X86G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );

      case X86G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );

      case X86G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );

      case X86G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );

      case X86G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );

      case X86G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );

      case X86G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );

      /* The multiplies also take the double-width type and the two
         narrowing functions; the 32-bit cases use idULong because
         the wide product is already 64 bits. */
      case X86G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                UShort, toUShort );
      case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                UInt,   toUInt );
      case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                ULong,  idULong );

      case X86G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                Short,  toUShort );
      case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                Int,    toUInt   );
      case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                Long,   idULong );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_eflags_all_WRK(X86)"
                    "( %u, 0x%x, 0x%x, 0x%x )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("x86g_calculate_eflags_all_WRK(X86)");
   }
}
571 
572 
573 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
574 /* Calculate all the 6 flags from the supplied thunk parameters. */
x86g_calculate_eflags_all(UInt cc_op,UInt cc_dep1,UInt cc_dep2,UInt cc_ndep)575 UInt x86g_calculate_eflags_all ( UInt cc_op,
576                                  UInt cc_dep1,
577                                  UInt cc_dep2,
578                                  UInt cc_ndep )
579 {
580 #  if PROFILE_EFLAGS
581    if (!initted) initCounts();
582    n_calc_all++;
583    if (SHOW_COUNTS_NOW) showCounts();
584 #  endif
585    return
586       x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
587 }
588 
589 
590 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
591 /* Calculate just the carry flag from the supplied thunk parameters. */
592 VEX_REGPARM(3)
x86g_calculate_eflags_c(UInt cc_op,UInt cc_dep1,UInt cc_dep2,UInt cc_ndep)593 UInt x86g_calculate_eflags_c ( UInt cc_op,
594                                UInt cc_dep1,
595                                UInt cc_dep2,
596                                UInt cc_ndep )
597 {
598 #  if PROFILE_EFLAGS
599    if (!initted) initCounts();
600    n_calc_c++;
601    tabc_fast[cc_op]++;
602    if (SHOW_COUNTS_NOW) showCounts();
603 #  endif
604 
605    /* Fast-case some common ones. */
606    switch (cc_op) {
607       case X86G_CC_OP_LOGICL:
608       case X86G_CC_OP_LOGICW:
609       case X86G_CC_OP_LOGICB:
610          return 0;
611       case X86G_CC_OP_SUBL:
612          return ((UInt)cc_dep1) < ((UInt)cc_dep2)
613                    ? X86G_CC_MASK_C : 0;
614       case X86G_CC_OP_SUBW:
615          return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
616                    ? X86G_CC_MASK_C : 0;
617       case X86G_CC_OP_SUBB:
618          return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
619                    ? X86G_CC_MASK_C : 0;
620       case X86G_CC_OP_INCL:
621       case X86G_CC_OP_DECL:
622          return cc_ndep & X86G_CC_MASK_C;
623       default:
624          break;
625    }
626 
627 #  if PROFILE_EFLAGS
628    tabc_fast[cc_op]--;
629    tabc_slow[cc_op]++;
630 #  endif
631 
632    return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
633           & X86G_CC_MASK_C;
634 }
635 
636 
637 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
638 /* returns 1 or 0 */
x86g_calculate_condition(UInt cond,UInt cc_op,UInt cc_dep1,UInt cc_dep2,UInt cc_ndep)639 UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
640                                 UInt cc_op,
641                                 UInt cc_dep1,
642                                 UInt cc_dep2,
643                                 UInt cc_ndep )
644 {
645    UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
646                                                cc_dep2, cc_ndep);
647    UInt of,sf,zf,cf,pf;
648    UInt inv = cond & 1;
649 
650 #  if PROFILE_EFLAGS
651    if (!initted) initCounts();
652    tab_cond[cc_op][cond]++;
653    n_calc_cond++;
654    if (SHOW_COUNTS_NOW) showCounts();
655 #  endif
656 
657    switch (cond) {
658       case X86CondNO:
659       case X86CondO: /* OF == 1 */
660          of = eflags >> X86G_CC_SHIFT_O;
661          return 1 & (inv ^ of);
662 
663       case X86CondNZ:
664       case X86CondZ: /* ZF == 1 */
665          zf = eflags >> X86G_CC_SHIFT_Z;
666          return 1 & (inv ^ zf);
667 
668       case X86CondNB:
669       case X86CondB: /* CF == 1 */
670          cf = eflags >> X86G_CC_SHIFT_C;
671          return 1 & (inv ^ cf);
672          break;
673 
674       case X86CondNBE:
675       case X86CondBE: /* (CF or ZF) == 1 */
676          cf = eflags >> X86G_CC_SHIFT_C;
677          zf = eflags >> X86G_CC_SHIFT_Z;
678          return 1 & (inv ^ (cf | zf));
679          break;
680 
681       case X86CondNS:
682       case X86CondS: /* SF == 1 */
683          sf = eflags >> X86G_CC_SHIFT_S;
684          return 1 & (inv ^ sf);
685 
686       case X86CondNP:
687       case X86CondP: /* PF == 1 */
688          pf = eflags >> X86G_CC_SHIFT_P;
689          return 1 & (inv ^ pf);
690 
691       case X86CondNL:
692       case X86CondL: /* (SF xor OF) == 1 */
693          sf = eflags >> X86G_CC_SHIFT_S;
694          of = eflags >> X86G_CC_SHIFT_O;
695          return 1 & (inv ^ (sf ^ of));
696          break;
697 
698       case X86CondNLE:
699       case X86CondLE: /* ((SF xor OF) or ZF)  == 1 */
700          sf = eflags >> X86G_CC_SHIFT_S;
701          of = eflags >> X86G_CC_SHIFT_O;
702          zf = eflags >> X86G_CC_SHIFT_Z;
703          return 1 & (inv ^ ((sf ^ of) | zf));
704          break;
705 
706       default:
707          /* shouldn't really make these calls from generated code */
708          vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
709                     cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
710          vpanic("x86g_calculate_condition");
711    }
712 }
713 
714 
715 /* VISIBLE TO LIBVEX CLIENT */
LibVEX_GuestX86_get_eflags(const VexGuestX86State * vex_state)716 UInt LibVEX_GuestX86_get_eflags ( /*IN*/const VexGuestX86State* vex_state )
717 {
718    UInt eflags = x86g_calculate_eflags_all_WRK(
719                     vex_state->guest_CC_OP,
720                     vex_state->guest_CC_DEP1,
721                     vex_state->guest_CC_DEP2,
722                     vex_state->guest_CC_NDEP
723                  );
724    UInt dflag = vex_state->guest_DFLAG;
725    vassert(dflag == 1 || dflag == 0xFFFFFFFF);
726    if (dflag == 0xFFFFFFFF)
727       eflags |= (1<<10);
728    if (vex_state->guest_IDFLAG == 1)
729       eflags |= (1<<21);
730    if (vex_state->guest_ACFLAG == 1)
731       eflags |= (1<<18);
732 
733    return eflags;
734 }
735 
736 /* VISIBLE TO LIBVEX CLIENT */
737 void
LibVEX_GuestX86_put_eflag_c(UInt new_carry_flag,VexGuestX86State * vex_state)738 LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
739                               /*MOD*/VexGuestX86State* vex_state )
740 {
741    UInt oszacp = x86g_calculate_eflags_all_WRK(
742                     vex_state->guest_CC_OP,
743                     vex_state->guest_CC_DEP1,
744                     vex_state->guest_CC_DEP2,
745                     vex_state->guest_CC_NDEP
746                  );
747    if (new_carry_flag & 1) {
748       oszacp |= X86G_CC_MASK_C;
749    } else {
750       oszacp &= ~X86G_CC_MASK_C;
751    }
752    vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
753    vex_state->guest_CC_DEP1 = oszacp;
754    vex_state->guest_CC_DEP2 = 0;
755    vex_state->guest_CC_NDEP = 0;
756 }
757 
758 
759 /*---------------------------------------------------------------*/
760 /*--- %eflags translation-time function specialisers.         ---*/
/*--- These help iropt specialise calls to the above run-time ---*/
762 /*--- %eflags functions.                                      ---*/
763 /*---------------------------------------------------------------*/
764 
765 /* Used by the optimiser to try specialisations.  Returns an
766    equivalent expression, or NULL if none. */
767 
isU32(IRExpr * e,UInt n)768 static inline Bool isU32 ( IRExpr* e, UInt n )
769 {
770    return
771       toBool( e->tag == Iex_Const
772               && e->Iex.Const.con->tag == Ico_U32
773               && e->Iex.Const.con->Ico.U32 == n );
774 }
775 
/* Try to replace a call to one of the run-time %eflags helpers with an
   equivalent, cheaper IR expression.

   function_name    -- name of the helper being called
   args             -- NULL-terminated vector of the call's arguments
   precedingStmts,
   n_precedingStmts -- not consulted by this function

   Returns the specialised expression, or NULL if the call is not one
   that is recognised here.  A specialisation is only attempted when
   the relevant arguments (cc_op, and cond where applicable) are U32
   constants. */
IRExpr* guest_x86_spechelper ( const HChar* function_name,
                               IRExpr** args,
                               IRStmt** precedingStmts,
                               Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))
   /* Widen a 1-bit comparison result to a 32-bit value. */
#  define widen1(_e) unop(Iop_1Uto32,(_e))
   /* Invert a 32-bit 0/1 value. */
#  define flip01(_e) binop(Iop_Xor32,(_e),mkU32(1))
   /* Bit number _n of a 32-bit value, as a 32-bit 0/1 value. */
#  define bitAt(_e,_n) \
      binop(Iop_And32,binop(Iop_Shr32,(_e),mkU8(_n)),mkU32(1))

   /* Count the arguments; the vector is NULL-terminated. */
   Int i;
   Int arity = 0;
   for (i = 0; args[i] != NULL; i++) {
      arity++;
   }
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "x86g_calculate_condition" --------- */

   if (vex_streq(function_name, "x86g_calculate_condition")) {
      IRExpr* cond;
      IRExpr* cc_op;
      IRExpr* cc_dep1;
      IRExpr* cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_ADDL)) {
         if (isU32(cond, X86CondZ)) {
            /* long add, then Z --> test (dst+src == 0) */
            return widen1(binop(Iop_CmpEQ32,
                                binop(Iop_Add32, cc_dep1, cc_dep2),
                                mkU32(0)));
         }
      }

      /*---------------- SUBL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBL)) {
         if (isU32(cond, X86CondZ)) {
            /* long sub/cmp, then Z --> test dst==src */
            return widen1(binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
         }
         if (isU32(cond, X86CondNZ)) {
            /* long sub/cmp, then NZ --> test dst!=src */
            return widen1(binop(Iop_CmpNE32, cc_dep1, cc_dep2));
         }

         if (isU32(cond, X86CondL)) {
            /* long sub/cmp, then L (signed less than)
               --> test dst <s src */
            return widen1(binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
         }
         if (isU32(cond, X86CondNL)) {
            /* long sub/cmp, then NL (signed greater than or equal)
               --> test !(dst <s src) */
            return flip01(widen1(binop(Iop_CmpLT32S, cc_dep1, cc_dep2)));
         }

         if (isU32(cond, X86CondLE)) {
            /* long sub/cmp, then LE (signed less than or equal)
               --> test dst <=s src */
            return widen1(binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
         }
         if (isU32(cond, X86CondNLE)) {
            /* long sub/cmp, then NLE (signed greater than)
               --> test !(dst <=s src) */
            return flip01(widen1(binop(Iop_CmpLE32S, cc_dep1, cc_dep2)));
         }

         if (isU32(cond, X86CondBE)) {
            /* long sub/cmp, then BE (unsigned less than or equal)
               --> test dst <=u src */
            return widen1(binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
         }
         if (isU32(cond, X86CondNBE)) {
            /* long sub/cmp, then NBE (unsigned greater than)
               --> test !(dst <=u src) */
            return flip01(widen1(binop(Iop_CmpLE32U, cc_dep1, cc_dep2)));
         }

         if (isU32(cond, X86CondB)) {
            /* long sub/cmp, then B (unsigned less than)
               --> test dst <u src */
            return widen1(binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
         }
         if (isU32(cond, X86CondNB)) {
            /* long sub/cmp, then NB (unsigned greater than or equal)
               --> test !(dst <u src) */
            return flip01(widen1(binop(Iop_CmpLT32U, cc_dep1, cc_dep2)));
         }

         if (isU32(cond, X86CondS)) {
            /* long sub/cmp, then S (negative)
               --> test (dst-src <s 0) */
            return widen1(binop(Iop_CmpLT32S,
                                binop(Iop_Sub32, cc_dep1, cc_dep2),
                                mkU32(0)));
         }
         if (isU32(cond, X86CondNS)) {
            /* long sub/cmp, then NS (not negative)
               --> test !(dst-src <s 0) */
            return flip01(widen1(binop(Iop_CmpLT32S,
                                       binop(Iop_Sub32, cc_dep1, cc_dep2),
                                       mkU32(0))));
         }
      }

      /*---------------- SUBW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBW)) {
         if (isU32(cond, X86CondZ)) {
            /* word sub/cmp, then Z --> test dst==src */
            return widen1(binop(Iop_CmpEQ16,
                                unop(Iop_32to16,cc_dep1),
                                unop(Iop_32to16,cc_dep2)));
         }
         if (isU32(cond, X86CondNZ)) {
            /* word sub/cmp, then NZ --> test dst!=src */
            return widen1(binop(Iop_CmpNE16,
                                unop(Iop_32to16,cc_dep1),
                                unop(Iop_32to16,cc_dep2)));
         }
      }

      /*---------------- SUBB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBB)) {
         if (isU32(cond, X86CondZ)) {
            /* byte sub/cmp, then Z --> test dst==src */
            return widen1(binop(Iop_CmpEQ8,
                                unop(Iop_32to8,cc_dep1),
                                unop(Iop_32to8,cc_dep2)));
         }
         if (isU32(cond, X86CondNZ)) {
            /* byte sub/cmp, then NZ --> test dst!=src */
            return widen1(binop(Iop_CmpNE8,
                                unop(Iop_32to8,cc_dep1),
                                unop(Iop_32to8,cc_dep2)));
         }

         if (isU32(cond, X86CondNBE)) {
            /* byte sub/cmp, then NBE (unsigned greater than)
               --> test src <u dst */
            /* Note, args are opposite way round from the usual */
            return widen1(binop(Iop_CmpLT32U,
                                binop(Iop_And32,cc_dep2,mkU32(0xFF)),
                                binop(Iop_And32,cc_dep1,mkU32(0xFF))));
         }

         if (isU32(cond, X86CondS) && isU32(cc_dep2, 0)) {
            /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                            --> test dst <s 0
                                            --> (UInt)dst[7]
               This is yet another scheme by which gcc figures out if
               the top bit of a byte is 1 or 0.  See also LOGICB/CondS
               below. */
            /* Note: isU32(cc_dep2, 0) is correct, even though this is
               for an 8-bit comparison, since the args to the helper
               function are always U32s. */
            return bitAt(cc_dep1, 7);
         }
         if (isU32(cond, X86CondNS) && isU32(cc_dep2, 0)) {
            /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                             --> test !(dst <s 0)
                                             --> (UInt) !dst[7]
            */
            return flip01(bitAt(cc_dep1, 7));
         }
      }

      /*---------------- LOGICL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICL)) {
         if (isU32(cond, X86CondZ)) {
            /* long and/or/xor, then Z --> test dst==0 */
            return widen1(binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
         }
         if (isU32(cond, X86CondNZ)) {
            /* long and/or/xor, then NZ --> test dst!=0 */
            return widen1(binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
         }

         if (isU32(cond, X86CondLE)) {
            /* long and/or/xor, then LE
               This is pretty subtle.  LOGIC sets SF and ZF according
               to the result and makes OF be zero.  LE computes
               (SF ^ OF) | ZF, but OF is zero, so this reduces to
               SF | ZF -- which will be 1 iff the result is
               <=signed 0.  Hence ...
            */
            return widen1(binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
         }

         if (isU32(cond, X86CondBE)) {
            /* long and/or/xor, then BE
               LOGIC sets ZF according to the result and makes CF be
               zero.  BE computes (CF | ZF), but CF is zero, so this
               reduces to ZF -- which will be 1 iff the result is
               zero.  Hence ...
            */
            return widen1(binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
         }

         if (isU32(cond, X86CondS)) {
            /* see comment below for (LOGICB, CondS) */
            /* long and/or/xor, then S --> (UInt)result[31] */
            return bitAt(cc_dep1, 31);
         }
         if (isU32(cond, X86CondNS)) {
            /* see comment below for (LOGICB, CondNS) */
            /* long and/or/xor, then NS --> (UInt) ~ result[31] */
            return flip01(bitAt(cc_dep1, 31));
         }
      }

      /*---------------- LOGICW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICW)) {
         if (isU32(cond, X86CondZ)) {
            /* word and/or/xor, then Z --> test dst==0 */
            return widen1(binop(Iop_CmpEQ32,
                                binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
                                mkU32(0)));
         }

         if (isU32(cond, X86CondS)) {
            /* see comment below for (LOGICB, CondS) */
            /* word and/or/xor, then S --> (UInt)result[15] */
            return bitAt(cc_dep1, 15);
         }
      }

      /*---------------- LOGICB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICB)) {
         if (isU32(cond, X86CondZ)) {
            /* byte and/or/xor, then Z --> test dst==0 */
            return widen1(binop(Iop_CmpEQ32,
                                binop(Iop_And32,cc_dep1,mkU32(255)),
                                mkU32(0)));
         }
         if (isU32(cond, X86CondNZ)) {
            /* byte and/or/xor, then NZ --> test dst!=0 */
            /* b9ac9:       84 c0                   test   %al,%al
               b9acb:       75 0d                   jne    b9ada */
            return widen1(binop(Iop_CmpNE32,
                                binop(Iop_And32,cc_dep1,mkU32(255)),
                                mkU32(0)));
         }

         if (isU32(cond, X86CondS)) {
            /* this is an idiom gcc sometimes uses to find out if the
               top bit of a byte register is set: eg testb %al,%al;
               js ..  Since it just depends on the top bit of the
               byte, extract that bit and explicitly get rid of all
               the rest.  This helps memcheck avoid false positives in
               the case where any of the other bits in the byte are
               undefined. */
            /* byte and/or/xor, then S --> (UInt)result[7] */
            return bitAt(cc_dep1, 7);
         }
         if (isU32(cond, X86CondNS)) {
            /* ditto, for negation-of-S. */
            /* byte and/or/xor, then NS --> (UInt) ~ result[7] */
            return flip01(bitAt(cc_dep1, 7));
         }
      }

      /*---------------- DECL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECL)) {
         if (isU32(cond, X86CondZ)) {
            /* dec L, then Z --> test dst == 0 */
            return widen1(binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
         }

         if (isU32(cond, X86CondS)) {
            /* dec L, then S --> compare DST <s 0 */
            return widen1(binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
         }
      }

      /*---------------- DECW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECW)) {
         if (isU32(cond, X86CondZ)) {
            /* dec W, then Z --> test dst == 0.  The left shift by 16
               discards everything above the low 16 bits. */
            return widen1(binop(Iop_CmpEQ32,
                                binop(Iop_Shl32,cc_dep1,mkU8(16)),
                                mkU32(0)));
         }
      }

      /*---------------- INCW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_INCW)) {
         if (isU32(cond, X86CondZ)) {
            /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
            /* inc W, then Z --> test dst == 0 */
            return widen1(binop(Iop_CmpEQ32,
                                binop(Iop_Shl32,cc_dep1,mkU8(16)),
                                mkU32(0)));
         }
      }

      /*---------------- SHRL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SHRL)) {
         if (isU32(cond, X86CondZ)) {
            /* SHRL, then Z --> test dep1 == 0 */
            return widen1(binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
         }
      }

      /*---------------- COPY ----------------*/
      /* This can happen, as a result of x87 FP compares: "fcom ... ;
         fnstsw %ax ; sahf ; jbe" for example. */

      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* Value the extracted flag bit is compared against: 1 for
            the positive condition, 0 for its negation. */
         UInt expect;

         if (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE)) {
            /* COPY, then BE --> extract C and Z from dep1, and test
               (C or Z) == 1. */
            /* COPY, then NBE --> extract C and Z from dep1, and test
               (C or Z) == 0. */
            expect = isU32(cond, X86CondBE) ? 1 : 0;
            return widen1(
                      binop(
                         Iop_CmpEQ32,
                         binop(
                            Iop_And32,
                            binop(
                               Iop_Or32,
                               binop(Iop_Shr32, cc_dep1,
                                     mkU8(X86G_CC_SHIFT_C)),
                               binop(Iop_Shr32, cc_dep1,
                                     mkU8(X86G_CC_SHIFT_Z))
                            ),
                            mkU32(1)
                         ),
                         mkU32(expect)
                      )
                   );
         }

         if (isU32(cond, X86CondB) || isU32(cond, X86CondNB)) {
            /* COPY, then B --> extract C from dep1, and test (C == 1). */
            /* COPY, then NB --> extract C from dep1, and test (C == 0). */
            expect = isU32(cond, X86CondB) ? 1 : 0;
            return widen1(binop(Iop_CmpEQ32,
                                bitAt(cc_dep1, X86G_CC_SHIFT_C),
                                mkU32(expect)));
         }

         if (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ)) {
            /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
            /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
            expect = isU32(cond, X86CondZ) ? 1 : 0;
            return widen1(binop(Iop_CmpEQ32,
                                bitAt(cc_dep1, X86G_CC_SHIFT_Z),
                                mkU32(expect)));
         }

         if (isU32(cond, X86CondP) || isU32(cond, X86CondNP)) {
            /* COPY, then P --> extract P from dep1, and test (P == 1). */
            /* COPY, then NP --> extract P from dep1, and test (P == 0). */
            expect = isU32(cond, X86CondP) ? 1 : 0;
            return widen1(binop(Iop_CmpEQ32,
                                bitAt(cc_dep1, X86G_CC_SHIFT_P),
                                mkU32(expect)));
         }
      }

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_c" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
      IRExpr* cc_op;
      IRExpr* cc_dep1;
      IRExpr* cc_dep2;
      IRExpr* cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, X86G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return widen1(binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than; only the low
            8 bits of each operand take part. */
         return widen1(binop(Iop_CmpLT32U,
                             binop(Iop_And32,cc_dep1,mkU32(0xFF)),
                             binop(Iop_And32,cc_dep2,mkU32(0xFF))));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL)
          || isU32(cc_op, X86G_CC_OP_LOGICW)
          || isU32(cc_op, X86G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU32(0);
      }
      if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
         /* If the thunk is dec or inc, the cflag is supplied as
            CC_NDEP. */
         return cc_ndep;
      }
      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* cflag after COPY is stored in DEP1. */
         return bitAt(cc_dep1, X86G_CC_SHIFT_C);
      }
      if (isU32(cc_op, X86G_CC_OP_ADDL)) {
         /* C after add denotes sum <u either arg */
         return widen1(binop(Iop_CmpLT32U,
                             binop(Iop_Add32, cc_dep1, cc_dep2),
                             cc_dep1));
      }
      // ATC, requires verification, no test case known
      //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
      //   /* C after signed widening multiply denotes the case where
      //      the top half of the result isn't simply the sign extension
      //      of the bottom half (iow the result doesn't fit completely
      //      in the bottom half).  Hence:
      //        C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
      //      where 'x' denotes signed widening multiply.*/
      //   return
      //      unop(Iop_1Uto32,
      //           binop(Iop_CmpNE32,
      //                 unop(Iop_64HIto32,
      //                      binop(Iop_MullS32, cc_dep1, cc_dep2)),
      //                 binop(Iop_Sar32,
      //                       binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
      //}
#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_all" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
      /* Only cc_op and cc_dep1 (args[0], args[1]) are needed here. */
      IRExpr* cc_op;
      IRExpr* cc_dep1;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];

      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* eflags after COPY are stored in DEP1; keep just the
            O, S, Z, A, C and P bits. */
         return binop(Iop_And32,
                      cc_dep1,
                      mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S
                            | X86G_CC_MASK_Z | X86G_CC_MASK_A
                            | X86G_CC_MASK_C | X86G_CC_MASK_P));
      }
      return NULL;
   }

#  undef bitAt
#  undef flip01
#  undef widen1
#  undef unop
#  undef binop
#  undef mkU32
#  undef mkU8

   /* Not a helper we know how to specialise. */
   return NULL;
}
1323 
1324 
1325 /*---------------------------------------------------------------*/
1326 /*--- Supporting functions for x87 FPU activities.            ---*/
1327 /*---------------------------------------------------------------*/
1328 
host_is_little_endian(void)1329 static inline Bool host_is_little_endian ( void )
1330 {
1331    UInt x = 0x76543210;
1332    UChar* p = (UChar*)(&x);
1333    return toBool(*p == 0x10);
1334 }
1335 
1336 /* 80 and 64-bit floating point formats:
1337 
1338    80-bit:
1339 
1340     S  0       0-------0      zero
1341     S  0       0X------X      denormals
1342     S  1-7FFE  1X------X      normals (all normals have leading 1)
1343     S  7FFF    10------0      infinity
1344     S  7FFF    10X-----X      snan
1345     S  7FFF    11X-----X      qnan
1346 
1347    S is the sign bit.  For runs X----X, at least one of the Xs must be
1348    nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
1349    there is an explicitly represented leading 1, and a sign bit,
1350    giving 80 in total.
1351 
1352    64-bit avoids the confusion of an explicitly represented leading 1
1353    and so is simpler:
1354 
1355     S  0      0------0   zero
1356     S  0      X------X   denormals
1357     S  1-7FE  any        normals
1358     S  7FF    0------0   infinity
1359     S  7FF    0X-----X   snan
1360     S  7FF    1X-----X   qnan
1361 
1362    Exponent is 11 bits, fractional part is 52 bits, and there is a
1363    sign bit, giving 64 in total.
1364 */
1365 
1366 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
1367 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
x86g_calculate_FXAM(UInt tag,ULong dbl)1368 UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
1369 {
1370    Bool   mantissaIsZero;
1371    Int    bexp;
1372    UChar  sign;
1373    UChar* f64;
1374 
1375    vassert(host_is_little_endian());
1376 
1377    /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
1378 
1379    f64  = (UChar*)(&dbl);
1380    sign = toUChar( (f64[7] >> 7) & 1 );
1381 
1382    /* First off, if the tag indicates the register was empty,
1383       return 1,0,sign,1 */
1384    if (tag == 0) {
1385       /* vex_printf("Empty\n"); */
1386       return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
1387                                  | X86G_FC_MASK_C0;
1388    }
1389 
1390    bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
1391    bexp &= 0x7FF;
1392 
1393    mantissaIsZero
1394       = toBool(
1395            (f64[6] & 0x0F) == 0
1396            && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
1397         );
1398 
1399    /* If both exponent and mantissa are zero, the value is zero.
1400       Return 1,0,sign,0. */
1401    if (bexp == 0 && mantissaIsZero) {
1402       /* vex_printf("Zero\n"); */
1403       return X86G_FC_MASK_C3 | 0
1404                              | (sign << X86G_FC_SHIFT_C1) | 0;
1405    }
1406 
1407    /* If exponent is zero but mantissa isn't, it's a denormal.
1408       Return 1,1,sign,0. */
1409    if (bexp == 0 && !mantissaIsZero) {
1410       /* vex_printf("Denormal\n"); */
1411       return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
1412                              | (sign << X86G_FC_SHIFT_C1) | 0;
1413    }
1414 
1415    /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
1416       Return 0,1,sign,1. */
1417    if (bexp == 0x7FF && mantissaIsZero) {
1418       /* vex_printf("Inf\n"); */
1419       return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
1420                                  | X86G_FC_MASK_C0;
1421    }
1422 
1423    /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
1424       Return 0,0,sign,1. */
1425    if (bexp == 0x7FF && !mantissaIsZero) {
1426       /* vex_printf("NaN\n"); */
1427       return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
1428    }
1429 
1430    /* Uh, ok, we give up.  It must be a normal finite number.
1431       Return 0,1,sign,0.
1432    */
1433    /* vex_printf("normal\n"); */
1434    return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
1435 }
1436 
1437 
1438 /* CALLED FROM GENERATED CODE */
1439 /* DIRTY HELPER (reads guest memory) */
x86g_dirtyhelper_loadF80le(Addr addrU)1440 ULong x86g_dirtyhelper_loadF80le ( Addr addrU )
1441 {
1442    ULong f64;
1443    convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
1444    return f64;
1445 }
1446 
1447 /* CALLED FROM GENERATED CODE */
1448 /* DIRTY HELPER (writes guest memory) */
x86g_dirtyhelper_storeF80le(Addr addrU,ULong f64)1449 void x86g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
1450 {
1451    convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
1452 }
1453 
1454 
1455 /*----------------------------------------------*/
1456 /*--- The exported fns ..                    ---*/
1457 /*----------------------------------------------*/
1458 
/* Layout of the real x87 state. */
/* 13 June 05: Fpu_State and auxiliary constants were moved to
   guest_generic_x87.h */
1462 
1463 
1464 /* CLEAN HELPER */
1465 /* fpucw[15:0] contains a x87 native format FPU control word.
1466    Extract from it the required FPROUND value and any resulting
1467    emulation warning, and return (warn << 32) | fpround value.
1468 */
x86g_check_fldcw(UInt fpucw)1469 ULong x86g_check_fldcw ( UInt fpucw )
1470 {
1471    /* Decide on a rounding mode.  fpucw[11:10] holds it. */
1472    /* NOTE, encoded exactly as per enum IRRoundingMode. */
1473    UInt rmode = (fpucw >> 10) & 3;
1474 
1475    /* Detect any required emulation warnings. */
1476    VexEmNote ew = EmNote_NONE;
1477 
1478    if ((fpucw & 0x3F) != 0x3F) {
1479       /* unmasked exceptions! */
1480       ew = EmWarn_X86_x87exns;
1481    }
1482    else
1483    if (((fpucw >> 8) & 3) != 3) {
1484       /* unsupported precision */
1485       ew = EmWarn_X86_x87precision;
1486    }
1487 
1488    return (((ULong)ew) << 32) | ((ULong)rmode);
1489 }
1490 
1491 /* CLEAN HELPER */
1492 /* Given fpround as an IRRoundingMode value, create a suitable x87
1493    native format FPU control word. */
x86g_create_fpucw(UInt fpround)1494 UInt x86g_create_fpucw ( UInt fpround )
1495 {
1496    fpround &= 3;
1497    return 0x037F | (fpround << 10);
1498 }
1499 
1500 
1501 /* CLEAN HELPER */
1502 /* mxcsr[15:0] contains a SSE native format MXCSR value.
1503    Extract from it the required SSEROUND value and any resulting
1504    emulation warning, and return (warn << 32) | sseround value.
1505 */
x86g_check_ldmxcsr(UInt mxcsr)1506 ULong x86g_check_ldmxcsr ( UInt mxcsr )
1507 {
1508    /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
1509    /* NOTE, encoded exactly as per enum IRRoundingMode. */
1510    UInt rmode = (mxcsr >> 13) & 3;
1511 
1512    /* Detect any required emulation warnings. */
1513    VexEmNote ew = EmNote_NONE;
1514 
1515    if ((mxcsr & 0x1F80) != 0x1F80) {
1516       /* unmasked exceptions! */
1517       ew = EmWarn_X86_sseExns;
1518    }
1519    else
1520    if (mxcsr & (1<<15)) {
1521       /* FZ is set */
1522       ew = EmWarn_X86_fz;
1523    }
1524    else
1525    if (mxcsr & (1<<6)) {
1526       /* DAZ is set */
1527       ew = EmWarn_X86_daz;
1528    }
1529 
1530    return (((ULong)ew) << 32) | ((ULong)rmode);
1531 }
1532 
1533 
1534 /* CLEAN HELPER */
1535 /* Given sseround as an IRRoundingMode value, create a suitable SSE
1536    native format MXCSR value. */
x86g_create_mxcsr(UInt sseround)1537 UInt x86g_create_mxcsr ( UInt sseround )
1538 {
1539    sseround &= 3;
1540    return 0x1F80 | (sseround << 13);
1541 }
1542 
1543 
1544 /* CALLED FROM GENERATED CODE */
1545 /* DIRTY HELPER (writes guest state) */
1546 /* Initialise the x87 FPU state as per 'finit'. */
x86g_dirtyhelper_FINIT(VexGuestX86State * gst)1547 void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
1548 {
1549    Int i;
1550    gst->guest_FTOP = 0;
1551    for (i = 0; i < 8; i++) {
1552       gst->guest_FPTAG[i] = 0; /* empty */
1553       gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
1554    }
1555    gst->guest_FPROUND = (UInt)Irrm_NEAREST;
1556    gst->guest_FC3210  = 0;
1557 }
1558 
1559 
1560 /* This is used to implement both 'frstor' and 'fldenv'.  The latter
1561    appears to differ from the former only in that the 8 FP registers
1562    themselves are not transferred into the guest state. */
1563 static
do_put_x87(Bool moveRegs,UChar * x87_state,VexGuestX86State * vex_state)1564 VexEmNote do_put_x87 ( Bool moveRegs,
1565                        /*IN*/UChar* x87_state,
1566                        /*OUT*/VexGuestX86State* vex_state )
1567 {
1568    Int        stno, preg;
1569    UInt       tag;
1570    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1571    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1572    Fpu_State* x87     = (Fpu_State*)x87_state;
1573    UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
1574    UInt       tagw    = x87->env[FP_ENV_TAG];
1575    UInt       fpucw   = x87->env[FP_ENV_CTRL];
1576    UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;
1577    VexEmNote  ew;
1578    UInt       fpround;
1579    ULong      pair;
1580 
1581    /* Copy registers and tags */
1582    for (stno = 0; stno < 8; stno++) {
1583       preg = (stno + ftop) & 7;
1584       tag = (tagw >> (2*preg)) & 3;
1585       if (tag == 3) {
1586          /* register is empty */
1587          /* hmm, if it's empty, does it still get written?  Probably
1588             safer to say it does.  If we don't, memcheck could get out
1589             of sync, in that it thinks all FP registers are defined by
1590             this helper, but in reality some have not been updated. */
1591          if (moveRegs)
1592             vexRegs[preg] = 0; /* IEEE754 64-bit zero */
1593          vexTags[preg] = 0;
1594       } else {
1595          /* register is non-empty */
1596          if (moveRegs)
1597             convert_f80le_to_f64le( &x87->reg[10*stno],
1598                                     (UChar*)&vexRegs[preg] );
1599          vexTags[preg] = 1;
1600       }
1601    }
1602 
1603    /* stack pointer */
1604    vex_state->guest_FTOP = ftop;
1605 
1606    /* status word */
1607    vex_state->guest_FC3210 = c3210;
1608 
1609    /* handle the control word, setting FPROUND and detecting any
1610       emulation warnings. */
1611    pair    = x86g_check_fldcw ( (UInt)fpucw );
1612    fpround = (UInt)pair;
1613    ew      = (VexEmNote)(pair >> 32);
1614 
1615    vex_state->guest_FPROUND = fpround & 3;
1616 
1617    /* emulation warnings --> caller */
1618    return ew;
1619 }
1620 
1621 
1622 /* Create an x87 FPU state from the guest state, as close as
1623    we can approximate it. */
1624 static
do_get_x87(VexGuestX86State * vex_state,UChar * x87_state)1625 void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
1626                   /*OUT*/UChar* x87_state )
1627 {
1628    Int        i, stno, preg;
1629    UInt       tagw;
1630    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1631    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1632    Fpu_State* x87     = (Fpu_State*)x87_state;
1633    UInt       ftop    = vex_state->guest_FTOP;
1634    UInt       c3210   = vex_state->guest_FC3210;
1635 
1636    for (i = 0; i < 14; i++)
1637       x87->env[i] = 0;
1638 
1639    x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
1640    x87->env[FP_ENV_STAT]
1641       = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
1642    x87->env[FP_ENV_CTRL]
1643       = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));
1644 
1645    /* Dump the register stack in ST order. */
1646    tagw = 0;
1647    for (stno = 0; stno < 8; stno++) {
1648       preg = (stno + ftop) & 7;
1649       if (vexTags[preg] == 0) {
1650          /* register is empty */
1651          tagw |= (3 << (2*preg));
1652          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1653                                  &x87->reg[10*stno] );
1654       } else {
1655          /* register is full. */
1656          tagw |= (0 << (2*preg));
1657          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1658                                  &x87->reg[10*stno] );
1659       }
1660    }
1661    x87->env[FP_ENV_TAG] = toUShort(tagw);
1662 }
1663 
1664 
1665 /* CALLED FROM GENERATED CODE */
1666 /* DIRTY HELPER (reads guest state, writes guest mem) */
x86g_dirtyhelper_FXSAVE(VexGuestX86State * gst,HWord addr)1667 void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
1668 {
1669    /* Somewhat roundabout, but at least it's simple. */
1670    Fpu_State tmp;
1671    UShort*   addrS = (UShort*)addr;
1672    UChar*    addrC = (UChar*)addr;
1673    U128*     xmm   = (U128*)(addr + 160);
1674    UInt      mxcsr;
1675    UShort    fp_tags;
1676    UInt      summary_tags;
1677    Int       r, stno;
1678    UShort    *srcS, *dstS;
1679 
1680    do_get_x87( gst, (UChar*)&tmp );
1681    mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );
1682 
1683    /* Now build the proper fxsave image from the x87 image we just
1684       made. */
1685 
1686    addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
1687    addrS[1]  = tmp.env[FP_ENV_STAT]; /* FCW: fpu status word */
1688 
1689    /* set addrS[2] in an endian-independent way */
1690    summary_tags = 0;
1691    fp_tags = tmp.env[FP_ENV_TAG];
1692    for (r = 0; r < 8; r++) {
1693       if ( ((fp_tags >> (2*r)) & 3) != 3 )
1694          summary_tags |= (1 << r);
1695    }
1696    addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
1697    addrC[5]  = 0; /* pad */
1698 
1699    addrS[3]  = 0; /* FOP: fpu opcode (bogus) */
1700    addrS[4]  = 0;
1701    addrS[5]  = 0; /* FPU IP (bogus) */
1702    addrS[6]  = 0; /* FPU IP's segment selector (bogus) (although we
1703                      could conceivably dump %CS here) */
1704 
1705    addrS[7]  = 0; /* Intel reserved */
1706 
1707    addrS[8]  = 0; /* FPU DP (operand pointer) (bogus) */
1708    addrS[9]  = 0; /* FPU DP (operand pointer) (bogus) */
1709    addrS[10] = 0; /* segment selector for above operand pointer; %DS
1710                      perhaps? */
1711    addrS[11] = 0; /* Intel reserved */
1712 
1713    addrS[12] = toUShort(mxcsr);  /* MXCSR */
1714    addrS[13] = toUShort(mxcsr >> 16);
1715 
1716    addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
1717    addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */
1718 
1719    /* Copy in the FP registers, in ST order. */
1720    for (stno = 0; stno < 8; stno++) {
1721       srcS = (UShort*)(&tmp.reg[10*stno]);
1722       dstS = (UShort*)(&addrS[16 + 8*stno]);
1723       dstS[0] = srcS[0];
1724       dstS[1] = srcS[1];
1725       dstS[2] = srcS[2];
1726       dstS[3] = srcS[3];
1727       dstS[4] = srcS[4];
1728       dstS[5] = 0;
1729       dstS[6] = 0;
1730       dstS[7] = 0;
1731    }
1732 
1733    /* That's the first 160 bytes of the image done.  Now only %xmm0
1734       .. %xmm7 remain to be copied.  If the host is big-endian, these
1735       need to be byte-swapped. */
1736    vassert(host_is_little_endian());
1737 
1738 #  define COPY_U128(_dst,_src)                       \
1739       do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
1740            _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
1741       while (0)
1742 
1743    COPY_U128( xmm[0], gst->guest_XMM0 );
1744    COPY_U128( xmm[1], gst->guest_XMM1 );
1745    COPY_U128( xmm[2], gst->guest_XMM2 );
1746    COPY_U128( xmm[3], gst->guest_XMM3 );
1747    COPY_U128( xmm[4], gst->guest_XMM4 );
1748    COPY_U128( xmm[5], gst->guest_XMM5 );
1749    COPY_U128( xmm[6], gst->guest_XMM6 );
1750    COPY_U128( xmm[7], gst->guest_XMM7 );
1751 
1752 #  undef COPY_U128
1753 }
1754 
1755 
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
/* Load the guest FP/SSE state from the fxsave-style image at 'addr'.
   %xmm0..%xmm7 are taken from offset 160; the x87 part is first
   rebuilt as an Fpu_State in 'tmp' and then installed with
   do_put_x87; finally the MXCSR words are checked and the SSE
   rounding mode is set from them.  Returns an emulation note: the
   x87 one if there is one, otherwise the MXCSR one. */
VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
{
   Fpu_State tmp;
   VexEmNote warnX87 = EmNote_NONE;
   VexEmNote warnXMM = EmNote_NONE;
   UShort*   addrS   = (UShort*)addr;
   UChar*    addrC   = (UChar*)addr;
   U128*     xmm     = (U128*)(addr + 160);
   UShort    fp_tags;
   Int       r, stno, i;

   /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
      to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( gst->guest_XMM0, xmm[0] );
   COPY_U128( gst->guest_XMM1, xmm[1] );
   COPY_U128( gst->guest_XMM2, xmm[2] );
   COPY_U128( gst->guest_XMM3, xmm[3] );
   COPY_U128( gst->guest_XMM4, xmm[4] );
   COPY_U128( gst->guest_XMM5, xmm[5] );
   COPY_U128( gst->guest_XMM6, xmm[6] );
   COPY_U128( gst->guest_XMM7, xmm[7] );

#  undef COPY_U128

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */

   /* LLVM on Darwin turns the following loop into a movaps plus a
      handful of scalar stores.  This would work fine except for the
      fact that VEX doesn't keep the stack correctly (16-) aligned for
      the call, so it segfaults.  Hence, split the loop into two
      pieces (and pray LLVM doesn't merely glue them back together) so
      it's composed only of scalar stores and so is alignment
      insensitive.  Of course this is a kludge of the lamest kind --
      VEX should be fixed properly. */
   /* Code that seems to trigger the problem:
      for (i = 0; i < 14; i++) tmp.env[i] = 0; */
   for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
   for (i = 0; i < 7; i++) tmp.env[i+7] = 0;

   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..79]: each register is the first 10 bytes
      (five 16-bit words) of its 16-byte slot in the image. */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */

   /* Expand the one-bit-per-register tag summary byte back into a
      2-bit-per-register tag word.  A set summary bit means the
      register is in use. */
   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough;
                                     the summary bit cannot tell which
                                     non-empty tag it originally was. */
      else
         fp_tags |= (3 << (2*r)); /* EMPTY */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state. */
   warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );

   /* Reassemble the 32-bit MXCSR from its two 16-bit halves, have it
      checked, and split the result into rounding mode (low half)
      and emulation note (high half). */
   { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
                | ((((UInt)addrS[13]) & 0xFFFF) << 16);
     ULong w64 = x86g_check_ldmxcsr( w32 );

     warnXMM = (VexEmNote)(w64 >> 32);

     gst->guest_SSEROUND = (UInt)w64;
   }

   /* Prefer an X87 emwarn over an XMM one, if both exist. */
   if (warnX87 != EmNote_NONE)
      return warnX87;
   else
      return warnXMM;
}
1847 
1848 
1849 /* CALLED FROM GENERATED CODE */
1850 /* DIRTY HELPER (reads guest state, writes guest mem) */
x86g_dirtyhelper_FSAVE(VexGuestX86State * gst,HWord addr)1851 void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
1852 {
1853    do_get_x87( gst, (UChar*)addr );
1854 }
1855 
1856 /* CALLED FROM GENERATED CODE */
1857 /* DIRTY HELPER (writes guest state, reads guest mem) */
x86g_dirtyhelper_FRSTOR(VexGuestX86State * gst,HWord addr)1858 VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
1859 {
1860    return do_put_x87( True/*regs too*/, (UChar*)addr, gst );
1861 }
1862 
1863 /* CALLED FROM GENERATED CODE */
1864 /* DIRTY HELPER (reads guest state, writes guest mem) */
x86g_dirtyhelper_FSTENV(VexGuestX86State * gst,HWord addr)1865 void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
1866 {
1867    /* Somewhat roundabout, but at least it's simple. */
1868    Int       i;
1869    UShort*   addrP = (UShort*)addr;
1870    Fpu_State tmp;
1871    do_get_x87( gst, (UChar*)&tmp );
1872    for (i = 0; i < 14; i++)
1873       addrP[i] = tmp.env[i];
1874 }
1875 
1876 /* CALLED FROM GENERATED CODE */
1877 /* DIRTY HELPER (writes guest state, reads guest mem) */
x86g_dirtyhelper_FLDENV(VexGuestX86State * gst,HWord addr)1878 VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
1879 {
1880    return do_put_x87( False/*don't move regs*/, (UChar*)addr, gst);
1881 }
1882 
1883 
1884 /*---------------------------------------------------------------*/
1885 /*--- Misc integer helpers, including rotates and CPUID.      ---*/
1886 /*---------------------------------------------------------------*/
1887 
1888 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
1889 /* Calculate both flags and value result for rotate right
1890    through the carry bit.  Result in low 32 bits,
1891    new flags (OSZACP) in high 32 bits.
1892 */
x86g_calculate_RCR(UInt arg,UInt rot_amt,UInt eflags_in,UInt sz)1893 ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
1894 {
1895    UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
1896 
1897    switch (sz) {
1898       case 4:
1899          cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1900          of        = ((arg >> 31) ^ cf) & 1;
1901          while (tempCOUNT > 0) {
1902             tempcf = arg & 1;
1903             arg    = (arg >> 1) | (cf << 31);
1904             cf     = tempcf;
1905             tempCOUNT--;
1906          }
1907          break;
1908       case 2:
1909          while (tempCOUNT >= 17) tempCOUNT -= 17;
1910          cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1911          of        = ((arg >> 15) ^ cf) & 1;
1912          while (tempCOUNT > 0) {
1913             tempcf = arg & 1;
1914             arg    = ((arg >> 1) & 0x7FFF) | (cf << 15);
1915             cf     = tempcf;
1916             tempCOUNT--;
1917          }
1918          break;
1919       case 1:
1920          while (tempCOUNT >= 9) tempCOUNT -= 9;
1921          cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1922          of        = ((arg >> 7) ^ cf) & 1;
1923          while (tempCOUNT > 0) {
1924             tempcf = arg & 1;
1925             arg    = ((arg >> 1) & 0x7F) | (cf << 7);
1926             cf     = tempcf;
1927             tempCOUNT--;
1928          }
1929          break;
1930       default:
1931          vpanic("calculate_RCR: invalid size");
1932    }
1933 
1934    cf &= 1;
1935    of &= 1;
1936    eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
1937    eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
1938 
1939    return (((ULong)eflags_in) << 32) | ((ULong)arg);
1940 }
1941 
1942 
1943 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
1944 /* Calculate both flags and value result for rotate left
1945    through the carry bit.  Result in low 32 bits,
1946    new flags (OSZACP) in high 32 bits.
1947 */
x86g_calculate_RCL(UInt arg,UInt rot_amt,UInt eflags_in,UInt sz)1948 ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
1949 {
1950    UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
1951 
1952    switch (sz) {
1953       case 4:
1954          cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1955          while (tempCOUNT > 0) {
1956             tempcf = (arg >> 31) & 1;
1957             arg    = (arg << 1) | (cf & 1);
1958             cf     = tempcf;
1959             tempCOUNT--;
1960          }
1961          of = ((arg >> 31) ^ cf) & 1;
1962          break;
1963       case 2:
1964          while (tempCOUNT >= 17) tempCOUNT -= 17;
1965          cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1966          while (tempCOUNT > 0) {
1967             tempcf = (arg >> 15) & 1;
1968             arg    = 0xFFFF & ((arg << 1) | (cf & 1));
1969             cf     = tempcf;
1970             tempCOUNT--;
1971          }
1972          of = ((arg >> 15) ^ cf) & 1;
1973          break;
1974       case 1:
1975          while (tempCOUNT >= 9) tempCOUNT -= 9;
1976          cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1977          while (tempCOUNT > 0) {
1978             tempcf = (arg >> 7) & 1;
1979             arg    = 0xFF & ((arg << 1) | (cf & 1));
1980             cf     = tempcf;
1981             tempCOUNT--;
1982          }
1983          of = ((arg >> 7) ^ cf) & 1;
1984          break;
1985       default:
1986          vpanic("calculate_RCL: invalid size");
1987    }
1988 
1989    cf &= 1;
1990    of &= 1;
1991    eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
1992    eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
1993 
1994    return (((ULong)eflags_in) << 32) | ((ULong)arg);
1995 }
1996 
1997 
1998 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
1999 /* Calculate both flags and value result for DAA/DAS/AAA/AAS.
2000    AX value in low half of arg, OSZACP in upper half.
2001    See guest-x86/toIR.c usage point for details.
2002 */
/* Parity of the low 8 bits of 'w32' in the sense of the x86 P flag:
   returns 1 when an even number of those bits are set, 0 when an odd
   number are.  Bits 8 and above are ignored. */
static UInt calc_parity_8bit ( UInt w32 ) {
   UInt fold = w32 & 0xFF;
   /* XOR-fold the byte down so that bit 0 ends up holding the XOR of
      all eight bits. */
   fold ^= fold >> 4;
   fold ^= fold >> 2;
   fold ^= fold >> 1;
   return 1 ^ (fold & 1);
}
/* Compute the new AX and flags for one of the BCD adjust
   instructions.

   flags_and_AX -- AL in bits 0..7, AH in bits 8..15, and the O S Z A
                   C P flags in the upper half, each at bit position
                   (16 + X86G_CC_SHIFT_<flag>).
   opcode       -- selects the instruction: 0x27 DAA, 0x2F DAS,
                   0x37 AAA, 0x3F AAS.  Any other value asserts.

   The result uses the same packing as 'flags_and_AX'.  Flags that
   are architecturally undefined for an instruction are returned as
   zero. */
UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
{
   /* Unpack.  Everything is held in a full UInt so that carries out
      of 8 bits can be observed before the final masking. */
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   UInt result = 0;

   switch (opcode) {
      case 0x27: { /* DAA */
         /* Both adjustment steps test the ORIGINAL AL and C, so keep
            copies before anything is modified. */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         /* Low-nibble adjust. */
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_C  = old_C;
            if (r_AL >= 0x100) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         /* High-nibble adjust; this decides the final C. */
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL + 0x60;
            r_C  = 1;
         } else {
            r_C = 0;
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x2F: { /* DAS */
         /* As for DAA, the tests use the original AL and C. */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         /* Low-nibble adjust. */
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            /* r_AL is unsigned, so detect the borrow before
               subtracting. */
            Bool borrow = r_AL < 6;
            r_AL = r_AL - 6;
            r_C  = old_C;
            if (borrow) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         /* High-nibble adjust.  When it does not apply, C keeps the
            value left by the low-nibble step rather than being
            cleared. */
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL - 0x60;
            r_C  = 1;
         } else {
            /* Intel docs are wrong: r_C = 0; */
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x37: { /* AAA */
         /* If AL + 6 would carry out of 8 bits, AH receives one extra
            increment. */
         Bool nudge = r_AL > 0xF9;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_AH = r_AH + 1 + (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      case 0x3F: { /* AAS */
         /* If AL - 6 would borrow, AH receives one extra
            decrement. */
         Bool nudge = r_AL < 0x06;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL - 6;
            r_AH = r_AH - 1 - (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      default:
         vassert(0);
   }
   /* Repack using the input layout; the masks discard any
      wrap-around left in AH/AL by the unsigned arithmetic above. */
   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
            | ( (r_AH & 0xFF) << 8 )
            | ( (r_AL & 0xFF) << 0 );
   return result;
}
2125 
/* Compute the new AX and flags for AAM (opcode 0xD4) or AAD (opcode
   0xD5), in both cases with a base of 10.  Any other opcode asserts.

   flags_and_AX -- AL in bits 0..7 and AH in bits 8..15.  The flags
                   in the upper half are not consulted.

   The result carries AL/AH in the same positions and the flags at
   bit (16 + X86G_CC_SHIFT_<flag>).  S, Z and P describe the new AL;
   O, C and A (undefined for these instructions) are returned as
   zero. */
UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
{
   UInt al = flags_and_AX & 0xFF;
   UInt ah = (flags_and_AX >> 8) & 0xFF;
   UInt sf, zf, pf;

   if (opcode == 0xD4) {
      /* AAM: split AL into tens (AH) and units (AL). */
      ah = al / 10;
      al = al % 10;
   } else if (opcode == 0xD5) {
      /* AAD: fold AH:AL into a single binary byte in AL. */
      al = ((ah * 10) + al) & 0xff;
      ah = 0;
   } else {
      vassert(0);
   }

   sf = (al & 0x80) ? 1 : 0;
   zf = (al == 0) ? 1 : 0;
   pf = calc_parity_8bit( al );

   /* O, C and A are all zero and therefore add nothing to the packed
      word. */
   return   ( (sf & 1) << (16 + X86G_CC_SHIFT_S) )
          | ( (zf & 1) << (16 + X86G_CC_SHIFT_Z) )
          | ( (pf & 1) << (16 + X86G_CC_SHIFT_P) )
          | ( (ah & 0xFF) << 8 )
          | ( (al & 0xFF) << 0 );
}
2170 
2171 
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  When this file is compiled with __i386__ defined,
   execute a real 'rdtsc' and return its 64-bit result.  In every
   other build there is no counter to read, so the constant 1 is
   returned instead. */
ULong x86g_dirtyhelper_RDTSC ( void )
{
#  if defined(__i386__)
   ULong res;
   /* The "=A" constraint collects the 64-bit result from the
      edx:eax register pair. */
   __asm__ __volatile__("rdtsc" : "=A" (res));
   return res;
#  else
   return 1ULL;
#  endif
}
2185 
2186 
2187 /* CALLED FROM GENERATED CODE */
2188 /* DIRTY HELPER (modifies guest state) */
2189 /* Claim to be a P55C (Intel Pentium/MMX) */
x86g_dirtyhelper_CPUID_sse0(VexGuestX86State * st)2190 void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
2191 {
2192    switch (st->guest_EAX) {
2193       case 0:
2194          st->guest_EAX = 0x1;
2195          st->guest_EBX = 0x756e6547;
2196          st->guest_ECX = 0x6c65746e;
2197          st->guest_EDX = 0x49656e69;
2198          break;
2199       default:
2200          st->guest_EAX = 0x543;
2201          st->guest_EBX = 0x0;
2202          st->guest_ECX = 0x0;
2203          st->guest_EDX = 0x8001bf;
2204          break;
2205    }
2206 }
2207 
2208 /* CALLED FROM GENERATED CODE */
2209 /* DIRTY HELPER (modifies guest state) */
2210 /* Claim to be a Athlon "Classic" (Model 2, K75 "Pluto/Orion") */
2211 /* But without 3DNow support (weird, but we really don't support it). */
x86g_dirtyhelper_CPUID_mmxext(VexGuestX86State * st)2212 void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
2213 {
2214    switch (st->guest_EAX) {
2215       /* vendor ID */
2216       case 0:
2217          st->guest_EAX = 0x1;
2218          st->guest_EBX = 0x68747541;
2219          st->guest_ECX = 0x444d4163;
2220          st->guest_EDX = 0x69746e65;
2221          break;
2222       /* feature bits */
2223       case 1:
2224          st->guest_EAX = 0x621;
2225          st->guest_EBX = 0x0;
2226          st->guest_ECX = 0x0;
2227          st->guest_EDX = 0x183f9ff;
2228          break;
2229       /* Highest Extended Function Supported (0x80000004 brand string) */
2230       case 0x80000000:
2231          st->guest_EAX = 0x80000004;
2232          st->guest_EBX = 0x68747541;
2233          st->guest_ECX = 0x444d4163;
2234          st->guest_EDX = 0x69746e65;
2235          break;
2236       /* Extended Processor Info and Feature Bits */
2237       case 0x80000001:
2238          st->guest_EAX = 0x721;
2239          st->guest_EBX = 0x0;
2240          st->guest_ECX = 0x0;
2241          st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
2242          break;
2243       /* Processor Brand String "AMD Athlon(tm) Processor" */
2244       case 0x80000002:
2245          st->guest_EAX = 0x20444d41;
2246          st->guest_EBX = 0x6c687441;
2247          st->guest_ECX = 0x74286e6f;
2248          st->guest_EDX = 0x5020296d;
2249          break;
2250       case 0x80000003:
2251          st->guest_EAX = 0x65636f72;
2252          st->guest_EBX = 0x726f7373;
2253          st->guest_ECX = 0x0;
2254          st->guest_EDX = 0x0;
2255          break;
2256       default:
2257          st->guest_EAX = 0x0;
2258          st->guest_EBX = 0x0;
2259          st->guest_ECX = 0x0;
2260          st->guest_EDX = 0x0;
2261          break;
2262    }
2263 }
2264 
2265 /* CALLED FROM GENERATED CODE */
2266 /* DIRTY HELPER (modifies guest state) */
2267 /* Claim to be the following SSE1-capable CPU:
2268    vendor_id       : GenuineIntel
2269    cpu family      : 6
2270    model           : 11
2271    model name      : Intel(R) Pentium(R) III CPU family      1133MHz
2272    stepping        : 1
2273    cpu MHz         : 1131.013
2274    cache size      : 512 KB
2275 */
x86g_dirtyhelper_CPUID_sse1(VexGuestX86State * st)2276 void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
2277 {
2278    switch (st->guest_EAX) {
2279       case 0:
2280          st->guest_EAX = 0x00000002;
2281          st->guest_EBX = 0x756e6547;
2282          st->guest_ECX = 0x6c65746e;
2283          st->guest_EDX = 0x49656e69;
2284          break;
2285       case 1:
2286          st->guest_EAX = 0x000006b1;
2287          st->guest_EBX = 0x00000004;
2288          st->guest_ECX = 0x00000000;
2289          st->guest_EDX = 0x0383fbff;
2290          break;
2291       default:
2292          st->guest_EAX = 0x03020101;
2293          st->guest_EBX = 0x00000000;
2294          st->guest_ECX = 0x00000000;
2295          st->guest_EDX = 0x0c040883;
2296          break;
2297    }
2298 }
2299 
2300 /* Claim to be the following SSSE3-capable CPU (2 x ...):
2301    vendor_id       : GenuineIntel
2302    cpu family      : 6
2303    model           : 15
2304    model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
2305    stepping        : 6
2306    cpu MHz         : 2394.000
2307    cache size      : 4096 KB
2308    physical id     : 0
2309    siblings        : 2
2310    core id         : 0
2311    cpu cores       : 2
2312    fpu             : yes
2313    fpu_exception   : yes
2314    cpuid level     : 10
2315    wp              : yes
2316    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
2317                      mtrr pge mca cmov pat pse36 clflush dts acpi
2318                      mmx fxsr sse sse2 ss ht tm syscall nx lm
2319                      constant_tsc pni monitor ds_cpl vmx est tm2
2320                      cx16 xtpr lahf_lm
2321    bogomips        : 4798.78
2322    clflush size    : 64
2323    cache_alignment : 64
2324    address sizes   : 36 bits physical, 48 bits virtual
2325    power management:
2326 */
x86g_dirtyhelper_CPUID_sse2(VexGuestX86State * st)2327 void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
2328 {
2329 #  define SET_ABCD(_a,_b,_c,_d)               \
2330       do { st->guest_EAX = (UInt)(_a);        \
2331            st->guest_EBX = (UInt)(_b);        \
2332            st->guest_ECX = (UInt)(_c);        \
2333            st->guest_EDX = (UInt)(_d);        \
2334       } while (0)
2335 
2336    switch (st->guest_EAX) {
2337       case 0x00000000:
2338          SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
2339          break;
2340       case 0x00000001:
2341          SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
2342          break;
2343       case 0x00000002:
2344          SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
2345          break;
2346       case 0x00000003:
2347          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2348          break;
2349       case 0x00000004: {
2350          switch (st->guest_ECX) {
2351             case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
2352                                       0x0000003f, 0x00000001); break;
2353             case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
2354                                       0x0000003f, 0x00000001); break;
2355             case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
2356                                       0x00000fff, 0x00000001); break;
2357             default:         SET_ABCD(0x00000000, 0x00000000,
2358                                       0x00000000, 0x00000000); break;
2359          }
2360          break;
2361       }
2362       case 0x00000005:
2363          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
2364          break;
2365       case 0x00000006:
2366          SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
2367          break;
2368       case 0x00000007:
2369          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2370          break;
2371       case 0x00000008:
2372          SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
2373          break;
2374       case 0x00000009:
2375          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2376          break;
2377       case 0x0000000a:
2378       unhandled_eax_value:
2379          SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
2380          break;
2381       case 0x80000000:
2382          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2383          break;
2384       case 0x80000001:
2385          SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
2386          break;
2387       case 0x80000002:
2388          SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2389          break;
2390       case 0x80000003:
2391          SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
2392          break;
2393       case 0x80000004:
2394          SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
2395          break;
2396       case 0x80000005:
2397          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2398          break;
2399       case 0x80000006:
2400          SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
2401          break;
2402       case 0x80000007:
2403          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2404          break;
2405       case 0x80000008:
2406          SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2407          break;
2408       default:
2409          goto unhandled_eax_value;
2410    }
2411 #  undef SET_ABCD
2412 }
2413 
2414 
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  When this file is compiled with __i386__ defined,
   perform a real port read of 'sz' bytes (1, 2 or 4) from I/O port
   'portno' and return the value read.  Only the low 16 bits of
   'portno' are used.  Any other 'sz' performs no read and returns 0.
   In every other build nothing is read and 0 is returned. */
UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
{
#  if defined(__i386__)
   UInt r = 0;
   portno &= 0xFFFF;
   /* Each variant zeroes %eax first, so that for the 1- and 2-byte
      reads the untouched upper bits of the result are 0. */
   switch (sz) {
      case 4:
         __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
                              : "=a" (r) : "Nd" (portno));
	 break;
      case 2:
         __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
                              : "=a" (r) : "Nd" (portno));
	 break;
      case 1:
         __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
                              : "=a" (r) : "Nd" (portno));
	 break;
      default:
         /* Unsupported size: leave r at 0. */
         break;
   }
   return r;
#  else
   return 0;
#  endif
}
2444 
2445 
2446 /* CALLED FROM GENERATED CODE */
2447 /* DIRTY HELPER (non-referentially-transparent) */
2448 /* Horrible hack.  On non-x86 platforms, do nothing. */
x86g_dirtyhelper_OUT(UInt portno,UInt data,UInt sz)2449 void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
2450 {
2451 #  if defined(__i386__)
2452    portno &= 0xFFFF;
2453    switch (sz) {
2454       case 4:
2455          __asm__ __volatile__("outl %0, %w1"
2456                               : : "a" (data), "Nd" (portno));
2457 	 break;
2458       case 2:
2459          __asm__ __volatile__("outw %w0, %w1"
2460                               : : "a" (data), "Nd" (portno));
2461 	 break;
2462       case 1:
2463          __asm__ __volatile__("outb %b0, %w1"
2464                               : : "a" (data), "Nd" (portno));
2465 	 break;
2466       default:
2467          break;
2468    }
2469 #  else
2470    /* do nothing */
2471 #  endif
2472 }
2473 
2474 /* CALLED FROM GENERATED CODE */
2475 /* DIRTY HELPER (non-referentially-transparent) */
2476 /* Horrible hack.  On non-x86 platforms, do nothing. */
2477 /* op = 0: call the native SGDT instruction.
2478    op = 1: call the native SIDT instruction.
2479 */
x86g_dirtyhelper_SxDT(void * address,UInt op)2480 void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
2481 #  if defined(__i386__)
2482    switch (op) {
2483       case 0:
2484          __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
2485          break;
2486       case 1:
2487          __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
2488          break;
2489       default:
2490          vpanic("x86g_dirtyhelper_SxDT");
2491    }
2492 #  else
2493    /* do nothing */
2494    UChar* p = (UChar*)address;
2495    p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
2496 #  endif
2497 }
2498 
2499 /*---------------------------------------------------------------*/
2500 /*--- Helpers for MMX/SSE/SSE2.                               ---*/
2501 /*---------------------------------------------------------------*/
2502 
/* Absolute difference of two unsigned bytes: the larger minus the
   smaller, narrowed back to a byte with toUChar. */
static inline UChar abdU8 ( UChar xx, UChar yy ) {
   if (xx > yy)
      return toUChar(xx - yy);
   return toUChar(yy - xx);
}
2506 
mk32x2(UInt w1,UInt w0)2507 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
2508    return (((ULong)w1) << 32) | ((ULong)w0);
2509 }
2510 
sel16x4_3(ULong w64)2511 static inline UShort sel16x4_3 ( ULong w64 ) {
2512    UInt hi32 = toUInt(w64 >> 32);
2513    return toUShort(hi32 >> 16);
2514 }
sel16x4_2(ULong w64)2515 static inline UShort sel16x4_2 ( ULong w64 ) {
2516    UInt hi32 = toUInt(w64 >> 32);
2517    return toUShort(hi32);
2518 }
sel16x4_1(ULong w64)2519 static inline UShort sel16x4_1 ( ULong w64 ) {
2520    UInt lo32 = toUInt(w64);
2521    return toUShort(lo32 >> 16);
2522 }
sel16x4_0(ULong w64)2523 static inline UShort sel16x4_0 ( ULong w64 ) {
2524    UInt lo32 = toUInt(w64);
2525    return toUShort(lo32);
2526 }
2527 
sel8x8_7(ULong w64)2528 static inline UChar sel8x8_7 ( ULong w64 ) {
2529    UInt hi32 = toUInt(w64 >> 32);
2530    return toUChar(hi32 >> 24);
2531 }
sel8x8_6(ULong w64)2532 static inline UChar sel8x8_6 ( ULong w64 ) {
2533    UInt hi32 = toUInt(w64 >> 32);
2534    return toUChar(hi32 >> 16);
2535 }
sel8x8_5(ULong w64)2536 static inline UChar sel8x8_5 ( ULong w64 ) {
2537    UInt hi32 = toUInt(w64 >> 32);
2538    return toUChar(hi32 >> 8);
2539 }
sel8x8_4(ULong w64)2540 static inline UChar sel8x8_4 ( ULong w64 ) {
2541    UInt hi32 = toUInt(w64 >> 32);
2542    return toUChar(hi32 >> 0);
2543 }
sel8x8_3(ULong w64)2544 static inline UChar sel8x8_3 ( ULong w64 ) {
2545    UInt lo32 = toUInt(w64);
2546    return toUChar(lo32 >> 24);
2547 }
sel8x8_2(ULong w64)2548 static inline UChar sel8x8_2 ( ULong w64 ) {
2549    UInt lo32 = toUInt(w64);
2550    return toUChar(lo32 >> 16);
2551 }
sel8x8_1(ULong w64)2552 static inline UChar sel8x8_1 ( ULong w64 ) {
2553    UInt lo32 = toUInt(w64);
2554    return toUChar(lo32 >> 8);
2555 }
sel8x8_0(ULong w64)2556 static inline UChar sel8x8_0 ( ULong w64 ) {
2557    UInt lo32 = toUInt(w64);
2558    return toUChar(lo32 >> 0);
2559 }
2560 
2561 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
x86g_calculate_mmx_pmaddwd(ULong xx,ULong yy)2562 ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
2563 {
2564    return
2565       mk32x2(
2566          (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
2567             + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
2568          (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
2569             + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
2570       );
2571 }
2572 
2573 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
x86g_calculate_mmx_psadbw(ULong xx,ULong yy)2574 ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
2575 {
2576    UInt t = 0;
2577    t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
2578    t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
2579    t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
2580    t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
2581    t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
2582    t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
2583    t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
2584    t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
2585    t &= 0xFFFF;
2586    return (ULong)t;
2587 }
2588 
2589 
2590 /*---------------------------------------------------------------*/
2591 /*--- Helpers for dealing with segment overrides.             ---*/
2592 /*---------------------------------------------------------------*/
2593 
2594 static inline
get_segdescr_base(VexGuestX86SegDescr * ent)2595 UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
2596 {
2597    UInt lo  = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
2598    UInt mid =   0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
2599    UInt hi  =   0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
2600    return (hi << 24) | (mid << 16) | lo;
2601 }
2602 
2603 static inline
get_segdescr_limit(VexGuestX86SegDescr * ent)2604 UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
2605 {
2606     UInt lo    = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
2607     UInt hi    =    0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
2608     UInt limit = (hi << 16) | lo;
2609     if (ent->LdtEnt.Bits.Granularity)
2610        limit = (limit << 12) | 0xFFF;
2611     return limit;
2612 }
2613 
2614 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
x86g_use_seg_selector(HWord ldt,HWord gdt,UInt seg_selector,UInt virtual_addr)2615 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2616                               UInt seg_selector, UInt virtual_addr )
2617 {
2618    UInt tiBit, base, limit;
2619    VexGuestX86SegDescr* the_descrs;
2620 
2621    Bool verboze = False;
2622 
2623    /* If this isn't true, we're in Big Trouble. */
2624    vassert(8 == sizeof(VexGuestX86SegDescr));
2625 
2626    if (verboze)
2627       vex_printf("x86h_use_seg_selector: "
2628                  "seg_selector = 0x%x, vaddr = 0x%x\n",
2629                  seg_selector, virtual_addr);
2630 
2631    /* Check for wildly invalid selector. */
2632    if (seg_selector & ~0xFFFF)
2633       goto bad;
2634 
2635    seg_selector &= 0x0000FFFF;
2636 
2637    /* Sanity check the segment selector.  Ensure that RPL=11b (least
2638       privilege).  This forms the bottom 2 bits of the selector. */
2639    if ((seg_selector & 3) != 3)
2640       goto bad;
2641 
2642    /* Extract the TI bit (0 means GDT, 1 means LDT) */
2643    tiBit = (seg_selector >> 2) & 1;
2644 
2645    /* Convert the segment selector onto a table index */
2646    seg_selector >>= 3;
2647    vassert(seg_selector >= 0 && seg_selector < 8192);
2648 
2649    if (tiBit == 0) {
2650 
2651       /* GDT access. */
2652       /* Do we actually have a GDT to look at? */
2653       if (gdt == 0)
2654          goto bad;
2655 
2656       /* Check for access to non-existent entry. */
2657       if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
2658          goto bad;
2659 
2660       the_descrs = (VexGuestX86SegDescr*)gdt;
2661       base  = get_segdescr_base (&the_descrs[seg_selector]);
2662       limit = get_segdescr_limit(&the_descrs[seg_selector]);
2663 
2664    } else {
2665 
2666       /* All the same stuff, except for the LDT. */
2667       if (ldt == 0)
2668          goto bad;
2669 
2670       if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
2671          goto bad;
2672 
2673       the_descrs = (VexGuestX86SegDescr*)ldt;
2674       base  = get_segdescr_base (&the_descrs[seg_selector]);
2675       limit = get_segdescr_limit(&the_descrs[seg_selector]);
2676 
2677    }
2678 
2679    /* Do the limit check.  Note, this check is just slightly too
2680       slack.  Really it should be "if (virtual_addr + size - 1 >=
2681       limit)," but we don't have the size info to hand.  Getting it
2682       could be significantly complex.  */
2683    if (virtual_addr >= limit)
2684       goto bad;
2685 
2686    if (verboze)
2687       vex_printf("x86h_use_seg_selector: "
2688                  "base = 0x%x, addr = 0x%x\n",
2689                  base, base + virtual_addr);
2690 
2691    /* High 32 bits are zero, indicating success. */
2692    return (ULong)( ((UInt)virtual_addr) + base );
2693 
2694  bad:
2695    return 1ULL << 32;
2696 }
2697 
2698 
2699 /*---------------------------------------------------------------*/
2700 /*--- Helpers for dealing with, and describing,               ---*/
2701 /*--- guest state as a whole.                                 ---*/
2702 /*---------------------------------------------------------------*/
2703 
2704 /* Initialise the entire x86 guest state. */
2705 /* VISIBLE TO LIBVEX CLIENT */
LibVEX_GuestX86_initialise(VexGuestX86State * vex_state)2706 void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
2707 {
2708    vex_state->host_EvC_FAILADDR = 0;
2709    vex_state->host_EvC_COUNTER = 0;
2710 
2711    vex_state->guest_EAX = 0;
2712    vex_state->guest_ECX = 0;
2713    vex_state->guest_EDX = 0;
2714    vex_state->guest_EBX = 0;
2715    vex_state->guest_ESP = 0;
2716    vex_state->guest_EBP = 0;
2717    vex_state->guest_ESI = 0;
2718    vex_state->guest_EDI = 0;
2719 
2720    vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
2721    vex_state->guest_CC_DEP1 = 0;
2722    vex_state->guest_CC_DEP2 = 0;
2723    vex_state->guest_CC_NDEP = 0;
2724    vex_state->guest_DFLAG   = 1; /* forwards */
2725    vex_state->guest_IDFLAG  = 0;
2726    vex_state->guest_ACFLAG  = 0;
2727 
2728    vex_state->guest_EIP = 0;
2729 
2730    /* Initialise the simulated FPU */
2731    x86g_dirtyhelper_FINIT( vex_state );
2732 
2733    /* Initialse the SSE state. */
2734 #  define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;
2735 
2736    vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
2737    SSEZERO(vex_state->guest_XMM0);
2738    SSEZERO(vex_state->guest_XMM1);
2739    SSEZERO(vex_state->guest_XMM2);
2740    SSEZERO(vex_state->guest_XMM3);
2741    SSEZERO(vex_state->guest_XMM4);
2742    SSEZERO(vex_state->guest_XMM5);
2743    SSEZERO(vex_state->guest_XMM6);
2744    SSEZERO(vex_state->guest_XMM7);
2745 
2746 #  undef SSEZERO
2747 
2748    vex_state->guest_CS  = 0;
2749    vex_state->guest_DS  = 0;
2750    vex_state->guest_ES  = 0;
2751    vex_state->guest_FS  = 0;
2752    vex_state->guest_GS  = 0;
2753    vex_state->guest_SS  = 0;
2754    vex_state->guest_LDT = 0;
2755    vex_state->guest_GDT = 0;
2756 
2757    vex_state->guest_EMNOTE = EmNote_NONE;
2758 
2759    /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
2760    vex_state->guest_CMSTART = 0;
2761    vex_state->guest_CMLEN   = 0;
2762 
2763    vex_state->guest_NRADDR   = 0;
2764    vex_state->guest_SC_CLASS = 0;
2765    vex_state->guest_IP_AT_SYSCALL = 0;
2766 
2767    vex_state->padding1 = 0;
2768 }
2769 
2770 
2771 /* Figure out if any part of the guest state contained in minoff
2772    .. maxoff requires precise memory exceptions.  If in doubt return
2773    True (but this generates significantly slower code).
2774 
2775    By default we enforce precise exns for guest %ESP, %EBP and %EIP
2776    only.  These are the minimum needed to extract correct stack
2777    backtraces from x86 code.
2778 
2779    Only %ESP is needed in mode VexRegUpdSpAtMemAccess.
2780 */
guest_x86_state_requires_precise_mem_exns(Int minoff,Int maxoff,VexRegisterUpdates pxControl)2781 Bool guest_x86_state_requires_precise_mem_exns (
2782         Int minoff, Int maxoff, VexRegisterUpdates pxControl
2783      )
2784 {
2785    Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
2786    Int ebp_max = ebp_min + 4 - 1;
2787    Int esp_min = offsetof(VexGuestX86State, guest_ESP);
2788    Int esp_max = esp_min + 4 - 1;
2789    Int eip_min = offsetof(VexGuestX86State, guest_EIP);
2790    Int eip_max = eip_min + 4 - 1;
2791 
2792    if (maxoff < esp_min || minoff > esp_max) {
2793       /* no overlap with esp */
2794       if (pxControl == VexRegUpdSpAtMemAccess)
2795          return False; // We only need to check stack pointer.
2796    } else {
2797       return True;
2798    }
2799 
2800    if (maxoff < ebp_min || minoff > ebp_max) {
2801       /* no overlap with ebp */
2802    } else {
2803       return True;
2804    }
2805 
2806    if (maxoff < eip_min || minoff > eip_max) {
2807       /* no overlap with eip */
2808    } else {
2809       return True;
2810    }
2811 
2812    return False;
2813 }
2814 
2815 
2816 #define ALWAYSDEFD(field)                           \
2817     { offsetof(VexGuestX86State, field),            \
2818       (sizeof ((VexGuestX86State*)0)->field) }
2819 
2820 VexGuestLayout
2821    x86guest_layout
2822       = {
2823           /* Total size of the guest state, in bytes. */
2824           .total_sizeB = sizeof(VexGuestX86State),
2825 
2826           /* Describe the stack pointer. */
2827           .offset_SP = offsetof(VexGuestX86State,guest_ESP),
2828           .sizeof_SP = 4,
2829 
2830           /* Describe the frame pointer. */
2831           .offset_FP = offsetof(VexGuestX86State,guest_EBP),
2832           .sizeof_FP = 4,
2833 
2834           /* Describe the instruction pointer. */
2835           .offset_IP = offsetof(VexGuestX86State,guest_EIP),
2836           .sizeof_IP = 4,
2837 
2838           /* Describe any sections to be regarded by Memcheck as
2839              'always-defined'. */
2840           .n_alwaysDefd = 24,
2841 
2842           /* flags thunk: OP and NDEP are always defd, whereas DEP1
2843              and DEP2 have to be tracked.  See detailed comment in
2844              gdefs.h on meaning of thunk fields. */
2845           .alwaysDefd
2846              = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
2847                  /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
2848                  /*  2 */ ALWAYSDEFD(guest_DFLAG),
2849                  /*  3 */ ALWAYSDEFD(guest_IDFLAG),
2850                  /*  4 */ ALWAYSDEFD(guest_ACFLAG),
2851                  /*  5 */ ALWAYSDEFD(guest_EIP),
2852                  /*  6 */ ALWAYSDEFD(guest_FTOP),
2853                  /*  7 */ ALWAYSDEFD(guest_FPTAG),
2854                  /*  8 */ ALWAYSDEFD(guest_FPROUND),
2855                  /*  9 */ ALWAYSDEFD(guest_FC3210),
2856                  /* 10 */ ALWAYSDEFD(guest_CS),
2857                  /* 11 */ ALWAYSDEFD(guest_DS),
2858                  /* 12 */ ALWAYSDEFD(guest_ES),
2859                  /* 13 */ ALWAYSDEFD(guest_FS),
2860                  /* 14 */ ALWAYSDEFD(guest_GS),
2861                  /* 15 */ ALWAYSDEFD(guest_SS),
2862                  /* 16 */ ALWAYSDEFD(guest_LDT),
2863                  /* 17 */ ALWAYSDEFD(guest_GDT),
2864                  /* 18 */ ALWAYSDEFD(guest_EMNOTE),
2865                  /* 19 */ ALWAYSDEFD(guest_SSEROUND),
2866                  /* 20 */ ALWAYSDEFD(guest_CMSTART),
2867                  /* 21 */ ALWAYSDEFD(guest_CMLEN),
2868                  /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
2869                  /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
2870                }
2871         };
2872 
2873 
2874 /*---------------------------------------------------------------*/
2875 /*--- end                                 guest_x86_helpers.c ---*/
2876 /*---------------------------------------------------------------*/
2877