1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                             guest_amd64_helpers.c ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2015 OpenWorks LLP
11       info@open-works.net
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26    02110-1301, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 
30    Neither the names of the U.S. Department of Energy nor the
31    University of California nor the names of its contributors may be
32    used to endorse or promote products derived from this software
33    without prior written permission.
34 */
35 
36 #include "libvex_basictypes.h"
37 #include "libvex_emnote.h"
38 #include "libvex_guest_amd64.h"
39 #include "libvex_ir.h"
40 #include "libvex.h"
41 
42 #include "main_util.h"
43 #include "main_globals.h"
44 #include "guest_generic_bb_to_IR.h"
45 #include "guest_amd64_defs.h"
46 #include "guest_generic_x87.h"
47 
48 
49 /* This file contains helper functions for amd64 guest code.
50    Calls to these functions are generated by the back end.
51    These calls are of course in the host machine code and
52    this file will be compiled to host machine code, so that
53    all makes sense.
54 
55    Only change the signatures of these helper functions very
56    carefully.  If you change the signature here, you'll have to change
57    the parameters passed to it in the IR calls constructed by
58    guest-amd64/toIR.c.
59 
60    The convention used is that all functions called from generated
61    code are named amd64g_<something>, and any function whose name lacks
62    that prefix is not called from generated code.  Note that some
63    LibVEX_* functions can however be called by VEX's client, but that
64    is not the same as calling them from VEX-generated code.
65 */
66 
67 
68 /* Set to 1 to get detailed profiling info about use of the flag
69    machinery. */
70 #define PROFILE_RFLAGS 0
71 
72 
73 /*---------------------------------------------------------------*/
74 /*--- %rflags run-time helpers.                               ---*/
75 /*---------------------------------------------------------------*/
76 
77 /* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
78    after imulq/mulq. */
79 
mullS64(Long u,Long v,Long * rHi,Long * rLo)80 static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
81 {
82    const Long halfMask = 0xFFFFFFFFLL;
83    ULong u0, v0, w0;
84     Long u1, v1, w1, w2, t;
85    u0   = u & halfMask;
86    u1   = u >> 32;
87    v0   = v & halfMask;
88    v1   = v >> 32;
89    w0   = u0 * v0;
90    t    = u1 * v0 + (w0 >> 32);
91    w1   = t & halfMask;
92    w2   = t >> 32;
93    w1   = u0 * v1 + w1;
94    *rHi = u1 * v1 + w2 + (w1 >> 32);
95    *rLo = (Long)((ULong)u * (ULong)v);
96 }
97 
mullU64(ULong u,ULong v,ULong * rHi,ULong * rLo)98 static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
99 {
100    const ULong halfMask = 0xFFFFFFFFULL;
101    ULong u0, v0, w0;
102    ULong u1, v1, w1,w2,t;
103    u0   = u & halfMask;
104    u1   = u >> 32;
105    v0   = v & halfMask;
106    v1   = v >> 32;
107    w0   = u0 * v0;
108    t    = u1 * v0 + (w0 >> 32);
109    w1   = t & halfMask;
110    w2   = t >> 32;
111    w1   = u0 * v1 + w1;
112    *rHi = u1 * v1 + w2 + (w1 >> 32);
113    *rLo = u * v;
114 }
115 
116 
117 static const UChar parity_table[256] = {
118     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
119     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
120     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
121     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
122     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
123     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
124     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
125     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
126     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
127     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
128     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
129     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
130     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
131     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
132     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
133     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
134     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
135     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
136     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
137     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
138     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
139     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
140     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
141     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
142     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
143     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
144     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
145     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
146     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
147     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
148     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
149     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
150 };
151 
152 /* generalised left-shifter */
lshift(Long x,Int n)153 static inline Long lshift ( Long x, Int n )
154 {
155    if (n >= 0)
156       return (ULong)x << n;
157    else
158       return x >> (-n);
159 }
160 
161 /* identity on ULong */
idULong(ULong x)162 static inline ULong idULong ( ULong x )
163 {
164    return x;
165 }
166 
167 
168 #define PREAMBLE(__data_bits)					\
169    /* const */ ULong DATA_MASK 					\
170       = __data_bits==8                                          \
171            ? 0xFFULL 					        \
172            : (__data_bits==16                                   \
173                 ? 0xFFFFULL 		                        \
174                 : (__data_bits==32                              \
175                      ? 0xFFFFFFFFULL                            \
176                      : 0xFFFFFFFFFFFFFFFFULL));                 \
177    /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1);     \
178    /* const */ ULong CC_DEP1 = cc_dep1_formal;			\
179    /* const */ ULong CC_DEP2 = cc_dep2_formal;			\
180    /* const */ ULong CC_NDEP = cc_ndep_formal;			\
181    /* Four bogus assignments, which hopefully gcc can     */	\
182    /* optimise away, and which stop it complaining about  */	\
183    /* unused variables.                                   */	\
184    SIGN_MASK = SIGN_MASK;					\
185    DATA_MASK = DATA_MASK;					\
186    CC_DEP2 = CC_DEP2;						\
187    CC_NDEP = CC_NDEP;
188 
189 
190 /*-------------------------------------------------------------*/
191 
192 #define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)			\
193 {								\
194    PREAMBLE(DATA_BITS);						\
195    { ULong cf, pf, af, zf, sf, of;				\
196      ULong argL, argR, res;					\
197      argL = CC_DEP1;						\
198      argR = CC_DEP2;						\
199      res  = argL + argR;					\
200      cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;			\
201      pf = parity_table[(UChar)res];				\
202      af = (res ^ argL ^ argR) & 0x10;				\
203      zf = ((DATA_UTYPE)res == 0) << 6;				\
204      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
205      of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
206                  12 - DATA_BITS) & AMD64G_CC_MASK_O;		\
207      return cf | pf | af | zf | sf | of;			\
208    }								\
209 }
210 
211 /*-------------------------------------------------------------*/
212 
213 #define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)			\
214 {								\
215    PREAMBLE(DATA_BITS);						\
216    { ULong cf, pf, af, zf, sf, of;				\
217      ULong argL, argR, res;					\
218      argL = CC_DEP1;						\
219      argR = CC_DEP2;						\
220      res  = argL - argR;					\
221      cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;			\
222      pf = parity_table[(UChar)res];				\
223      af = (res ^ argL ^ argR) & 0x10;				\
224      zf = ((DATA_UTYPE)res == 0) << 6;				\
225      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
226      of = lshift((argL ^ argR) & (argL ^ res),	 		\
227                  12 - DATA_BITS) & AMD64G_CC_MASK_O; 		\
228      return cf | pf | af | zf | sf | of;			\
229    }								\
230 }
231 
232 /*-------------------------------------------------------------*/
233 
234 #define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)			\
235 {								\
236    PREAMBLE(DATA_BITS);						\
237    { ULong cf, pf, af, zf, sf, of;				\
238      ULong argL, argR, oldC, res;		 		\
239      oldC = CC_NDEP & AMD64G_CC_MASK_C;				\
240      argL = CC_DEP1;						\
241      argR = CC_DEP2 ^ oldC;	       				\
242      res  = (argL + argR) + oldC;				\
243      if (oldC)							\
244         cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;		\
245      else							\
246         cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;		\
247      pf = parity_table[(UChar)res];				\
248      af = (res ^ argL ^ argR) & 0x10;				\
249      zf = ((DATA_UTYPE)res == 0) << 6;				\
250      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
251      of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
252                   12 - DATA_BITS) & AMD64G_CC_MASK_O;		\
253      return cf | pf | af | zf | sf | of;			\
254    }								\
255 }
256 
257 /*-------------------------------------------------------------*/
258 
259 #define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)			\
260 {								\
261    PREAMBLE(DATA_BITS);						\
262    { ULong cf, pf, af, zf, sf, of;				\
263      ULong argL, argR, oldC, res;	       			\
264      oldC = CC_NDEP & AMD64G_CC_MASK_C;				\
265      argL = CC_DEP1;						\
266      argR = CC_DEP2 ^ oldC;	       				\
267      res  = (argL - argR) - oldC;				\
268      if (oldC)							\
269         cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;		\
270      else							\
271         cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;		\
272      pf = parity_table[(UChar)res];				\
273      af = (res ^ argL ^ argR) & 0x10;				\
274      zf = ((DATA_UTYPE)res == 0) << 6;				\
275      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
276      of = lshift((argL ^ argR) & (argL ^ res), 			\
277                  12 - DATA_BITS) & AMD64G_CC_MASK_O;		\
278      return cf | pf | af | zf | sf | of;			\
279    }								\
280 }
281 
282 /*-------------------------------------------------------------*/
283 
284 #define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)			\
285 {								\
286    PREAMBLE(DATA_BITS);						\
287    { ULong cf, pf, af, zf, sf, of;				\
288      cf = 0;							\
289      pf = parity_table[(UChar)CC_DEP1];				\
290      af = 0;							\
291      zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
292      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
293      of = 0;							\
294      return cf | pf | af | zf | sf | of;			\
295    }								\
296 }
297 
298 /*-------------------------------------------------------------*/
299 
300 #define ACTIONS_INC(DATA_BITS,DATA_UTYPE)			\
301 {								\
302    PREAMBLE(DATA_BITS);						\
303    { ULong cf, pf, af, zf, sf, of;				\
304      ULong argL, argR, res;					\
305      res  = CC_DEP1;						\
306      argL = res - 1;						\
307      argR = 1;							\
308      cf = CC_NDEP & AMD64G_CC_MASK_C;				\
309      pf = parity_table[(UChar)res];				\
310      af = (res ^ argL ^ argR) & 0x10;				\
311      zf = ((DATA_UTYPE)res == 0) << 6;				\
312      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
313      of = ((res & DATA_MASK) == SIGN_MASK) << 11;		\
314      return cf | pf | af | zf | sf | of;			\
315    }								\
316 }
317 
318 /*-------------------------------------------------------------*/
319 
320 #define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)			\
321 {								\
322    PREAMBLE(DATA_BITS);						\
323    { ULong cf, pf, af, zf, sf, of;				\
324      ULong argL, argR, res;					\
325      res  = CC_DEP1;						\
326      argL = res + 1;						\
327      argR = 1;							\
328      cf = CC_NDEP & AMD64G_CC_MASK_C;				\
329      pf = parity_table[(UChar)res];				\
330      af = (res ^ argL ^ argR) & 0x10;				\
331      zf = ((DATA_UTYPE)res == 0) << 6;				\
332      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
333      of = ((res & DATA_MASK) 					\
334           == ((ULong)SIGN_MASK - 1)) << 11;			\
335      return cf | pf | af | zf | sf | of;			\
336    }								\
337 }
338 
339 /*-------------------------------------------------------------*/
340 
341 #define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)			\
342 {								\
343    PREAMBLE(DATA_BITS);						\
344    { ULong cf, pf, af, zf, sf, of;				\
345      cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C;	\
346      pf = parity_table[(UChar)CC_DEP1];				\
347      af = 0; /* undefined */					\
348      zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
349      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
350      /* of is defined if shift count == 1 */			\
351      of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) 		\
352           & AMD64G_CC_MASK_O;					\
353      return cf | pf | af | zf | sf | of;			\
354    }								\
355 }
356 
357 /*-------------------------------------------------------------*/
358 
359 #define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)			\
360 {								\
361    PREAMBLE(DATA_BITS);  					\
362    { ULong cf, pf, af, zf, sf, of;				\
363      cf = CC_DEP2 & 1;						\
364      pf = parity_table[(UChar)CC_DEP1];				\
365      af = 0; /* undefined */					\
366      zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
367      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
368      /* of is defined if shift count == 1 */			\
369      of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)		\
370           & AMD64G_CC_MASK_O;					\
371      return cf | pf | af | zf | sf | of;			\
372    }								\
373 }
374 
375 /*-------------------------------------------------------------*/
376 
377 /* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
378 /* DEP1 = result, NDEP = old flags */
379 #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)			\
380 {								\
381    PREAMBLE(DATA_BITS);						\
382    { ULong fl 							\
383         = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))	\
384           | (AMD64G_CC_MASK_C & CC_DEP1)			\
385           | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,  		\
386                                       11-(DATA_BITS-1)) 	\
387                      ^ lshift(CC_DEP1, 11)));			\
388      return fl;							\
389    }								\
390 }
391 
392 /*-------------------------------------------------------------*/
393 
394 /* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
395 /* DEP1 = result, NDEP = old flags */
396 #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)			\
397 {								\
398    PREAMBLE(DATA_BITS);						\
399    { ULong fl 							\
400         = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))	\
401           | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))	\
402           | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, 		\
403                                       11-(DATA_BITS-1)) 	\
404                      ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));	\
405      return fl;							\
406    }								\
407 }
408 
409 /*-------------------------------------------------------------*/
410 
411 #define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
412                                 DATA_U2TYPE, NARROWto2U)        \
413 {                                                               \
414    PREAMBLE(DATA_BITS);                                         \
415    { ULong cf, pf, af, zf, sf, of;                              \
416      DATA_UTYPE  hi;                                            \
417      DATA_UTYPE  lo                                             \
418         = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
419                      * ((DATA_UTYPE)CC_DEP2) );                 \
420      DATA_U2TYPE rr                                             \
421         = NARROWto2U(                                           \
422              ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
423              * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
424      hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
425      cf = (hi != 0);                                            \
426      pf = parity_table[(UChar)lo];                              \
427      af = 0; /* undefined */                                    \
428      zf = (lo == 0) << 6;                                       \
429      sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
430      of = cf << 11;                                             \
431      return cf | pf | af | zf | sf | of;                        \
432    }								\
433 }
434 
435 /*-------------------------------------------------------------*/
436 
437 #define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
438                                 DATA_S2TYPE, NARROWto2S)        \
439 {                                                               \
440    PREAMBLE(DATA_BITS);                                         \
441    { ULong cf, pf, af, zf, sf, of;                              \
442      DATA_STYPE  hi;                                            \
443      DATA_STYPE  lo                                             \
444         = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
445                      * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
446      DATA_S2TYPE rr                                             \
447         = NARROWto2S(                                           \
448              ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
449              * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
450      hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
451      cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
452      pf = parity_table[(UChar)lo];                              \
453      af = 0; /* undefined */                                    \
454      zf = (lo == 0) << 6;                                       \
455      sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
456      of = cf << 11;                                             \
457      return cf | pf | af | zf | sf | of;                        \
458    }								\
459 }
460 
461 /*-------------------------------------------------------------*/
462 
463 #define ACTIONS_UMULQ                                           \
464 {                                                               \
465    PREAMBLE(64);                                                \
466    { ULong cf, pf, af, zf, sf, of;                              \
467      ULong lo, hi;                                              \
468      mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo );       \
469      cf = (hi != 0);                                            \
470      pf = parity_table[(UChar)lo];                              \
471      af = 0; /* undefined */                                    \
472      zf = (lo == 0) << 6;                                       \
473      sf = lshift(lo, 8 - 64) & 0x80;                            \
474      of = cf << 11;                                             \
475      return cf | pf | af | zf | sf | of;                        \
476    }								\
477 }
478 
479 /*-------------------------------------------------------------*/
480 
481 #define ACTIONS_SMULQ                                           \
482 {                                                               \
483    PREAMBLE(64);                                                \
484    { ULong cf, pf, af, zf, sf, of;                              \
485      Long lo, hi;                                               \
486      mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo );         \
487      cf = (hi != (lo >>/*s*/ (64-1)));                          \
488      pf = parity_table[(UChar)lo];                              \
489      af = 0; /* undefined */                                    \
490      zf = (lo == 0) << 6;                                       \
491      sf = lshift(lo, 8 - 64) & 0x80;                            \
492      of = cf << 11;                                             \
493      return cf | pf | af | zf | sf | of;                        \
494    }								\
495 }
496 
497 /*-------------------------------------------------------------*/
498 
499 #define ACTIONS_ANDN(DATA_BITS,DATA_UTYPE)			\
500 {								\
501    PREAMBLE(DATA_BITS);						\
502    { ULong cf, pf, af, zf, sf, of;				\
503      cf = 0;							\
504      pf = 0;							\
505      af = 0;							\
506      zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
507      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
508      of = 0;							\
509      return cf | pf | af | zf | sf | of;			\
510    }								\
511 }
512 
513 /*-------------------------------------------------------------*/
514 
515 #define ACTIONS_BLSI(DATA_BITS,DATA_UTYPE)			\
516 {								\
517    PREAMBLE(DATA_BITS);						\
518    { ULong cf, pf, af, zf, sf, of;				\
519      cf = ((DATA_UTYPE)CC_DEP2 != 0);				\
520      pf = 0;							\
521      af = 0;							\
522      zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
523      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
524      of = 0;							\
525      return cf | pf | af | zf | sf | of;			\
526    }								\
527 }
528 
529 /*-------------------------------------------------------------*/
530 
531 #define ACTIONS_BLSMSK(DATA_BITS,DATA_UTYPE)			\
532 {								\
533    PREAMBLE(DATA_BITS);						\
534    { Long cf, pf, af, zf, sf, of;				\
535      cf = ((DATA_UTYPE)CC_DEP2 == 0);				\
536      pf = 0;							\
537      af = 0;							\
538      zf = 0;							\
539      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
540      of = 0;							\
541      return cf | pf | af | zf | sf | of;			\
542    }								\
543 }
544 
545 /*-------------------------------------------------------------*/
546 
547 #define ACTIONS_BLSR(DATA_BITS,DATA_UTYPE)			\
548 {								\
549    PREAMBLE(DATA_BITS);						\
550    { ULong cf, pf, af, zf, sf, of;				\
551      cf = ((DATA_UTYPE)CC_DEP2 == 0);				\
552      pf = 0;							\
553      af = 0;							\
554      zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
555      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
556      of = 0;							\
557      return cf | pf | af | zf | sf | of;			\
558    }								\
559 }
560 
561 /*-------------------------------------------------------------*/
562 
563 
564 #if PROFILE_RFLAGS
565 
566 static Bool initted     = False;
567 
568 /* C flag, fast route */
569 static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
570 /* C flag, slow route */
571 static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
572 /* table for calculate_cond */
573 static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
574 /* total entry counts for calc_all, calc_c, calc_cond. */
575 static UInt n_calc_all  = 0;
576 static UInt n_calc_c    = 0;
577 static UInt n_calc_cond = 0;
578 
579 #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
580 
581 
showCounts(void)582 static void showCounts ( void )
583 {
584    Int op, co;
585    HChar ch;
586    vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
587               n_calc_all, n_calc_cond, n_calc_c);
588 
589    vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
590               "    S   NS    P   NP    L   NL   LE  NLE\n");
591    vex_printf("     -----------------------------------------------------"
592               "----------------------------------------\n");
593    for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
594 
595       ch = ' ';
596       if (op > 0 && (op-1) % 4 == 0)
597          ch = 'B';
598       if (op > 0 && (op-1) % 4 == 1)
599          ch = 'W';
600       if (op > 0 && (op-1) % 4 == 2)
601          ch = 'L';
602       if (op > 0 && (op-1) % 4 == 3)
603          ch = 'Q';
604 
605       vex_printf("%2d%c: ", op, ch);
606       vex_printf("%6u ", tabc_slow[op]);
607       vex_printf("%6u ", tabc_fast[op]);
608       for (co = 0; co < 16; co++) {
609          Int n = tab_cond[op][co];
610          if (n >= 1000) {
611             vex_printf(" %3dK", n / 1000);
612          } else
613          if (n >= 0) {
614             vex_printf(" %3d ", n );
615          } else {
616             vex_printf("     ");
617          }
618       }
619       vex_printf("\n");
620    }
621    vex_printf("\n");
622 }
623 
initCounts(void)624 static void initCounts ( void )
625 {
626    Int op, co;
627    initted = True;
628    for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
629       tabc_fast[op] = tabc_slow[op] = 0;
630       for (co = 0; co < 16; co++)
631          tab_cond[op][co] = 0;
632    }
633 }
634 
635 #endif /* PROFILE_RFLAGS */
636 
637 
638 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
639 /* Calculate all the 6 flags from the supplied thunk parameters.
640    Worker function, not directly called from generated code. */
641 static
amd64g_calculate_rflags_all_WRK(ULong cc_op,ULong cc_dep1_formal,ULong cc_dep2_formal,ULong cc_ndep_formal)642 ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
643                                         ULong cc_dep1_formal,
644                                         ULong cc_dep2_formal,
645                                         ULong cc_ndep_formal )
646 {
647    switch (cc_op) {
648       case AMD64G_CC_OP_COPY:
649          return cc_dep1_formal
650                 & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
651                    | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);
652 
653       case AMD64G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
654       case AMD64G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
655       case AMD64G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );
656       case AMD64G_CC_OP_ADDQ:   ACTIONS_ADD( 64, ULong  );
657 
658       case AMD64G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
659       case AMD64G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
660       case AMD64G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );
661       case AMD64G_CC_OP_ADCQ:   ACTIONS_ADC( 64, ULong  );
662 
663       case AMD64G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
664       case AMD64G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
665       case AMD64G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );
666       case AMD64G_CC_OP_SUBQ:   ACTIONS_SUB( 64, ULong  );
667 
668       case AMD64G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
669       case AMD64G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
670       case AMD64G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );
671       case AMD64G_CC_OP_SBBQ:   ACTIONS_SBB( 64, ULong  );
672 
673       case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
674       case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
675       case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );
676       case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong  );
677 
678       case AMD64G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
679       case AMD64G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
680       case AMD64G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );
681       case AMD64G_CC_OP_INCQ:   ACTIONS_INC( 64, ULong  );
682 
683       case AMD64G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
684       case AMD64G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
685       case AMD64G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );
686       case AMD64G_CC_OP_DECQ:   ACTIONS_DEC( 64, ULong  );
687 
688       case AMD64G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
689       case AMD64G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
690       case AMD64G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );
691       case AMD64G_CC_OP_SHLQ:   ACTIONS_SHL( 64, ULong  );
692 
693       case AMD64G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
694       case AMD64G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
695       case AMD64G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );
696       case AMD64G_CC_OP_SHRQ:   ACTIONS_SHR( 64, ULong  );
697 
698       case AMD64G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
699       case AMD64G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
700       case AMD64G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );
701       case AMD64G_CC_OP_ROLQ:   ACTIONS_ROL( 64, ULong  );
702 
703       case AMD64G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
704       case AMD64G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
705       case AMD64G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );
706       case AMD64G_CC_OP_RORQ:   ACTIONS_ROR( 64, ULong  );
707 
708       case AMD64G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
709                                                   UShort, toUShort );
710       case AMD64G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
711                                                   UInt,   toUInt );
712       case AMD64G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
713                                                   ULong,  idULong );
714 
715       case AMD64G_CC_OP_UMULQ:  ACTIONS_UMULQ;
716 
717       case AMD64G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
718                                                   Short,  toUShort );
719       case AMD64G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
720                                                   Int,    toUInt   );
721       case AMD64G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
722                                                   Long,   idULong );
723 
724       case AMD64G_CC_OP_SMULQ:  ACTIONS_SMULQ;
725 
726       case AMD64G_CC_OP_ANDN32: ACTIONS_ANDN( 32, UInt   );
727       case AMD64G_CC_OP_ANDN64: ACTIONS_ANDN( 64, ULong  );
728 
729       case AMD64G_CC_OP_BLSI32: ACTIONS_BLSI( 32, UInt   );
730       case AMD64G_CC_OP_BLSI64: ACTIONS_BLSI( 64, ULong  );
731 
732       case AMD64G_CC_OP_BLSMSK32: ACTIONS_BLSMSK( 32, UInt   );
733       case AMD64G_CC_OP_BLSMSK64: ACTIONS_BLSMSK( 64, ULong  );
734 
735       case AMD64G_CC_OP_BLSR32: ACTIONS_BLSR( 32, UInt   );
736       case AMD64G_CC_OP_BLSR64: ACTIONS_BLSR( 64, ULong  );
737 
738       default:
739          /* shouldn't really make these calls from generated code */
740          vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
741                     "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
742                     cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
743          vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
744    }
745 }
746 
747 
748 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
749 /* Calculate all the 6 flags from the supplied thunk parameters. */
amd64g_calculate_rflags_all(ULong cc_op,ULong cc_dep1,ULong cc_dep2,ULong cc_ndep)750 ULong amd64g_calculate_rflags_all ( ULong cc_op,
751                                     ULong cc_dep1,
752                                     ULong cc_dep2,
753                                     ULong cc_ndep )
754 {
755 #  if PROFILE_RFLAGS
756    if (!initted) initCounts();
757    n_calc_all++;
758    if (SHOW_COUNTS_NOW) showCounts();
759 #  endif
760    return
761       amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
762 }
763 
764 
765 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
766 /* Calculate just the carry flag from the supplied thunk parameters. */
amd64g_calculate_rflags_c(ULong cc_op,ULong cc_dep1,ULong cc_dep2,ULong cc_ndep)767 ULong amd64g_calculate_rflags_c ( ULong cc_op,
768                                   ULong cc_dep1,
769                                   ULong cc_dep2,
770                                   ULong cc_ndep )
771 {
772 #  if PROFILE_RFLAGS
773    if (!initted) initCounts();
774    n_calc_c++;
775    tabc_fast[cc_op]++;
776    if (SHOW_COUNTS_NOW) showCounts();
777 #  endif
778 
779    /* Fast-case some common ones. */
780    switch (cc_op) {
781       case AMD64G_CC_OP_COPY:
782          return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
783       case AMD64G_CC_OP_LOGICQ:
784       case AMD64G_CC_OP_LOGICL:
785       case AMD64G_CC_OP_LOGICW:
786       case AMD64G_CC_OP_LOGICB:
787          return 0;
788 	 //      case AMD64G_CC_OP_SUBL:
789 	 //         return ((UInt)cc_dep1) < ((UInt)cc_dep2)
790 	 //                   ? AMD64G_CC_MASK_C : 0;
791 	 //      case AMD64G_CC_OP_SUBW:
792 	 //         return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
793 	 //                   ? AMD64G_CC_MASK_C : 0;
794 	 //      case AMD64G_CC_OP_SUBB:
795 	 //         return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
796 	 //                   ? AMD64G_CC_MASK_C : 0;
797 	 //      case AMD64G_CC_OP_INCL:
798 	 //      case AMD64G_CC_OP_DECL:
799 	 //         return cc_ndep & AMD64G_CC_MASK_C;
800       default:
801          break;
802    }
803 
804 #  if PROFILE_RFLAGS
805    tabc_fast[cc_op]--;
806    tabc_slow[cc_op]++;
807 #  endif
808 
809    return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
810           & AMD64G_CC_MASK_C;
811 }
812 
813 
814 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
815 /* returns 1 or 0 */
amd64g_calculate_condition(ULong cond,ULong cc_op,ULong cc_dep1,ULong cc_dep2,ULong cc_ndep)816 ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
817                                    ULong cc_op,
818                                    ULong cc_dep1,
819                                    ULong cc_dep2,
820                                    ULong cc_ndep )
821 {
822    ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
823                                                   cc_dep2, cc_ndep);
824    ULong of,sf,zf,cf,pf;
825    ULong inv = cond & 1;
826 
827 #  if PROFILE_RFLAGS
828    if (!initted) initCounts();
829    tab_cond[cc_op][cond]++;
830    n_calc_cond++;
831    if (SHOW_COUNTS_NOW) showCounts();
832 #  endif
833 
834    switch (cond) {
835       case AMD64CondNO:
836       case AMD64CondO: /* OF == 1 */
837          of = rflags >> AMD64G_CC_SHIFT_O;
838          return 1 & (inv ^ of);
839 
840       case AMD64CondNZ:
841       case AMD64CondZ: /* ZF == 1 */
842          zf = rflags >> AMD64G_CC_SHIFT_Z;
843          return 1 & (inv ^ zf);
844 
845       case AMD64CondNB:
846       case AMD64CondB: /* CF == 1 */
847          cf = rflags >> AMD64G_CC_SHIFT_C;
848          return 1 & (inv ^ cf);
849          break;
850 
851       case AMD64CondNBE:
852       case AMD64CondBE: /* (CF or ZF) == 1 */
853          cf = rflags >> AMD64G_CC_SHIFT_C;
854          zf = rflags >> AMD64G_CC_SHIFT_Z;
855          return 1 & (inv ^ (cf | zf));
856          break;
857 
858       case AMD64CondNS:
859       case AMD64CondS: /* SF == 1 */
860          sf = rflags >> AMD64G_CC_SHIFT_S;
861          return 1 & (inv ^ sf);
862 
863       case AMD64CondNP:
864       case AMD64CondP: /* PF == 1 */
865          pf = rflags >> AMD64G_CC_SHIFT_P;
866          return 1 & (inv ^ pf);
867 
868       case AMD64CondNL:
869       case AMD64CondL: /* (SF xor OF) == 1 */
870          sf = rflags >> AMD64G_CC_SHIFT_S;
871          of = rflags >> AMD64G_CC_SHIFT_O;
872          return 1 & (inv ^ (sf ^ of));
873          break;
874 
875       case AMD64CondNLE:
876       case AMD64CondLE: /* ((SF xor OF) or ZF)  == 1 */
877          sf = rflags >> AMD64G_CC_SHIFT_S;
878          of = rflags >> AMD64G_CC_SHIFT_O;
879          zf = rflags >> AMD64G_CC_SHIFT_Z;
880          return 1 & (inv ^ ((sf ^ of) | zf));
881          break;
882 
883       default:
884          /* shouldn't really make these calls from generated code */
885          vex_printf("amd64g_calculate_condition"
886                     "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
887                     cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
888          vpanic("amd64g_calculate_condition");
889    }
890 }
891 
892 
893 /* VISIBLE TO LIBVEX CLIENT */
LibVEX_GuestAMD64_get_rflags(const VexGuestAMD64State * vex_state)894 ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/const VexGuestAMD64State* vex_state )
895 {
896    ULong rflags = amd64g_calculate_rflags_all_WRK(
897                      vex_state->guest_CC_OP,
898                      vex_state->guest_CC_DEP1,
899                      vex_state->guest_CC_DEP2,
900                      vex_state->guest_CC_NDEP
901                   );
902    Long dflag = vex_state->guest_DFLAG;
903    vassert(dflag == 1 || dflag == -1);
904    if (dflag == -1)
905       rflags |= (1<<10);
906    if (vex_state->guest_IDFLAG == 1)
907       rflags |= (1<<21);
908    if (vex_state->guest_ACFLAG == 1)
909       rflags |= (1<<18);
910 
911    return rflags;
912 }
913 
914 /* VISIBLE TO LIBVEX CLIENT */
915 void
LibVEX_GuestAMD64_put_rflags(ULong rflags,VexGuestAMD64State * vex_state)916 LibVEX_GuestAMD64_put_rflags ( ULong rflags,
917                                /*MOD*/VexGuestAMD64State* vex_state )
918 {
919    /* D flag */
920    if (rflags & AMD64G_CC_MASK_D) {
921       vex_state->guest_DFLAG = -1;
922       rflags &= ~AMD64G_CC_MASK_D;
923    }
924    else
925       vex_state->guest_DFLAG = 1;
926 
927    /* ID flag */
928    if (rflags & AMD64G_CC_MASK_ID) {
929       vex_state->guest_IDFLAG = 1;
930       rflags &= ~AMD64G_CC_MASK_ID;
931    }
932    else
933       vex_state->guest_IDFLAG = 0;
934 
935    /* AC flag */
936    if (rflags & AMD64G_CC_MASK_AC) {
937       vex_state->guest_ACFLAG = 1;
938       rflags &= ~AMD64G_CC_MASK_AC;
939    }
940    else
941       vex_state->guest_ACFLAG = 0;
942 
943    UInt cc_mask = AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z |
944                   AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P;
945    vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
946    vex_state->guest_CC_DEP1 = rflags & cc_mask;
947    vex_state->guest_CC_DEP2 = 0;
948    vex_state->guest_CC_NDEP = 0;
949 }
950 
951 /* VISIBLE TO LIBVEX CLIENT */
952 void
LibVEX_GuestAMD64_put_rflag_c(ULong new_carry_flag,VexGuestAMD64State * vex_state)953 LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag,
954                                /*MOD*/VexGuestAMD64State* vex_state )
955 {
956    ULong oszacp = amd64g_calculate_rflags_all_WRK(
957                      vex_state->guest_CC_OP,
958                      vex_state->guest_CC_DEP1,
959                      vex_state->guest_CC_DEP2,
960                      vex_state->guest_CC_NDEP
961                   );
962    if (new_carry_flag & 1) {
963       oszacp |= AMD64G_CC_MASK_C;
964    } else {
965       oszacp &= ~AMD64G_CC_MASK_C;
966    }
967    vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
968    vex_state->guest_CC_DEP1 = oszacp;
969    vex_state->guest_CC_DEP2 = 0;
970    vex_state->guest_CC_NDEP = 0;
971 }
972 
973 
974 /*---------------------------------------------------------------*/
975 /*--- %rflags translation-time function specialisers.         ---*/
976 /*--- These help iropt specialise calls the above run-time    ---*/
977 /*--- %rflags functions.                                      ---*/
978 /*---------------------------------------------------------------*/
979 
980 /* Used by the optimiser to try specialisations.  Returns an
981    equivalent expression, or NULL if none. */
982 
isU64(IRExpr * e,ULong n)983 static Bool isU64 ( IRExpr* e, ULong n )
984 {
985    return toBool( e->tag == Iex_Const
986                   && e->Iex.Const.con->tag == Ico_U64
987                   && e->Iex.Const.con->Ico.U64 == n );
988 }
989 
guest_amd64_spechelper(const HChar * function_name,IRExpr ** args,IRStmt ** precedingStmts,Int n_precedingStmts)990 IRExpr* guest_amd64_spechelper ( const HChar* function_name,
991                                  IRExpr** args,
992                                  IRStmt** precedingStmts,
993                                  Int      n_precedingStmts )
994 {
995 #  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
996 #  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
997 #  define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
998 #  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
999 #  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))
1000 
1001    Int i, arity = 0;
1002    for (i = 0; args[i]; i++)
1003       arity++;
1004 #  if 0
1005    vex_printf("spec request:\n");
1006    vex_printf("   %s  ", function_name);
1007    for (i = 0; i < arity; i++) {
1008       vex_printf("  ");
1009       ppIRExpr(args[i]);
1010    }
1011    vex_printf("\n");
1012 #  endif
1013 
1014    /* --------- specialising "amd64g_calculate_condition" --------- */
1015 
1016    if (vex_streq(function_name, "amd64g_calculate_condition")) {
1017       /* specialise calls to above "calculate condition" function */
1018       IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
1019       vassert(arity == 5);
1020       cond    = args[0];
1021       cc_op   = args[1];
1022       cc_dep1 = args[2];
1023       cc_dep2 = args[3];
1024 
1025       /*---------------- ADDQ ----------------*/
1026 
1027       if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
1028          /* long long add, then Z --> test (dst+src == 0) */
1029          return unop(Iop_1Uto64,
1030                      binop(Iop_CmpEQ64,
1031                            binop(Iop_Add64, cc_dep1, cc_dep2),
1032                            mkU64(0)));
1033       }
1034 
1035       /*---------------- ADDL ----------------*/
1036 
1037       if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondO)) {
1038          /* This is very commonly generated by Javascript JITs, for
1039             the idiom "do a 32-bit add and jump to out-of-line code if
1040             an overflow occurs". */
1041          /* long add, then O (overflow)
1042             --> ((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 + dep2)))[31]
1043             --> (((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
1044             --> (((not(dep1 ^ dep2)) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
1045          */
1046          vassert(isIRAtom(cc_dep1));
1047          vassert(isIRAtom(cc_dep2));
1048          return
1049             binop(Iop_And64,
1050                   binop(Iop_Shr64,
1051                         binop(Iop_And64,
1052                               unop(Iop_Not64,
1053                                    binop(Iop_Xor64, cc_dep1, cc_dep2)),
1054                               binop(Iop_Xor64,
1055                                     cc_dep1,
1056                                     binop(Iop_Add64, cc_dep1, cc_dep2))),
1057                         mkU8(31)),
1058                   mkU64(1));
1059 
1060       }
1061 
1062       /*---------------- SUBQ ----------------*/
1063 
1064       /* 0, */
1065       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondO)) {
1066          /* long long sub/cmp, then O (overflow)
1067             --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[63]
1068             --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2))) >>u 63
1069          */
1070          vassert(isIRAtom(cc_dep1));
1071          vassert(isIRAtom(cc_dep2));
1072          return binop(Iop_Shr64,
1073                       binop(Iop_And64,
1074                             binop(Iop_Xor64, cc_dep1, cc_dep2),
1075                             binop(Iop_Xor64,
1076                                   cc_dep1,
1077                                   binop(Iop_Sub64, cc_dep1, cc_dep2))),
1078                       mkU8(63));
1079       }
1080       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNO)) {
1081          /* No action.  Never yet found a test case. */
1082       }
1083 
1084       /* 2, 3 */
1085       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
1086          /* long long sub/cmp, then B (unsigned less than)
1087             --> test dst <u src */
1088          return unop(Iop_1Uto64,
1089                      binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
1090       }
1091       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
1092          /* long long sub/cmp, then NB (unsigned greater than or equal)
1093             --> test src <=u dst */
1094          /* Note, args are opposite way round from the usual */
1095          return unop(Iop_1Uto64,
1096                      binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
1097       }
1098 
1099       /* 4, 5 */
1100       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
1101          /* long long sub/cmp, then Z --> test dst==src */
1102          return unop(Iop_1Uto64,
1103                      binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
1104       }
1105       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
1106          /* long long sub/cmp, then NZ --> test dst!=src */
1107          return unop(Iop_1Uto64,
1108                      binop(Iop_CmpNE64,cc_dep1,cc_dep2));
1109       }
1110 
1111       /* 6, 7 */
1112       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
1113          /* long long sub/cmp, then BE (unsigned less than or equal)
1114             --> test dst <=u src */
1115          return unop(Iop_1Uto64,
1116                      binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
1117       }
1118       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNBE)) {
1119          /* long long sub/cmp, then NBE (unsigned greater than)
1120             --> test !(dst <=u src) */
1121          return binop(Iop_Xor64,
1122                       unop(Iop_1Uto64,
1123                            binop(Iop_CmpLE64U, cc_dep1, cc_dep2)),
1124                       mkU64(1));
1125       }
1126 
1127       /* 8, 9 */
1128       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondS)) {
1129          /* long long sub/cmp, then S (negative)
1130             --> (dst-src)[63]
1131             --> (dst-src) >>u 63 */
1132          return binop(Iop_Shr64,
1133                       binop(Iop_Sub64, cc_dep1, cc_dep2),
1134                       mkU8(63));
1135       }
1136       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNS)) {
1137          /* long long sub/cmp, then NS (not negative)
1138             --> (dst-src)[63] ^ 1
1139             --> ((dst-src) >>u 63) ^ 1 */
1140          return binop(Iop_Xor64,
1141                       binop(Iop_Shr64,
1142                             binop(Iop_Sub64, cc_dep1, cc_dep2),
1143                             mkU8(63)),
1144                       mkU64(1));
1145       }
1146 
1147       /* 12, 13 */
1148       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
1149          /* long long sub/cmp, then L (signed less than)
1150             --> test dst <s src */
1151          return unop(Iop_1Uto64,
1152                      binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
1153       }
1154       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNL)) {
1155          /* long long sub/cmp, then NL (signed greater than or equal)
1156             --> test dst >=s src
1157             --> test src <=s dst */
1158          return unop(Iop_1Uto64,
1159                      binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
1160       }
1161 
1162       /* 14, 15 */
1163       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondLE)) {
1164          /* long long sub/cmp, then LE (signed less than or equal)
1165             --> test dst <=s src */
1166          return unop(Iop_1Uto64,
1167                      binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
1168       }
1169       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNLE)) {
1170          /* long sub/cmp, then NLE (signed greater than)
1171             --> test !(dst <=s src)
1172             --> test (dst >s src)
1173             --> test (src <s dst) */
1174          return unop(Iop_1Uto64,
1175                      binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
1176 
1177       }
1178 
1179       /*---------------- SUBL ----------------*/
1180 
1181       /* 0, */
1182       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondO)) {
1183          /* This is very commonly generated by Javascript JITs, for
1184             the idiom "do a 32-bit subtract and jump to out-of-line
1185             code if an overflow occurs". */
1186          /* long sub/cmp, then O (overflow)
1187             --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[31]
1188             --> (((dep1 ^ dep2) & (dep1 ^ (dep1 -64 dep2))) >>u 31) & 1
1189          */
1190          vassert(isIRAtom(cc_dep1));
1191          vassert(isIRAtom(cc_dep2));
1192          return
1193             binop(Iop_And64,
1194                   binop(Iop_Shr64,
1195                         binop(Iop_And64,
1196                               binop(Iop_Xor64, cc_dep1, cc_dep2),
1197                               binop(Iop_Xor64,
1198                                     cc_dep1,
1199                                     binop(Iop_Sub64, cc_dep1, cc_dep2))),
1200                         mkU8(31)),
1201                   mkU64(1));
1202       }
1203       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNO)) {
1204          /* No action.  Never yet found a test case. */
1205       }
1206 
1207       /* 2, 3 */
1208       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) {
1209          /* long sub/cmp, then B (unsigned less than)
1210             --> test dst <u src */
1211          return unop(Iop_1Uto64,
1212                      binop(Iop_CmpLT32U,
1213                            unop(Iop_64to32, cc_dep1),
1214                            unop(Iop_64to32, cc_dep2)));
1215       }
1216       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNB)) {
1217          /* long sub/cmp, then NB (unsigned greater than or equal)
1218             --> test src <=u dst */
1219          /* Note, args are opposite way round from the usual */
1220          return unop(Iop_1Uto64,
1221                      binop(Iop_CmpLE32U,
1222                            unop(Iop_64to32, cc_dep2),
1223                            unop(Iop_64to32, cc_dep1)));
1224       }
1225 
1226       /* 4, 5 */
1227       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
1228          /* long sub/cmp, then Z --> test dst==src */
1229          return unop(Iop_1Uto64,
1230                      binop(Iop_CmpEQ32,
1231                            unop(Iop_64to32, cc_dep1),
1232                            unop(Iop_64to32, cc_dep2)));
1233       }
1234       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
1235          /* long sub/cmp, then NZ --> test dst!=src */
1236          return unop(Iop_1Uto64,
1237                      binop(Iop_CmpNE32,
1238                            unop(Iop_64to32, cc_dep1),
1239                            unop(Iop_64to32, cc_dep2)));
1240       }
1241 
1242       /* 6, 7 */
1243       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
1244          /* long sub/cmp, then BE (unsigned less than or equal)
1245             --> test dst <=u src */
1246          return unop(Iop_1Uto64,
1247                      binop(Iop_CmpLE32U,
1248                            unop(Iop_64to32, cc_dep1),
1249                            unop(Iop_64to32, cc_dep2)));
1250       }
1251       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
1252          /* long sub/cmp, then NBE (unsigned greater than)
1253             --> test src <u dst */
1254          /* Note, args are opposite way round from the usual */
1255          return unop(Iop_1Uto64,
1256                      binop(Iop_CmpLT32U,
1257                            unop(Iop_64to32, cc_dep2),
1258                            unop(Iop_64to32, cc_dep1)));
1259       }
1260 
1261       /* 8, 9 */
1262       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
1263          /* long sub/cmp, then S (negative)
1264             --> (dst-src)[31]
1265             --> ((dst -64 src) >>u 31) & 1
1266             Pointless to narrow the args to 32 bit before the subtract. */
1267          return binop(Iop_And64,
1268                       binop(Iop_Shr64,
1269                             binop(Iop_Sub64, cc_dep1, cc_dep2),
1270                             mkU8(31)),
1271                       mkU64(1));
1272       }
1273       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNS)) {
1274          /* long sub/cmp, then NS (not negative)
1275             --> (dst-src)[31] ^ 1
1276             --> (((dst -64 src) >>u 31) & 1) ^ 1
1277             Pointless to narrow the args to 32 bit before the subtract. */
1278          return binop(Iop_Xor64,
1279                       binop(Iop_And64,
1280                             binop(Iop_Shr64,
1281                                   binop(Iop_Sub64, cc_dep1, cc_dep2),
1282                                   mkU8(31)),
1283                             mkU64(1)),
1284                       mkU64(1));
1285       }
1286 
1287       /* 12, 13 */
1288       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
1289          /* long sub/cmp, then L (signed less than)
1290             --> test dst <s src */
1291          return unop(Iop_1Uto64,
1292                      binop(Iop_CmpLT32S,
1293                            unop(Iop_64to32, cc_dep1),
1294                            unop(Iop_64to32, cc_dep2)));
1295       }
1296       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNL)) {
1297          /* long sub/cmp, then NL (signed greater than or equal)
1298             --> test dst >=s src
1299             --> test src <=s dst */
1300          return unop(Iop_1Uto64,
1301                      binop(Iop_CmpLE32S,
1302                            unop(Iop_64to32, cc_dep2),
1303                            unop(Iop_64to32, cc_dep1)));
1304       }
1305 
1306       /* 14, 15 */
1307       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
1308          /* long sub/cmp, then LE (signed less than or equal)
1309             --> test dst <=s src */
1310          return unop(Iop_1Uto64,
1311                      binop(Iop_CmpLE32S,
1312                            unop(Iop_64to32, cc_dep1),
1313                            unop(Iop_64to32, cc_dep2)));
1314 
1315       }
1316       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
1317          /* long sub/cmp, then NLE (signed greater than)
1318             --> test !(dst <=s src)
1319             --> test (dst >s src)
1320             --> test (src <s dst) */
1321          return unop(Iop_1Uto64,
1322                      binop(Iop_CmpLT32S,
1323                            unop(Iop_64to32, cc_dep2),
1324                            unop(Iop_64to32, cc_dep1)));
1325 
1326       }
1327 
1328       /*---------------- SUBW ----------------*/
1329 
1330       /* 4, 5 */
1331       if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
1332          /* word sub/cmp, then Z --> test dst==src */
1333          return unop(Iop_1Uto64,
1334                      binop(Iop_CmpEQ16,
1335                            unop(Iop_64to16,cc_dep1),
1336                            unop(Iop_64to16,cc_dep2)));
1337       }
1338       if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
1339          /* word sub/cmp, then NZ --> test dst!=src */
1340          return unop(Iop_1Uto64,
1341                      binop(Iop_CmpNE16,
1342                            unop(Iop_64to16,cc_dep1),
1343                            unop(Iop_64to16,cc_dep2)));
1344       }
1345 
1346       /* 6, */
1347       if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondBE)) {
1348          /* word sub/cmp, then BE (unsigned less than or equal)
1349             --> test dst <=u src */
1350          return unop(Iop_1Uto64,
1351                      binop(Iop_CmpLE64U,
1352                            binop(Iop_Shl64, cc_dep1, mkU8(48)),
1353                            binop(Iop_Shl64, cc_dep2, mkU8(48))));
1354       }
1355 
1356       /* 14, */
1357       if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
1358          /* word sub/cmp, then LE (signed less than or equal)
1359             --> test dst <=s src */
1360          return unop(Iop_1Uto64,
1361                      binop(Iop_CmpLE64S,
1362                            binop(Iop_Shl64,cc_dep1,mkU8(48)),
1363                            binop(Iop_Shl64,cc_dep2,mkU8(48))));
1364 
1365       }
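
      /* The SUBW CondBE and CondLE cases above avoid 16-bit narrowing by
         shifting both operands into bits 63:48 and comparing at 64 bits.
         Below is a quick standalone sanity check of why that preserves
         both the unsigned and the signed 16-bit orderings; it is
         illustrative only and not part of the helper set. */
#     if 0
      {
         UInt i, j;
         for (i = 0; i < 0x10000; i += 257) {
            for (j = 0; j < 0x10000; j += 263) {
               UShort a  = (UShort)i;        UShort b  = (UShort)j;
               ULong  ua = (ULong)a << 48;   ULong  ub = (ULong)b << 48;
               /* unsigned ordering survives the shift ... */
               vassert( (a <= b) == (ua <= ub) );
               /* ... and so does signed ordering, since the 16-bit sign
                  bit lands in bit 63 */
               vassert( ((Short)a <= (Short)b) == ((Long)ua <= (Long)ub) );
            }
         }
      }
#     endif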
1366 
1367       /*---------------- SUBB ----------------*/
1368 
1369       /* 2, 3 */
1370       if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondB)) {
1371          /* byte sub/cmp, then B (unsigned less than)
1372             --> test dst <u src */
1373          return unop(Iop_1Uto64,
1374                      binop(Iop_CmpLT64U,
1375                            binop(Iop_And64, cc_dep1, mkU64(0xFF)),
1376                            binop(Iop_And64, cc_dep2, mkU64(0xFF))));
1377       }
1378       if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNB)) {
1379          /* byte sub/cmp, then NB (unsigned greater than or equal)
1380             --> test src <=u dst */
1381          /* Note, args are opposite way round from the usual */
1382          return unop(Iop_1Uto64,
1383                      binop(Iop_CmpLE64U,
1384                            binop(Iop_And64, cc_dep2, mkU64(0xFF)),
1385                            binop(Iop_And64, cc_dep1, mkU64(0xFF))));
1386       }
1387 
1388       /* 4, 5 */
1389       if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
1390          /* byte sub/cmp, then Z --> test dst==src */
1391          return unop(Iop_1Uto64,
1392                      binop(Iop_CmpEQ8,
1393                            unop(Iop_64to8,cc_dep1),
1394                            unop(Iop_64to8,cc_dep2)));
1395       }
1396       if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
1397          /* byte sub/cmp, then NZ --> test dst!=src */
1398          return unop(Iop_1Uto64,
1399                      binop(Iop_CmpNE8,
1400                            unop(Iop_64to8,cc_dep1),
1401                            unop(Iop_64to8,cc_dep2)));
1402       }
1403 
1404       /* 6, */
1405       if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) {
1406          /* byte sub/cmp, then BE (unsigned less than or equal)
1407             --> test dst <=u src */
1408          return unop(Iop_1Uto64,
1409                      binop(Iop_CmpLE64U,
1410                            binop(Iop_And64, cc_dep1, mkU64(0xFF)),
1411                            binop(Iop_And64, cc_dep2, mkU64(0xFF))));
1412       }
1413 
1414       /* 8, 9 */
1415       if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
1416                                           && isU64(cc_dep2, 0)) {
1417          /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
1418                                          --> test dst <s 0
1419                                          --> (ULong)dst[7]
1420             This is yet another scheme by which gcc figures out if the
1421             top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
1422          /* Note: isU64(cc_dep2, 0) is correct, even though this is
1423             for an 8-bit comparison, since the args to the helper
1424             function are always U64s. */
1425          return binop(Iop_And64,
1426                       binop(Iop_Shr64,cc_dep1,mkU8(7)),
1427                       mkU64(1));
1428       }
1429       if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
1430                                           && isU64(cc_dep2, 0)) {
1431          /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
1432                                           --> test !(dst <s 0)
1433                                           --> (ULong) !dst[7]
1434          */
1435          return binop(Iop_Xor64,
1436                       binop(Iop_And64,
1437                             binop(Iop_Shr64,cc_dep1,mkU8(7)),
1438                             mkU64(1)),
1439                       mkU64(1));
1440       }
1441 
1442       /*---------------- LOGICQ ----------------*/
1443 
1444       if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
1445          /* long long and/or/xor, then Z --> test dst==0 */
1446          return unop(Iop_1Uto64,
1447                      binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1448       }
1449       if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) {
1450          /* long long and/or/xor, then NZ --> test dst!=0 */
1451          return unop(Iop_1Uto64,
1452                      binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1453       }
1454 
1455       if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
1456          /* long long and/or/xor, then L
1457             LOGIC sets SF and ZF according to the
1458             result and makes OF be zero.  L computes SF ^ OF, but
1459             OF is zero, so this reduces to SF -- which will be 1 iff
1460             the result is < signed 0.  Hence ...
1461          */
1462          return unop(Iop_1Uto64,
1463                      binop(Iop_CmpLT64S,
1464                            cc_dep1,
1465                            mkU64(0)));
1466       }
1467 
1468       /*---------------- LOGICL ----------------*/
1469 
1470       if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
1471          /* long and/or/xor, then Z --> test dst==0 */
1472          return unop(Iop_1Uto64,
1473                      binop(Iop_CmpEQ32,
1474                            unop(Iop_64to32, cc_dep1),
1475                            mkU32(0)));
1476       }
1477       if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
1478          /* long and/or/xor, then NZ --> test dst!=0 */
1479          return unop(Iop_1Uto64,
1480                      binop(Iop_CmpNE32,
1481                            unop(Iop_64to32, cc_dep1),
1482                            mkU32(0)));
1483       }
1484 
1485       if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
1486          /* long and/or/xor, then LE
1487             This is pretty subtle.  LOGIC sets SF and ZF according to the
1488             result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
1489             OF is zero, so this reduces to SF | ZF -- which will be 1 iff
1490             the result is <=signed 0.  Hence ...
1491          */
1492          return unop(Iop_1Uto64,
1493                      binop(Iop_CmpLE32S,
1494                            unop(Iop_64to32, cc_dep1),
1495                            mkU32(0)));
1496       }
1497 
1498       if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) {
1499          /* long and/or/xor, then S --> (ULong)result[31] */
1500          return binop(Iop_And64,
1501                       binop(Iop_Shr64, cc_dep1, mkU8(31)),
1502                       mkU64(1));
1503       }
1504       if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) {
1505          /* long and/or/xor, then NS --> (ULong) ~ result[31] */
1506          return binop(Iop_Xor64,
1507                 binop(Iop_And64,
1508                       binop(Iop_Shr64, cc_dep1, mkU8(31)),
1509                       mkU64(1)),
1510                 mkU64(1));
1511       }
1512 
1513       /*---------------- LOGICW ----------------*/
1514 
1515       if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondZ)) {
1516          /* word and/or/xor, then Z --> test dst==0 */
1517          return unop(Iop_1Uto64,
1518                      binop(Iop_CmpEQ64,
1519                            binop(Iop_And64, cc_dep1, mkU64(0xFFFF)),
1520                            mkU64(0)));
1521       }
1522       if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondNZ)) {
1523          /* word and/or/xor, then NZ --> test dst!=0 */
1524          return unop(Iop_1Uto64,
1525                      binop(Iop_CmpNE64,
1526                            binop(Iop_And64, cc_dep1, mkU64(0xFFFF)),
1527                            mkU64(0)));
1528       }
1529 
1530       /*---------------- LOGICB ----------------*/
1531 
1532       if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
1533          /* byte and/or/xor, then Z --> test dst==0 */
1534          return unop(Iop_1Uto64,
1535                      binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)),
1536                                         mkU64(0)));
1537       }
1538       if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) {
1539          /* byte and/or/xor, then NZ --> test dst!=0 */
1540          return unop(Iop_1Uto64,
1541                      binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)),
1542                                         mkU64(0)));
1543       }
1544 
1545       if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
1546          /* this is an idiom gcc sometimes uses to find out if the top
1547             bit of a byte register is set: eg testb %al,%al; js ..
1548             Since it just depends on the top bit of the byte, extract
1549             that bit and explicitly get rid of all the rest.  This
1550             helps memcheck avoid false positives in the case where any
1551             of the other bits in the byte are undefined. */
1552          /* byte and/or/xor, then S --> (ULong)result[7] */
1553          return binop(Iop_And64,
1554                       binop(Iop_Shr64,cc_dep1,mkU8(7)),
1555                       mkU64(1));
1556       }
1557       if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) {
1558          /* byte and/or/xor, then NS --> (ULong)!result[7] */
1559          return binop(Iop_Xor64,
1560                       binop(Iop_And64,
1561                             binop(Iop_Shr64,cc_dep1,mkU8(7)),
1562                             mkU64(1)),
1563                       mkU64(1));
1564       }
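
      /* A C-level restatement of the LOGICB/CondS reduction above, as a
         quick self-check: for any byte value in the low 8 bits of the
         thunk, ((x >> 7) & 1) is the S flag a byte-wide logical op would
         produce, and xor-ing with 1 gives the NS case.  Illustrative
         only; not part of the helper set. */
#     if 0
      {
         UInt x;
         for (x = 0; x < 256; x++) {
            ULong s  = ((ULong)x >> 7) & 1;
            ULong ns = s ^ 1;
            vassert( s  == ((x & 0x80) ? 1 : 0) );
            vassert( ns == ((x & 0x80) ? 0 : 1) );
         }
      }
#     endif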
1565 
1566       /*---------------- INCB ----------------*/
1567 
1568       if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
1569          /* 8-bit inc, then LE --> sign bit of the arg */
1570          return binop(Iop_And64,
1571                       binop(Iop_Shr64,
1572                             binop(Iop_Sub64, cc_dep1, mkU64(1)),
1573                             mkU8(7)),
1574                       mkU64(1));
1575       }
1576 
1577       /*---------------- INCW ----------------*/
1578 
1579       if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
1580          /* 16-bit inc, then Z --> test dst == 0 */
1581          return unop(Iop_1Uto64,
1582                      binop(Iop_CmpEQ64,
1583                            binop(Iop_Shl64,cc_dep1,mkU8(48)),
1584                            mkU64(0)));
1585       }
1586 
1587       /*---------------- DECL ----------------*/
1588 
1589       if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
1590          /* dec L, then Z --> test dst == 0 */
1591          return unop(Iop_1Uto64,
1592                      binop(Iop_CmpEQ32,
1593                            unop(Iop_64to32, cc_dep1),
1594                            mkU32(0)));
1595       }
1596 
1597       /*---------------- DECW ----------------*/
1598 
1599       if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
1600          /* 16-bit dec, then NZ --> test dst != 0 */
1601          return unop(Iop_1Uto64,
1602                      binop(Iop_CmpNE64,
1603                            binop(Iop_Shl64,cc_dep1,mkU8(48)),
1604                            mkU64(0)));
1605       }
1606 
1607       /*---------------- COPY ----------------*/
1608       /* This can happen, as a result of amd64 FP compares: "comisd ... ;
1609          jbe" for example. */
1610 
1611       if (isU64(cc_op, AMD64G_CC_OP_COPY) &&
1612           (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
1613          /* COPY, then BE --> extract C and Z from dep1, and test (C
1614             or Z == 1). */
1615          /* COPY, then NBE --> extract C and Z from dep1, and test (C
1616             or Z == 0). */
1617          ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
1618          return
1619             unop(
1620                Iop_1Uto64,
1621                binop(
1622                   Iop_CmpEQ64,
1623                   binop(
1624                      Iop_And64,
1625                      binop(
1626                         Iop_Or64,
1627                         binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
1628                         binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))
1629                      ),
1630                      mkU64(1)
1631                   ),
1632                   mkU64(nnn)
1633                )
1634             );
1635       }
1636 
1637       if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) {
1638          /* COPY, then B --> extract C from dep1, and test (C == 1). */
1639          return
1640             unop(
1641                Iop_1Uto64,
1642                binop(
1643                   Iop_CmpNE64,
1644                   binop(
1645                      Iop_And64,
1646                      binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
1647                      mkU64(1)
1648                   ),
1649                   mkU64(0)
1650                )
1651             );
1652       }
1653 
1654       if (isU64(cc_op, AMD64G_CC_OP_COPY)
1655           && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
1656          /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
1657          /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
1658          UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
1659          return
1660             unop(
1661                Iop_1Uto64,
1662                binop(
1663                   Iop_CmpEQ64,
1664                   binop(
1665                      Iop_And64,
1666                      binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
1667                      mkU64(1)
1668                   ),
1669                   mkU64(nnn)
1670                )
1671             );
1672       }
1673 
1674       if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) {
1675          /* COPY, then P --> extract P from dep1, and test (P == 1). */
1676          return
1677             unop(
1678                Iop_1Uto64,
1679                binop(
1680                   Iop_CmpNE64,
1681                   binop(
1682                      Iop_And64,
1683                      binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
1684                      mkU64(1)
1685                   ),
1686                   mkU64(0)
1687                )
1688             );
1689       }
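
      /* For the COPY cases above, cc_dep1 at run time is already an
         rflags-format value, so a condition is just a shift-and-mask with
         the AMD64G_CC_SHIFT_* constants.  A C-level sketch of the same
         extraction, done on the run-time value rather than the IRExpr;
         illustrative only. */
#     if 0
      {
         ULong rflags_val = 0; /* stands in for the run-time cc_dep1 */
         ULong flag_C = (rflags_val >> AMD64G_CC_SHIFT_C) & 1;
         ULong flag_Z = (rflags_val >> AMD64G_CC_SHIFT_Z) & 1;
         ULong flag_P = (rflags_val >> AMD64G_CC_SHIFT_P) & 1;
         /* e.g. B is flag_C, BE is flag_C | flag_Z, P is flag_P. */
      }
#     endif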
1690 
1691       return NULL;
1692    }
1693 
1694    /* --------- specialising "amd64g_calculate_rflags_c" --------- */
1695 
1696    if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
1697       /* specialise calls to above "calculate_rflags_c" function */
1698       IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
1699       vassert(arity == 4);
1700       cc_op   = args[0];
1701       cc_dep1 = args[1];
1702       cc_dep2 = args[2];
1703       cc_ndep = args[3];
1704 
1705       if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
1706          /* C after sub denotes unsigned less than */
1707          return unop(Iop_1Uto64,
1708                      binop(Iop_CmpLT64U,
1709                            cc_dep1,
1710                            cc_dep2));
1711       }
1712       if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
1713          /* C after sub denotes unsigned less than */
1714          return unop(Iop_1Uto64,
1715                      binop(Iop_CmpLT32U,
1716                            unop(Iop_64to32, cc_dep1),
1717                            unop(Iop_64to32, cc_dep2)));
1718       }
1719       if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
1720          /* C after sub denotes unsigned less than */
1721          return unop(Iop_1Uto64,
1722                      binop(Iop_CmpLT64U,
1723                            binop(Iop_And64,cc_dep1,mkU64(0xFF)),
1724                            binop(Iop_And64,cc_dep2,mkU64(0xFF))));
1725       }
1726       if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
1727           || isU64(cc_op, AMD64G_CC_OP_LOGICL)
1728           || isU64(cc_op, AMD64G_CC_OP_LOGICW)
1729           || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
1730          /* cflag after logic is zero */
1731          return mkU64(0);
1732       }
1733       if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL)
1734           || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
1735          /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
1736          return cc_ndep;
1737       }
1738 
1739 #     if 0
1740       if (cc_op->tag == Iex_Const) {
1741          vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
1742       }
1743 #     endif
1744 
1745       return NULL;
1746    }
1747 
1748 #  undef unop
1749 #  undef binop
1750 #  undef mkU64
1751 #  undef mkU32
1752 #  undef mkU8
1753 
1754    return NULL;
1755 }
1756 
1757 
1758 /*---------------------------------------------------------------*/
1759 /*--- Supporting functions for x87 FPU activities.            ---*/
1760 /*---------------------------------------------------------------*/
1761 
1762 static inline Bool host_is_little_endian ( void )
1763 {
1764    UInt x = 0x76543210;
1765    UChar* p = (UChar*)(&x);
1766    return toBool(*p == 0x10);
1767 }
1768 
1769 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
1770 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
1771 ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl )
1772 {
1773    Bool   mantissaIsZero;
1774    Int    bexp;
1775    UChar  sign;
1776    UChar* f64;
1777 
1778    vassert(host_is_little_endian());
1779 
1780    /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
1781 
1782    f64  = (UChar*)(&dbl);
1783    sign = toUChar( (f64[7] >> 7) & 1 );
1784 
1785    /* First off, if the tag indicates the register was empty,
1786       return 1,0,sign,1 */
1787    if (tag == 0) {
1788       /* vex_printf("Empty\n"); */
1789       return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1)
1790                                    | AMD64G_FC_MASK_C0;
1791    }
1792 
1793    bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
1794    bexp &= 0x7FF;
1795 
1796    mantissaIsZero
1797       = toBool(
1798            (f64[6] & 0x0F) == 0
1799            && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
1800         );
1801 
1802    /* If both exponent and mantissa are zero, the value is zero.
1803       Return 1,0,sign,0. */
1804    if (bexp == 0 && mantissaIsZero) {
1805       /* vex_printf("Zero\n"); */
1806       return AMD64G_FC_MASK_C3 | 0
1807                                | (sign << AMD64G_FC_SHIFT_C1) | 0;
1808    }
1809 
1810    /* If exponent is zero but mantissa isn't, it's a denormal.
1811       Return 1,1,sign,0. */
1812    if (bexp == 0 && !mantissaIsZero) {
1813       /* vex_printf("Denormal\n"); */
1814       return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2
1815                                | (sign << AMD64G_FC_SHIFT_C1) | 0;
1816    }
1817 
1818    /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
1819       Return 0,1,sign,1. */
1820    if (bexp == 0x7FF && mantissaIsZero) {
1821       /* vex_printf("Inf\n"); */
1822       return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1)
1823                                    | AMD64G_FC_MASK_C0;
1824    }
1825 
1826    /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
1827       Return 0,0,sign,1. */
1828    if (bexp == 0x7FF && !mantissaIsZero) {
1829       /* vex_printf("NaN\n"); */
1830       return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
1831    }
1832 
1833    /* Uh, ok, we give up.  It must be a normal finite number.
1834       Return 0,1,sign,0.
1835    */
1836    /* vex_printf("normal\n"); */
1837    return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
1838 }
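
/* The classification above needs only the sign, the 11-bit biased
   exponent and whether the 52-bit mantissa is zero.  The same test,
   written directly on the 64-bit image rather than byte by byte, looks
   like this; classify_f64_bits is a made-up name and the sketch is not
   part of the helper set. */
#  if 0
static const HChar* classify_f64_bits ( ULong dbl )
{
   UInt bexp       = (UInt)((dbl >> 52) & 0x7FF);
   Bool mantIsZero = toBool((dbl & 0xFFFFFFFFFFFFFULL) == 0);
   if (bexp == 0     &&  mantIsZero) return "zero";
   if (bexp == 0     && !mantIsZero) return "denormal";
   if (bexp == 0x7FF &&  mantIsZero) return "infinity";
   if (bexp == 0x7FF && !mantIsZero) return "NaN";
   return "normal";
}
#  endif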
1839 
1840 
1841 /* This is used to implement both 'frstor' and 'fldenv'.  The latter
1842    appears to differ from the former only in that the 8 FP registers
1843    themselves are not transferred into the guest state. */
1844 static
1845 VexEmNote do_put_x87 ( Bool moveRegs,
1846                        /*IN*/UChar* x87_state,
1847                        /*OUT*/VexGuestAMD64State* vex_state )
1848 {
1849    Int        stno, preg;
1850    UInt       tag;
1851    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1852    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1853    Fpu_State* x87     = (Fpu_State*)x87_state;
1854    UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
1855    UInt       tagw    = x87->env[FP_ENV_TAG];
1856    UInt       fpucw   = x87->env[FP_ENV_CTRL];
1857    UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;
1858    VexEmNote  ew;
1859    UInt       fpround;
1860    ULong      pair;
1861 
1862    /* Copy registers and tags */
1863    for (stno = 0; stno < 8; stno++) {
1864       preg = (stno + ftop) & 7;
1865       tag = (tagw >> (2*preg)) & 3;
1866       if (tag == 3) {
1867          /* register is empty */
1868          /* hmm, if it's empty, does it still get written?  Probably
1869             safer to say it does.  If we don't, memcheck could get out
1870             of sync, in that it thinks all FP registers are defined by
1871             this helper, but in reality some have not been updated. */
1872          if (moveRegs)
1873             vexRegs[preg] = 0; /* IEEE754 64-bit zero */
1874          vexTags[preg] = 0;
1875       } else {
1876          /* register is non-empty */
1877          if (moveRegs)
1878             convert_f80le_to_f64le( &x87->reg[10*stno],
1879                                     (UChar*)&vexRegs[preg] );
1880          vexTags[preg] = 1;
1881       }
1882    }
1883 
1884    /* stack pointer */
1885    vex_state->guest_FTOP = ftop;
1886 
1887    /* status word */
1888    vex_state->guest_FC3210 = c3210;
1889 
1890    /* handle the control word, setting FPROUND and detecting any
1891       emulation warnings. */
1892    pair    = amd64g_check_fldcw ( (ULong)fpucw );
1893    fpround = (UInt)pair & 0xFFFFFFFFULL;
1894    ew      = (VexEmNote)(pair >> 32);
1895 
1896    vex_state->guest_FPROUND = fpround & 3;
1897 
1898    /* emulation warnings --> caller */
1899    return ew;
1900 }
1901 
1902 
1903 /* Create an x87 FPU state from the guest state, as close as
1904    we can approximate it. */
1905 static
1906 void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
1907                   /*OUT*/UChar* x87_state )
1908 {
1909    Int        i, stno, preg;
1910    UInt       tagw;
1911    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1912    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1913    Fpu_State* x87     = (Fpu_State*)x87_state;
1914    UInt       ftop    = vex_state->guest_FTOP;
1915    UInt       c3210   = vex_state->guest_FC3210;
1916 
1917    for (i = 0; i < 14; i++)
1918       x87->env[i] = 0;
1919 
1920    x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
1921    x87->env[FP_ENV_STAT]
1922       = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
1923    x87->env[FP_ENV_CTRL]
1924       = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
1925 
1926    /* Dump the register stack in ST order. */
1927    tagw = 0;
1928    for (stno = 0; stno < 8; stno++) {
1929       preg = (stno + ftop) & 7;
1930       if (vexTags[preg] == 0) {
1931          /* register is empty */
1932          tagw |= (3 << (2*preg));
1933          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1934                                  &x87->reg[10*stno] );
1935       } else {
1936          /* register is full. */
1937          tagw |= (0 << (2*preg));
1938          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1939                                  &x87->reg[10*stno] );
1940       }
1941    }
1942    x87->env[FP_ENV_TAG] = toUShort(tagw);
1943 }
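
/* Both do_put_x87 and do_get_x87 rely on the same mapping: with the
   stack pointer at 'ftop', ST(stno) lives in physical register
   (stno + ftop) & 7, and that register's two tag bits sit at bit
   position 2*preg of the 16-bit tag word (0 = valid .. 3 = empty).
   A small lookup sketch; x87_tag_for_ST is a made-up name and is not
   part of the helper set. */
#  if 0
static UInt x87_tag_for_ST ( UInt tagw, UInt ftop, UInt stno )
{
   UInt preg = (stno + ftop) & 7;
   return (tagw >> (2*preg)) & 3;
}
#  endif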
1944 
1945 
1946 /*---------------------------------------------------------------*/
1947 /*--- Supporting functions for XSAVE/FXSAVE.                  ---*/
1948 /*---------------------------------------------------------------*/
1949 
1950 /* CALLED FROM GENERATED CODE */
1951 /* DIRTY HELPER (reads guest state, writes guest mem) */
1952 /* XSAVE component 0 is the x87 FPU state. */
1953 void amd64g_dirtyhelper_XSAVE_COMPONENT_0
1954         ( VexGuestAMD64State* gst, HWord addr )
1955 {
1956    /* Derived from values obtained from
1957       vendor_id       : AuthenticAMD
1958       cpu family      : 15
1959       model           : 12
1960       model name      : AMD Athlon(tm) 64 Processor 3200+
1961       stepping        : 0
1962       cpu MHz         : 2200.000
1963       cache size      : 512 KB
1964    */
1965    /* Somewhat roundabout, but at least it's simple. */
1966    Fpu_State tmp;
1967    UShort*   addrS = (UShort*)addr;
1968    UChar*    addrC = (UChar*)addr;
1969    UShort    fp_tags;
1970    UInt      summary_tags;
1971    Int       r, stno;
1972    UShort    *srcS, *dstS;
1973 
1974    do_get_x87( gst, (UChar*)&tmp );
1975 
1976    /* Now build the proper fxsave x87 image from the fsave x87 image
1977       we just made. */
1978 
1979    addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
1980    addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */
1981 
1982    /* set addrS[2] in an endian-independent way */
1983    summary_tags = 0;
1984    fp_tags = tmp.env[FP_ENV_TAG];
1985    for (r = 0; r < 8; r++) {
1986       if ( ((fp_tags >> (2*r)) & 3) != 3 )
1987          summary_tags |= (1 << r);
1988    }
1989    addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
1990    addrC[5]  = 0; /* pad */
1991 
1992    /* FOP: faulting fpu opcode.  From experimentation, the real CPU
1993       does not write this field. (?!) */
1994    addrS[3]  = 0; /* BOGUS */
1995 
1996    /* RIP (Last x87 instruction pointer).  From experimentation, the
1997       real CPU does not write this field. (?!) */
1998    addrS[4]  = 0; /* BOGUS */
1999    addrS[5]  = 0; /* BOGUS */
2000    addrS[6]  = 0; /* BOGUS */
2001    addrS[7]  = 0; /* BOGUS */
2002 
2003    /* RDP (Last x87 data pointer).  From experimentation, the real CPU
2004       does not write this field. (?!) */
2005    addrS[8]  = 0; /* BOGUS */
2006    addrS[9]  = 0; /* BOGUS */
2007    addrS[10] = 0; /* BOGUS */
2008    addrS[11] = 0; /* BOGUS */
2009 
2010    /* addrS[13,12] are MXCSR -- not written */
2011    /* addrS[15,14] are MXCSR_MASK -- not written */
2012 
2013    /* Copy in the FP registers, in ST order. */
2014    for (stno = 0; stno < 8; stno++) {
2015       srcS = (UShort*)(&tmp.reg[10*stno]);
2016       dstS = (UShort*)(&addrS[16 + 8*stno]);
2017       dstS[0] = srcS[0];
2018       dstS[1] = srcS[1];
2019       dstS[2] = srcS[2];
2020       dstS[3] = srcS[3];
2021       dstS[4] = srcS[4];
2022       dstS[5] = 0;
2023       dstS[6] = 0;
2024       dstS[7] = 0;
2025    }
2026 }
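
/* FXSAVE keeps only an abridged tag byte: one bit per physical
   register, 1 meaning "not empty".  The loop above collapses the 2-bit
   tags that way, and the XRSTOR path below can only re-expand a bit to
   'valid' or 'empty', losing the zero/special distinction.  A sketch of
   the round trip (made-up names, not part of the helper set): */
#  if 0
static UChar abridge_ftw ( UShort tagw )
{
   UInt r; UChar summary = 0;
   for (r = 0; r < 8; r++)
      if ( ((tagw >> (2*r)) & 3) != 3 )
         summary |= (1 << r);
   return summary;
}
static UShort expand_ftw ( UChar summary )
{
   UInt r; UShort tagw = 0;
   for (r = 0; r < 8; r++)
      if (!(summary & (1 << r)))
         tagw |= (3 << (2*r));   /* empty; set bits keep tag 0 (valid) */
   return tagw;
}
#  endif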
2027 
2028 
2029 /* CALLED FROM GENERATED CODE */
2030 /* DIRTY HELPER (reads guest state, writes guest mem) */
2031 /* XSAVE component 1 is the SSE state. */
2032 void amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
2033         ( VexGuestAMD64State* gst, HWord addr )
2034 {
2035    UShort* addrS = (UShort*)addr;
2036    UInt    mxcsr;
2037 
2038    /* The only non-register parts of the SSE state are MXCSR and
2039       MXCSR_MASK. */
2040    mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );
2041 
2042    addrS[12] = toUShort(mxcsr);  /* MXCSR */
2043    addrS[13] = toUShort(mxcsr >> 16);
2044 
2045    addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
2046    addrS[15] = 0x0000; /* MXCSR mask (hi16) */
2047 }
2048 
2049 
2050 /* VISIBLE TO LIBVEX CLIENT */
2051 /* Do FXSAVE from the supplied VexGuestAMD64State structure and store
2052    the result at the given address which represents a buffer of at
2053    least 416 bytes.
2054 
2055    This function is not called from generated code.  FXSAVE is dealt
2056    with by the amd64 front end by calling the XSAVE_COMPONENT_{0,1}
2057    functions above plus some in-line IR.  This function is merely a
2058    convenience function for VEX's users.
2059 */
2060 void LibVEX_GuestAMD64_fxsave ( /*IN*/VexGuestAMD64State* gst,
2061                                 /*OUT*/HWord fp_state )
2062 {
2063    /* Do the x87 part */
2064    amd64g_dirtyhelper_XSAVE_COMPONENT_0(gst, fp_state);
2065 
2066    /* And now the SSE part, except for the registers themselves. */
2067    amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS(gst, fp_state);
2068 
2069    /* That's the first 160 bytes of the image done. */
2070    /* Now only %xmm0 .. %xmm15 remain to be copied.  If the host is
2071       big-endian, these need to be byte-swapped. */
2072    U128 *xmm = (U128 *)(fp_state + 160);
2073    vassert(host_is_little_endian());
2074 
2075 #  define COPY_U128(_dst,_src)                       \
2076       do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
2077            _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
2078       while (0)
2079 
2080    COPY_U128( xmm[0],  gst->guest_YMM0 );
2081    COPY_U128( xmm[1],  gst->guest_YMM1 );
2082    COPY_U128( xmm[2],  gst->guest_YMM2 );
2083    COPY_U128( xmm[3],  gst->guest_YMM3 );
2084    COPY_U128( xmm[4],  gst->guest_YMM4 );
2085    COPY_U128( xmm[5],  gst->guest_YMM5 );
2086    COPY_U128( xmm[6],  gst->guest_YMM6 );
2087    COPY_U128( xmm[7],  gst->guest_YMM7 );
2088    COPY_U128( xmm[8],  gst->guest_YMM8 );
2089    COPY_U128( xmm[9],  gst->guest_YMM9 );
2090    COPY_U128( xmm[10], gst->guest_YMM10 );
2091    COPY_U128( xmm[11], gst->guest_YMM11 );
2092    COPY_U128( xmm[12], gst->guest_YMM12 );
2093    COPY_U128( xmm[13], gst->guest_YMM13 );
2094    COPY_U128( xmm[14], gst->guest_YMM14 );
2095    COPY_U128( xmm[15], gst->guest_YMM15 );
2096 #  undef COPY_U128
2097 }
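
/* A minimal client-side usage sketch (not part of the library): the
   helper writes the first 416 bytes of a standard FXSAVE image, so a
   conventional 512-byte, 16-byte-aligned buffer is a safe target.  The
   aligned attribute is a GCC/Clang extension; the function name is made
   up for this sketch. */
#  if 0
static void example_fxsave_fxrstor ( VexGuestAMD64State* gst )
{
   UChar fpimg[512] __attribute__((aligned(16)));
   LibVEX_GuestAMD64_fxsave ( gst, (HWord)&fpimg[0] );
   /* ... inspect or stash fpimg, then put it back: */
   (void) LibVEX_GuestAMD64_fxrstor ( (HWord)&fpimg[0], gst );
}
#  endif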
2098 
2099 
2100 /*---------------------------------------------------------------*/
2101 /*--- Supporting functions for XRSTOR/FXRSTOR.                ---*/
2102 /*---------------------------------------------------------------*/
2103 
2104 /* CALLED FROM GENERATED CODE */
2105 /* DIRTY HELPER (writes guest state, reads guest mem) */
2106 VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_0
2107              ( VexGuestAMD64State* gst, HWord addr )
2108 {
2109    Fpu_State tmp;
2110    UShort*   addrS   = (UShort*)addr;
2111    UChar*    addrC   = (UChar*)addr;
2112    UShort    fp_tags;
2113    Int       r, stno, i;
2114 
2115    /* Copy the x87 registers out of the image, into a temporary
2116       Fpu_State struct. */
2117    for (i = 0; i < 14; i++) tmp.env[i] = 0;
2118    for (i = 0; i < 80; i++) tmp.reg[i] = 0;
2119    /* fill in tmp.reg[0..7] */
2120    for (stno = 0; stno < 8; stno++) {
2121       UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
2122       UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
2123       dstS[0] = srcS[0];
2124       dstS[1] = srcS[1];
2125       dstS[2] = srcS[2];
2126       dstS[3] = srcS[3];
2127       dstS[4] = srcS[4];
2128    }
2129    /* fill in tmp.env[0..13] */
2130    tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
2131    tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */
2132 
2133    fp_tags = 0;
2134    for (r = 0; r < 8; r++) {
2135       if (addrC[4] & (1<<r))
2136          fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
2137       else
2138          fp_tags |= (3 << (2*r)); /* EMPTY */
2139    }
2140    tmp.env[FP_ENV_TAG] = fp_tags;
2141 
2142    /* Now write 'tmp' into the guest state. */
2143    VexEmNote warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );
2144 
2145    return warnX87;
2146 }
2147 
2148 
2149 /* CALLED FROM GENERATED CODE */
2150 /* DIRTY HELPER (writes guest state, reads guest mem) */
2151 VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
2152              ( VexGuestAMD64State* gst, HWord addr )
2153 {
2154    UShort* addrS = (UShort*)addr;
2155    UInt    w32   = (((UInt)addrS[12]) & 0xFFFF)
2156                    | ((((UInt)addrS[13]) & 0xFFFF) << 16);
2157    ULong   w64   = amd64g_check_ldmxcsr( (ULong)w32 );
2158 
2159    VexEmNote warnXMM = (VexEmNote)(w64 >> 32);
2160 
2161    gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
2162    return warnXMM;
2163 }
2164 
2165 
2166 /* VISIBLE TO LIBVEX CLIENT */
2167 /* Do FXRSTOR from the supplied address and store read values to the given
2168    VexGuestAMD64State structure.
2169 
2170    This function is not called from generated code.  FXRSTOR is dealt
2171    with by the amd64 front end by calling the XRSTOR_COMPONENT_{0,1}
2172    functions above plus some in-line IR.  This function is merely a
2173    convenience function for VEX's users.
2174 */
2175 VexEmNote LibVEX_GuestAMD64_fxrstor ( /*IN*/HWord fp_state,
2176                                       /*MOD*/VexGuestAMD64State* gst )
2177 {
2178    /* Restore %xmm0 .. %xmm15.  If the host is big-endian, these need
2179       to be byte-swapped. */
2180    U128 *xmm = (U128 *)(fp_state + 160);
2181 
2182    vassert(host_is_little_endian());
2183 
2184 #  define COPY_U128(_dst,_src)                       \
2185       do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
2186            _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
2187       while (0)
2188 
2189    COPY_U128( gst->guest_YMM0, xmm[0] );
2190    COPY_U128( gst->guest_YMM1, xmm[1] );
2191    COPY_U128( gst->guest_YMM2, xmm[2] );
2192    COPY_U128( gst->guest_YMM3, xmm[3] );
2193    COPY_U128( gst->guest_YMM4, xmm[4] );
2194    COPY_U128( gst->guest_YMM5, xmm[5] );
2195    COPY_U128( gst->guest_YMM6, xmm[6] );
2196    COPY_U128( gst->guest_YMM7, xmm[7] );
2197    COPY_U128( gst->guest_YMM8, xmm[8] );
2198    COPY_U128( gst->guest_YMM9, xmm[9] );
2199    COPY_U128( gst->guest_YMM10, xmm[10] );
2200    COPY_U128( gst->guest_YMM11, xmm[11] );
2201    COPY_U128( gst->guest_YMM12, xmm[12] );
2202    COPY_U128( gst->guest_YMM13, xmm[13] );
2203    COPY_U128( gst->guest_YMM14, xmm[14] );
2204    COPY_U128( gst->guest_YMM15, xmm[15] );
2205 
2206 #  undef COPY_U128
2207 
2208    VexEmNote warnXMM
2209       = amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS(gst, fp_state);
2210    VexEmNote warnX87
2211       = amd64g_dirtyhelper_XRSTOR_COMPONENT_0(gst, fp_state);
2212 
2213    /* Prefer an X87 emwarn over an XMM one, if both exist. */
2214    if (warnX87 != EmNote_NONE)
2215       return warnX87;
2216    else
2217       return warnXMM;
2218 }
2219 
2220 
2221 /*---------------------------------------------------------------*/
2222 /*--- Supporting functions for FSAVE/FRSTOR                   ---*/
2223 /*---------------------------------------------------------------*/
2224 
2225 /* DIRTY HELPER (writes guest state) */
2226 /* Initialise the x87 FPU state as per 'finit'. */
2227 void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
2228 {
2229    Int i;
2230    gst->guest_FTOP = 0;
2231    for (i = 0; i < 8; i++) {
2232       gst->guest_FPTAG[i] = 0; /* empty */
2233       gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
2234    }
2235    gst->guest_FPROUND = (ULong)Irrm_NEAREST;
2236    gst->guest_FC3210  = 0;
2237 }
2238 
2239 
2240 /* CALLED FROM GENERATED CODE */
2241 /* DIRTY HELPER (reads guest memory) */
2242 ULong amd64g_dirtyhelper_loadF80le ( Addr addrU )
2243 {
2244    ULong f64;
2245    convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
2246    return f64;
2247 }
2248 
2249 /* CALLED FROM GENERATED CODE */
2250 /* DIRTY HELPER (writes guest memory) */
2251 void amd64g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
2252 {
2253    convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
2254 }
2255 
2256 
2257 /* CALLED FROM GENERATED CODE */
2258 /* CLEAN HELPER */
2259 /* mxcsr[15:0] contains a SSE native format MXCSR value.
2260    Extract from it the required SSEROUND value and any resulting
2261    emulation warning, and return (warn << 32) | sseround value.
2262 */
2263 ULong amd64g_check_ldmxcsr ( ULong mxcsr )
2264 {
2265    /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
2266    /* NOTE, encoded exactly as per enum IRRoundingMode. */
2267    ULong rmode = (mxcsr >> 13) & 3;
2268 
2269    /* Detect any required emulation warnings. */
2270    VexEmNote ew = EmNote_NONE;
2271 
2272    if ((mxcsr & 0x1F80) != 0x1F80) {
2273       /* unmasked exceptions! */
2274       ew = EmWarn_X86_sseExns;
2275    }
2276    else
2277    if (mxcsr & (1<<15)) {
2278       /* FZ is set */
2279       ew = EmWarn_X86_fz;
2280    }
2281    else
2282    if (mxcsr & (1<<6)) {
2283       /* DAZ is set */
2284       ew = EmWarn_X86_daz;
2285    }
2286 
2287    return (((ULong)ew) << 32) | ((ULong)rmode);
2288 }
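
/* Callers split the packed result with a shift and a mask; the XRSTOR
   component-1 helper above does exactly this.  A sketch (made-up name,
   not part of the helper set): */
#  if 0
static void example_split_ldmxcsr ( ULong mxcsr )
{
   ULong     pair     = amd64g_check_ldmxcsr( mxcsr );
   ULong     sseround = pair & 0xFFFFFFFFULL;     /* IRRoundingMode */
   VexEmNote ew       = (VexEmNote)(pair >> 32);  /* warning, if any */
   (void)sseround; (void)ew;
}
#  endif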
2289 
2290 
2291 /* CALLED FROM GENERATED CODE */
2292 /* CLEAN HELPER */
2293 /* Given sseround as an IRRoundingMode value, create a suitable SSE
2294    native format MXCSR value. */
2295 ULong amd64g_create_mxcsr ( ULong sseround )
2296 {
2297    sseround &= 3;
2298    return 0x1F80 | (sseround << 13);
2299 }
2300 
2301 
2302 /* CLEAN HELPER */
2303 /* fpucw[15:0] contains a x87 native format FPU control word.
2304    Extract from it the required FPROUND value and any resulting
2305    emulation warning, and return (warn << 32) | fpround value.
2306 */
2307 ULong amd64g_check_fldcw ( ULong fpucw )
2308 {
2309    /* Decide on a rounding mode.  fpucw[11:10] holds it. */
2310    /* NOTE, encoded exactly as per enum IRRoundingMode. */
2311    ULong rmode = (fpucw >> 10) & 3;
2312 
2313    /* Detect any required emulation warnings. */
2314    VexEmNote ew = EmNote_NONE;
2315 
2316    if ((fpucw & 0x3F) != 0x3F) {
2317       /* unmasked exceptions! */
2318       ew = EmWarn_X86_x87exns;
2319    }
2320    else
2321    if (((fpucw >> 8) & 3) != 3) {
2322       /* unsupported precision */
2323       ew = EmWarn_X86_x87precision;
2324    }
2325 
2326    return (((ULong)ew) << 32) | ((ULong)rmode);
2327 }
2328 
2329 
2330 /* CLEAN HELPER */
2331 /* Given fpround as an IRRoundingMode value, create a suitable x87
2332    native format FPU control word. */
2333 ULong amd64g_create_fpucw ( ULong fpround )
2334 {
2335    fpround &= 3;
2336    return 0x037F | (fpround << 10);
2337 }
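
/* amd64g_create_fpucw and amd64g_check_fldcw are inverses on the
   rounding-mode field: a freshly created control word masks all
   exceptions and selects 64-bit precision, so feeding it back through
   the checker returns the same rounding mode and no emulation note.
   A quick self-check sketch (made-up name, not part of the helper
   set): */
#  if 0
static void check_fpucw_roundtrip ( void )
{
   ULong rm;
   for (rm = 0; rm < 4; rm++) {
      ULong fpucw = amd64g_create_fpucw( rm );
      ULong pair  = amd64g_check_fldcw( fpucw );
      vassert( (pair & 0xFFFFFFFFULL) == rm );
      vassert( (VexEmNote)(pair >> 32) == EmNote_NONE );
   }
}
#  endif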
2338 
2339 
2340 /* This is used to implement 'fldenv'.
2341    Reads 28 bytes at x87_state[0 .. 27]. */
2342 /* CALLED FROM GENERATED CODE */
2343 /* DIRTY HELPER */
2344 VexEmNote amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state,
2345                                       /*IN*/HWord x87_state)
2346 {
2347    return do_put_x87( False, (UChar*)x87_state, vex_state );
2348 }
2349 
2350 
2351 /* CALLED FROM GENERATED CODE */
2352 /* DIRTY HELPER */
2353 /* Create an x87 FPU env from the guest state, as close as we can
2354    approximate it.  Writes 28 bytes at x87_state[0..27]. */
2355 void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state,
2356                                  /*OUT*/HWord x87_state )
2357 {
2358    Int        i, stno, preg;
2359    UInt       tagw;
2360    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2361    Fpu_State* x87     = (Fpu_State*)x87_state;
2362    UInt       ftop    = vex_state->guest_FTOP;
2363    ULong      c3210   = vex_state->guest_FC3210;
2364 
2365    for (i = 0; i < 14; i++)
2366       x87->env[i] = 0;
2367 
2368    x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
2369    x87->env[FP_ENV_STAT]
2370       = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) ));
2371    x87->env[FP_ENV_CTRL]
2372       = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) ));
2373 
2374    /* Compute the x87 tag word. */
2375    tagw = 0;
2376    for (stno = 0; stno < 8; stno++) {
2377       preg = (stno + ftop) & 7;
2378       if (vexTags[preg] == 0) {
2379          /* register is empty */
2380          tagw |= (3 << (2*preg));
2381       } else {
2382          /* register is full. */
2383          tagw |= (0 << (2*preg));
2384       }
2385    }
2386    x87->env[FP_ENV_TAG] = toUShort(tagw);
2387 
2388    /* We don't dump the x87 registers, tho. */
2389 }
2390 
2391 
2392 /* This is used to implement 'fnsave'.
2393    Writes 108 bytes at x87_state[0 .. 107]. */
2394 /* CALLED FROM GENERATED CODE */
2395 /* DIRTY HELPER */
2396 void amd64g_dirtyhelper_FNSAVE ( /*IN*/VexGuestAMD64State* vex_state,
2397                                  /*OUT*/HWord x87_state)
2398 {
2399    do_get_x87( vex_state, (UChar*)x87_state );
2400 }
2401 
2402 
2403 /* This is used to implement 'fnsaves'.
2404    Writes 94 bytes at x87_state[0 .. 93]. */
2405 /* CALLED FROM GENERATED CODE */
2406 /* DIRTY HELPER */
2407 void amd64g_dirtyhelper_FNSAVES ( /*IN*/VexGuestAMD64State* vex_state,
2408                                   /*OUT*/HWord x87_state)
2409 {
2410    Int           i, stno, preg;
2411    UInt          tagw;
2412    ULong*        vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2413    UChar*        vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2414    Fpu_State_16* x87     = (Fpu_State_16*)x87_state;
2415    UInt          ftop    = vex_state->guest_FTOP;
2416    UInt          c3210   = vex_state->guest_FC3210;
2417 
2418    for (i = 0; i < 7; i++)
2419       x87->env[i] = 0;
2420 
2421    x87->env[FPS_ENV_STAT]
2422       = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
2423    x87->env[FPS_ENV_CTRL]
2424       = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
2425 
2426    /* Dump the register stack in ST order. */
2427    tagw = 0;
2428    for (stno = 0; stno < 8; stno++) {
2429       preg = (stno + ftop) & 7;
2430       if (vexTags[preg] == 0) {
2431          /* register is empty */
2432          tagw |= (3 << (2*preg));
2433          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2434                                  &x87->reg[10*stno] );
2435       } else {
2436          /* register is full. */
2437          tagw |= (0 << (2*preg));
2438          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2439                                  &x87->reg[10*stno] );
2440       }
2441    }
2442    x87->env[FPS_ENV_TAG] = toUShort(tagw);
2443 }
2444 
2445 
2446 /* This is used to implement 'frstor'.
2447    Reads 108 bytes at x87_state[0 .. 107]. */
2448 /* CALLED FROM GENERATED CODE */
2449 /* DIRTY HELPER */
2450 VexEmNote amd64g_dirtyhelper_FRSTOR ( /*OUT*/VexGuestAMD64State* vex_state,
2451                                       /*IN*/HWord x87_state)
2452 {
2453    return do_put_x87( True, (UChar*)x87_state, vex_state );
2454 }
2455 
2456 
2457 /* This is used to implement 'frstors'.
2458    Reads 94 bytes at x87_state[0 .. 93]. */
2459 /* CALLED FROM GENERATED CODE */
2460 /* DIRTY HELPER */
2461 VexEmNote amd64g_dirtyhelper_FRSTORS ( /*OUT*/VexGuestAMD64State* vex_state,
2462                                        /*IN*/HWord x87_state)
2463 {
2464    Int           stno, preg;
2465    UInt          tag;
2466    ULong*        vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2467    UChar*        vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2468    Fpu_State_16* x87     = (Fpu_State_16*)x87_state;
2469    UInt          ftop    = (x87->env[FPS_ENV_STAT] >> 11) & 7;
2470    UInt          tagw    = x87->env[FPS_ENV_TAG];
2471    UInt          fpucw   = x87->env[FPS_ENV_CTRL];
2472    UInt          c3210   = x87->env[FPS_ENV_STAT] & 0x4700;
2473    VexEmNote     ew;
2474    UInt          fpround;
2475    ULong         pair;
2476 
2477    /* Copy registers and tags */
2478    for (stno = 0; stno < 8; stno++) {
2479       preg = (stno + ftop) & 7;
2480       tag = (tagw >> (2*preg)) & 3;
2481       if (tag == 3) {
2482          /* register is empty */
2483          /* hmm, if it's empty, does it still get written?  Probably
2484             safer to say it does.  If we don't, memcheck could get out
2485             of sync, in that it thinks all FP registers are defined by
2486             this helper, but in reality some have not been updated. */
2487          vexRegs[preg] = 0; /* IEEE754 64-bit zero */
2488          vexTags[preg] = 0;
2489       } else {
2490          /* register is non-empty */
2491          convert_f80le_to_f64le( &x87->reg[10*stno],
2492                                  (UChar*)&vexRegs[preg] );
2493          vexTags[preg] = 1;
2494       }
2495    }
2496 
2497    /* stack pointer */
2498    vex_state->guest_FTOP = ftop;
2499 
2500    /* status word */
2501    vex_state->guest_FC3210 = c3210;
2502 
2503    /* handle the control word, setting FPROUND and detecting any
2504       emulation warnings. */
2505    pair    = amd64g_check_fldcw ( (ULong)fpucw );
2506    fpround = (UInt)pair & 0xFFFFFFFFULL;
2507    ew      = (VexEmNote)(pair >> 32);
2508 
2509    vex_state->guest_FPROUND = fpround & 3;
2510 
2511    /* emulation warnings --> caller */
2512    return ew;
2513 }
2514 
2515 
2516 /*---------------------------------------------------------------*/
2517 /*--- CPUID helpers.                                          ---*/
2518 /*---------------------------------------------------------------*/
2519 
2520 /* Claim to be the following CPU, which is probably representative of
2521    the lowliest (earliest) amd64 offerings.  It can do neither sse3
2522    nor cx16.
2523 
2524    vendor_id       : AuthenticAMD
2525    cpu family      : 15
2526    model           : 5
2527    model name      : AMD Opteron (tm) Processor 848
2528    stepping        : 10
2529    cpu MHz         : 1797.682
2530    cache size      : 1024 KB
2531    fpu             : yes
2532    fpu_exception   : yes
2533    cpuid level     : 1
2534    wp              : yes
2535    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
2536                      mtrr pge mca cmov pat pse36 clflush mmx fxsr
2537                      sse sse2 syscall nx mmxext lm 3dnowext 3dnow
2538    bogomips        : 3600.62
2539    TLB size        : 1088 4K pages
2540    clflush size    : 64
2541    cache_alignment : 64
2542    address sizes   : 40 bits physical, 48 bits virtual
2543    power management: ts fid vid ttp
2544 
2545    2012-Feb-21: don't claim 3dnow or 3dnowext, since in fact
2546    we don't support them.  See #291568.  3dnow is 80000001.EDX.31
2547    and 3dnowext is 80000001.EDX.30.
2548 */
2549 void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
2550 {
2551 #  define SET_ABCD(_a,_b,_c,_d)                \
2552       do { st->guest_RAX = (ULong)(_a);        \
2553            st->guest_RBX = (ULong)(_b);        \
2554            st->guest_RCX = (ULong)(_c);        \
2555            st->guest_RDX = (ULong)(_d);        \
2556       } while (0)
2557 
2558    switch (0xFFFFFFFF & st->guest_RAX) {
2559       case 0x00000000:
2560          SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
2561          break;
2562       case 0x00000001:
2563          SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
2564          break;
2565       case 0x80000000:
2566          SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
2567          break;
2568       case 0x80000001:
2569          /* Don't claim to support 3dnow or 3dnowext.  0xe1d3fbff is
2570             the original it-is-supported value that the h/w provides.
2571             See #291568. */
2572          SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, /*0xe1d3fbff*/
2573                                                       0x21d3fbff);
2574          break;
2575       case 0x80000002:
2576          SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
2577          break;
2578       case 0x80000003:
2579          SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
2580          break;
2581       case 0x80000004:
2582          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2583          break;
2584       case 0x80000005:
2585          SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
2586          break;
2587       case 0x80000006:
2588          SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
2589          break;
2590       case 0x80000007:
2591          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
2592          break;
2593       case 0x80000008:
2594          SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
2595          break;
2596       default:
2597          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2598          break;
2599    }
2600 #  undef SET_ABCD
2601 }
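
/* CPUID leaf 0 packs the vendor string into EBX:EDX:ECX, four
   little-endian bytes per register, so the constants above decode to
   "AuthenticAMD".  A decoding sketch (made-up name, not part of the
   helper set): */
#  if 0
static void example_decode_vendor ( VexGuestAMD64State* st )
{
   HChar vendor[13];
   UInt  ebx, ecx, edx, i;
   st->guest_RAX = 0;
   amd64g_dirtyhelper_CPUID_baseline( st );
   ebx = (UInt)st->guest_RBX;
   ecx = (UInt)st->guest_RCX;
   edx = (UInt)st->guest_RDX;
   for (i = 0; i < 4; i++) {
      vendor[i+0] = (HChar)((ebx >> (8*i)) & 0xFF);
      vendor[i+4] = (HChar)((edx >> (8*i)) & 0xFF);
      vendor[i+8] = (HChar)((ecx >> (8*i)) & 0xFF);
   }
   vendor[12] = 0;   /* "AuthenticAMD" */
   (void)vendor;
}
#  endif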
2602 
2603 
2604 /* Claim to be the following CPU (2 x ...), which is sse3 and cx16
2605    capable.
2606 
2607    vendor_id       : GenuineIntel
2608    cpu family      : 6
2609    model           : 15
2610    model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
2611    stepping        : 6
2612    cpu MHz         : 2394.000
2613    cache size      : 4096 KB
2614    physical id     : 0
2615    siblings        : 2
2616    core id         : 0
2617    cpu cores       : 2
2618    fpu             : yes
2619    fpu_exception   : yes
2620    cpuid level     : 10
2621    wp              : yes
2622    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
2623                      mtrr pge mca cmov pat pse36 clflush dts acpi
2624                      mmx fxsr sse sse2 ss ht tm syscall nx lm
2625                      constant_tsc pni monitor ds_cpl vmx est tm2
2626                      cx16 xtpr lahf_lm
2627    bogomips        : 4798.78
2628    clflush size    : 64
2629    cache_alignment : 64
2630    address sizes   : 36 bits physical, 48 bits virtual
2631    power management:
2632 */
2633 void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st )
2634 {
2635 #  define SET_ABCD(_a,_b,_c,_d)                \
2636       do { st->guest_RAX = (ULong)(_a);        \
2637            st->guest_RBX = (ULong)(_b);        \
2638            st->guest_RCX = (ULong)(_c);        \
2639            st->guest_RDX = (ULong)(_d);        \
2640       } while (0)
2641 
2642    switch (0xFFFFFFFF & st->guest_RAX) {
2643       case 0x00000000:
2644          SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
2645          break;
2646       case 0x00000001:
2647          SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
2648          break;
2649       case 0x00000002:
2650          SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
2651          break;
2652       case 0x00000003:
2653          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2654          break;
2655       case 0x00000004: {
2656          switch (0xFFFFFFFF & st->guest_RCX) {
2657             case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
2658                                       0x0000003f, 0x00000001); break;
2659             case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
2660                                       0x0000003f, 0x00000001); break;
2661             case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
2662                                       0x00000fff, 0x00000001); break;
2663             default:         SET_ABCD(0x00000000, 0x00000000,
2664                                       0x00000000, 0x00000000); break;
2665          }
2666          break;
2667       }
2668       case 0x00000005:
2669          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
2670          break;
2671       case 0x00000006:
2672          SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
2673          break;
2674       case 0x00000007:
2675          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2676          break;
2677       case 0x00000008:
2678          SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
2679          break;
2680       case 0x00000009:
2681          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2682          break;
2683       case 0x0000000a:
2684       unhandled_eax_value:
2685          SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
2686          break;
2687       case 0x80000000:
2688          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2689          break;
2690       case 0x80000001:
2691          SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
2692          break;
2693       case 0x80000002:
2694          SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2695          break;
2696       case 0x80000003:
2697          SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
2698          break;
2699       case 0x80000004:
2700          SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
2701          break;
2702       case 0x80000005:
2703          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2704          break;
2705       case 0x80000006:
2706          SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
2707          break;
2708       case 0x80000007:
2709          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2710          break;
2711       case 0x80000008:
2712          SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2713          break;
2714       default:
2715          goto unhandled_eax_value;
2716    }
2717 #  undef SET_ABCD
2718 }
2719 
2720 
2721 /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16
2722    capable.
2723 
2724    vendor_id       : GenuineIntel
2725    cpu family      : 6
2726    model           : 37
2727    model name      : Intel(R) Core(TM) i5 CPU         670  @ 3.47GHz
2728    stepping        : 2
2729    cpu MHz         : 3334.000
2730    cache size      : 4096 KB
2731    physical id     : 0
2732    siblings        : 4
2733    core id         : 0
2734    cpu cores       : 2
2735    apicid          : 0
2736    initial apicid  : 0
2737    fpu             : yes
2738    fpu_exception   : yes
2739    cpuid level     : 11
2740    wp              : yes
2741    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
2742                      mtrr pge mca cmov pat pse36 clflush dts acpi
2743                      mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
2744                      lm constant_tsc arch_perfmon pebs bts rep_good
2745                      xtopology nonstop_tsc aperfmperf pni pclmulqdq
2746                      dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16
2747                      xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida
2748                      arat tpr_shadow vnmi flexpriority ept vpid
2749    bogomips        : 6957.57
2750    clflush size    : 64
2751    cache_alignment : 64
2752    address sizes   : 36 bits physical, 48 bits virtual
2753    power management:
2754 */
2755 void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st )
2756 {
2757 #  define SET_ABCD(_a,_b,_c,_d)                \
2758       do { st->guest_RAX = (ULong)(_a);        \
2759            st->guest_RBX = (ULong)(_b);        \
2760            st->guest_RCX = (ULong)(_c);        \
2761            st->guest_RDX = (ULong)(_d);        \
2762       } while (0)
2763 
2764    UInt old_eax = (UInt)st->guest_RAX;
2765    UInt old_ecx = (UInt)st->guest_RCX;
2766 
2767    switch (old_eax) {
2768       case 0x00000000:
2769          SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69);
2770          break;
2771       case 0x00000001:
2772          SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff, 0xbfebfbff);
2773          break;
2774       case 0x00000002:
2775          SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c);
2776          break;
2777       case 0x00000003:
2778          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2779          break;
2780       case 0x00000004:
2781          switch (old_ecx) {
2782             case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
2783                                       0x0000003f, 0x00000000); break;
2784             case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f,
2785                                       0x0000007f, 0x00000000); break;
2786             case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
2787                                       0x000001ff, 0x00000000); break;
2788             case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
2789                                       0x00000fff, 0x00000002); break;
2790             default:         SET_ABCD(0x00000000, 0x00000000,
2791                                       0x00000000, 0x00000000); break;
2792          }
2793          break;
2794       case 0x00000005:
2795          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
2796          break;
2797       case 0x00000006:
2798          SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000);
2799          break;
2800       case 0x00000007:
2801          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2802          break;
2803       case 0x00000008:
2804          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2805          break;
2806       case 0x00000009:
2807          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2808          break;
2809       case 0x0000000a:
2810          SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603);
2811          break;
2812       case 0x0000000b:
2813          switch (old_ecx) {
2814             case 0x00000000:
2815                SET_ABCD(0x00000001, 0x00000002,
2816                         0x00000100, 0x00000000); break;
2817             case 0x00000001:
2818                SET_ABCD(0x00000004, 0x00000004,
2819                         0x00000201, 0x00000000); break;
2820             default:
2821                SET_ABCD(0x00000000, 0x00000000,
2822                         old_ecx,    0x00000000); break;
2823          }
2824          break;
2825       case 0x0000000c:
2826          SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
2827          break;
2828       case 0x0000000d:
2829          switch (old_ecx) {
2830             case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
2831                                       0x00000100, 0x00000000); break;
2832             case 0x00000001: SET_ABCD(0x00000004, 0x00000004,
2833                                       0x00000201, 0x00000000); break;
2834             default:         SET_ABCD(0x00000000, 0x00000000,
2835                                       old_ecx,    0x00000000); break;
2836          }
2837          break;
2838       case 0x80000000:
2839          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2840          break;
2841       case 0x80000001:
2842          SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
2843          break;
2844       case 0x80000002:
2845          SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2846          break;
2847       case 0x80000003:
2848          SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020);
2849          break;
2850       case 0x80000004:
2851          SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847);
2852          break;
2853       case 0x80000005:
2854          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2855          break;
2856       case 0x80000006:
2857          SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
2858          break;
2859       case 0x80000007:
2860          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
2861          break;
2862       case 0x80000008:
2863          SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2864          break;
2865       default:
2866          SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
2867          break;
2868    }
2869 #  undef SET_ABCD
2870 }
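
/* Illustrative sketch (not part of VEX, hence the #if 0): roughly how
   a guest program would observe the SSE4.2 and CX16 capability bits
   that the helper above fakes up.  Bit positions are per the Intel SDM
   (CPUID leaf 1: ECX bit 20 = SSE4.2, bit 13 = CMPXCHG16B); the
   function name is purely illustrative. */
#if 0
static Bool example_guest_sees_sse42_and_cx16 ( void )
{
   UInt a, b, c, d;
   __asm__ __volatile__("cpuid"
                        : "=a" (a), "=b" (b), "=c" (c), "=d" (d)
                        : "0" (1), "2" (0));
   return toBool( ((c >> 20) & 1) && ((c >> 13) & 1) );
}
#endif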
2871 
2872 
2873 /* Claim to be the following CPU (4 x ...), which is AVX and cx16
2874    capable.  Plus (kludge!) it "supports" HTM.
2875 
2876    Also with the following change: claim that XSaveOpt is not
2877    available, so that cpuid(eax=0xD,ecx=1).eax[0] returns 0, compared to 1
2878    on the real CPU.  Consequently, programs that correctly observe
2879    these CPUID values should only try to use 3 of the 8 XSave-family
2880    instructions: XGETBV, XSAVE and XRSTOR.  In particular this avoids
2881    having to implement the compacted or optimised save/restore
2882    variants.
2883 
2884    vendor_id       : GenuineIntel
2885    cpu family      : 6
2886    model           : 42
2887    model name      : Intel(R) Core(TM) i5-2300 CPU @ 2.80GHz
2888    stepping        : 7
2889    cpu MHz         : 1600.000
2890    cache size      : 6144 KB
2891    physical id     : 0
2892    siblings        : 4
2893    core id         : 3
2894    cpu cores       : 4
2895    apicid          : 6
2896    initial apicid  : 6
2897    fpu             : yes
2898    fpu_exception   : yes
2899    cpuid level     : 13
2900    wp              : yes
2901    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
2902                      mtrr pge mca cmov pat pse36 clflush dts acpi
2903                      mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
2904                      lm constant_tsc arch_perfmon pebs bts rep_good
2905                      nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq
2906                      dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16
2907                      xtpr pdcm sse4_1 sse4_2 popcnt aes xsave avx
2908                      lahf_lm ida arat epb xsaveopt pln pts dts
2909                      tpr_shadow vnmi flexpriority ept vpid
2910 
2911    bogomips        : 5768.94
2912    clflush size    : 64
2913    cache_alignment : 64
2914    address sizes   : 36 bits physical, 48 bits virtual
2915    power management:
2916 */
2917 void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st )
2918 {
2919 #  define SET_ABCD(_a,_b,_c,_d)                \
2920       do { st->guest_RAX = (ULong)(_a);        \
2921            st->guest_RBX = (ULong)(_b);        \
2922            st->guest_RCX = (ULong)(_c);        \
2923            st->guest_RDX = (ULong)(_d);        \
2924       } while (0)
2925 
2926    UInt old_eax = (UInt)st->guest_RAX;
2927    UInt old_ecx = (UInt)st->guest_RCX;
2928 
2929    switch (old_eax) {
2930       case 0x00000000:
2931          SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
2932          break;
2933       case 0x00000001:
2934          SET_ABCD(0x000206a7, 0x00100800, 0x1f9ae3bf, 0xbfebfbff);
2935          break;
2936       case 0x00000002:
2937          SET_ABCD(0x76035a01, 0x00f0b0ff, 0x00000000, 0x00ca0000);
2938          break;
2939       case 0x00000003:
2940          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2941          break;
2942       case 0x00000004:
2943          switch (old_ecx) {
2944             case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
2945                                       0x0000003f, 0x00000000); break;
2946             case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f,
2947                                       0x0000003f, 0x00000000); break;
2948             case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
2949                                       0x000001ff, 0x00000000); break;
2950             case 0x00000003: SET_ABCD(0x1c03c163, 0x02c0003f,
2951                                       0x00001fff, 0x00000006); break;
2952             default:         SET_ABCD(0x00000000, 0x00000000,
2953                                       0x00000000, 0x00000000); break;
2954          }
2955          break;
2956       case 0x00000005:
2957          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
2958          break;
2959       case 0x00000006:
2960          SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000);
2961          break;
2962       case 0x00000007:
2963          SET_ABCD(0x00000000, 0x00000800, 0x00000000, 0x00000000);
2964          break;
2965       case 0x00000008:
2966          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2967          break;
2968       case 0x00000009:
2969          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2970          break;
2971       case 0x0000000a:
2972          SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603);
2973          break;
2974       case 0x0000000b:
2975          switch (old_ecx) {
2976             case 0x00000000:
2977                SET_ABCD(0x00000001, 0x00000001,
2978                         0x00000100, 0x00000000); break;
2979             case 0x00000001:
2980                SET_ABCD(0x00000004, 0x00000004,
2981                         0x00000201, 0x00000000); break;
2982             default:
2983                SET_ABCD(0x00000000, 0x00000000,
2984                         old_ecx,    0x00000000); break;
2985          }
2986          break;
2987       case 0x0000000c:
2988          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2989          break;
2990       case 0x0000000d:
2991          switch (old_ecx) {
2992             case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
2993                                       0x00000340, 0x00000000); break;
2994             case 0x00000001: SET_ABCD(0x00000000, 0x00000000,
2995                                       0x00000000, 0x00000000); break;
2996             case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
2997                                       0x00000000, 0x00000000); break;
2998             default:         SET_ABCD(0x00000000, 0x00000000,
2999                                       0x00000000, 0x00000000); break;
3000          }
3001          break;
3002       case 0x0000000e:
3003          SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3004          break;
3005       case 0x0000000f:
3006          SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3007          break;
3008       case 0x80000000:
3009          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3010          break;
3011       case 0x80000001:
3012          SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
3013          break;
3014       case 0x80000002:
3015          SET_ABCD(0x20202020, 0x20202020, 0x65746e49, 0x2952286c);
3016          break;
3017       case 0x80000003:
3018          SET_ABCD(0x726f4320, 0x4d542865, 0x35692029, 0x3033322d);
3019          break;
3020       case 0x80000004:
3021          SET_ABCD(0x50432030, 0x20402055, 0x30382e32, 0x007a4847);
3022          break;
3023       case 0x80000005:
3024          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3025          break;
3026       case 0x80000006:
3027          SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
3028          break;
3029       case 0x80000007:
3030          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
3031          break;
3032       case 0x80000008:
3033          SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
3034          break;
3035       default:
3036          SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3037          break;
3038    }
3039 #  undef SET_ABCD
3040 }
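
/* Illustrative sketch (not part of VEX, hence the #if 0): how a
   well-behaved guest decides whether it may use XSAVEOPT, given the
   leaf 0xD sub-leaf 1 value faked above (EAX bit 0 = XSAVEOPT
   available).  With the values claimed here this returns False, so
   such a guest sticks to XGETBV/XSAVE/XRSTOR, as intended.  The
   function name is purely illustrative. */
#if 0
static Bool example_guest_may_use_xsaveopt ( void )
{
   UInt a, b, c, d;
   __asm__ __volatile__("cpuid"
                        : "=a" (a), "=b" (b), "=c" (c), "=d" (d)
                        : "0" (0xD), "2" (1));
   return toBool(a & 1);
}
#endif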
3041 
3042 
3043 /* Claim to be the following CPU (4 x ...), which is AVX2 capable.
3044 
3045    With the following change: claim that XSaveOpt is not available, so that
3046    cpuid(eax=0xD,ecx=1).eax[0] returns 0, compared to 1 on the real
3047    CPU.  Consequently, programs that correctly observe these CPUID
3048    values should only try to use 3 of the 8 XSave-family instructions:
3049    XGETBV, XSAVE and XRSTOR.  In particular this avoids having to
3050    implement the compacted or optimised save/restore variants.
3051 
3052    vendor_id       : GenuineIntel
3053    cpu family      : 6
3054    model           : 60
3055    model name      : Intel(R) Core(TM) i7-4910MQ CPU @ 2.90GHz
3056    stepping        : 3
3057    microcode       : 0x1c
3058    cpu MHz         : 919.957
3059    cache size      : 8192 KB
3060    physical id     : 0
3061    siblings        : 4
3062    core id         : 3
3063    cpu cores       : 4
3064    apicid          : 6
3065    initial apicid  : 6
3066    fpu             : yes
3067    fpu_exception   : yes
3068    cpuid level     : 13
3069    wp              : yes
3070    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca
3071                      cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht
3072                      tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc
3073                      arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc
3074                      aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl
3075                      vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1
3076                      sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave
3077                      avx f16c rdrand lahf_lm abm ida arat epb pln pts dtherm
3078                      tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust
3079                      bmi1 avx2 smep bmi2 erms invpcid xsaveopt
3080    bugs            :
3081    bogomips        : 5786.68
3082    clflush size    : 64
3083    cache_alignment : 64
3084    address sizes   : 39 bits physical, 48 bits virtual
3085    power management:
3086 */
3087 void amd64g_dirtyhelper_CPUID_avx2 ( VexGuestAMD64State* st )
3088 {
3089 #  define SET_ABCD(_a,_b,_c,_d)                \
3090       do { st->guest_RAX = (ULong)(_a);        \
3091            st->guest_RBX = (ULong)(_b);        \
3092            st->guest_RCX = (ULong)(_c);        \
3093            st->guest_RDX = (ULong)(_d);        \
3094       } while (0)
3095 
3096    UInt old_eax = (UInt)st->guest_RAX;
3097    UInt old_ecx = (UInt)st->guest_RCX;
3098 
3099    switch (old_eax) {
3100       case 0x00000000:
3101          SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
3102          break;
3103       case 0x00000001:
3104          /* Don't advertise RDRAND support, bit 30 in ECX.  */
3105          SET_ABCD(0x000306c3, 0x02100800, 0x3ffafbff, 0xbfebfbff);
3106          break;
3107       case 0x00000002:
3108          SET_ABCD(0x76036301, 0x00f0b6ff, 0x00000000, 0x00c10000);
3109          break;
3110       case 0x00000003:
3111          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3112          break;
3113       case 0x00000004:
3114          switch (old_ecx) {
3115             case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
3116                                       0x0000003f, 0x00000000); break;
3117             case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f,
3118                                       0x0000003f, 0x00000000); break;
3119             case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
3120                                       0x000001ff, 0x00000000); break;
3121             case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
3122                                       0x00001fff, 0x00000006); break;
3123             default:         SET_ABCD(0x00000000, 0x00000000,
3124                                       0x00000000, 0x00000000); break;
3125          }
3126          break;
3127       case 0x00000005:
3128          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00042120);
3129          break;
3130       case 0x00000006:
3131          SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000);
3132          break;
3133       case 0x00000007:
3134          switch (old_ecx) {
3135             case 0x00000000: SET_ABCD(0x00000000, 0x000027ab,
3136                                       0x00000000, 0x00000000); break;
3137             default:         SET_ABCD(0x00000000, 0x00000000,
3138                                       0x00000000, 0x00000000); break;
3139          }
3140          break;
3141       case 0x00000008:
3142          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3143          break;
3144       case 0x00000009:
3145          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3146          break;
3147       case 0x0000000a:
3148          SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603);
3149          break;
3150       case 0x0000000b:
3151          switch (old_ecx) {
3152             case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
3153                                       0x00000100, 0x00000002); break;
3154             case 0x00000001: SET_ABCD(0x00000004, 0x00000008,
3155                                       0x00000201, 0x00000002); break;
3156             default:         SET_ABCD(0x00000000, 0x00000000,
3157                                       old_ecx,    0x00000002); break;
3158          }
3159          break;
3160       case 0x0000000c:
3161          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3162          break;
3163       case 0x0000000d:
3164          switch (old_ecx) {
3165             case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
3166                                       0x00000340, 0x00000000); break;
3167             case 0x00000001: SET_ABCD(0x00000000, 0x00000000,
3168                                       0x00000000, 0x00000000); break;
3169             case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
3170                                       0x00000000, 0x00000000); break;
3171             default:         SET_ABCD(0x00000000, 0x00000000,
3172                                       0x00000000, 0x00000000); break;
3173          }
3174          break;
3175       case 0x80000000:
3176          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3177          break;
3178       case 0x80000001:
3179          SET_ABCD(0x00000000, 0x00000000, 0x00000021, 0x2c100800);
3180          break;
3181       case 0x80000002:
3182          SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
3183          break;
3184       case 0x80000003:
3185          SET_ABCD(0x37692029, 0x3139342d, 0x20514d30, 0x20555043);
3186          break;
3187       case 0x80000004:
3188          SET_ABCD(0x2e322040, 0x48473039, 0x0000007a, 0x00000000);
3189          break;
3190       case 0x80000005:
3191          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3192          break;
3193       case 0x80000006:
3194          SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
3195          break;
3196       case 0x80000007:
3197          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
3198          break;
3199       case 0x80000008:
3200          SET_ABCD(0x00003027, 0x00000000, 0x00000000, 0x00000000);
3201          break;
3202       default:
3203          SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3204          break;
3205    }
3206 #  undef SET_ABCD
3207 }
3208 
3209 
3210 /*---------------------------------------------------------------*/
3211 /*--- Misc integer helpers, including rotates and crypto.     ---*/
3212 /*---------------------------------------------------------------*/
3213 
3214 ULong amd64g_calculate_RCR ( ULong arg,
3215                              ULong rot_amt,
3216                              ULong rflags_in,
3217                              Long  szIN )
3218 {
3219    Bool  wantRflags = toBool(szIN < 0);
3220    ULong sz         = wantRflags ? (-szIN) : szIN;
3221    ULong tempCOUNT  = rot_amt & (sz == 8 ? 0x3F : 0x1F);
3222    ULong cf=0, of=0, tempcf;
3223 
3224    switch (sz) {
3225       case 8:
3226          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3227          of        = ((arg >> 63) ^ cf) & 1;
3228          while (tempCOUNT > 0) {
3229             tempcf = arg & 1;
3230             arg    = (arg >> 1) | (cf << 63);
3231             cf     = tempcf;
3232             tempCOUNT--;
3233          }
3234          break;
3235       case 4:
3236          while (tempCOUNT >= 33) tempCOUNT -= 33;
3237          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3238          of        = ((arg >> 31) ^ cf) & 1;
3239          while (tempCOUNT > 0) {
3240             tempcf = arg & 1;
3241             arg    = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31);
3242             cf     = tempcf;
3243             tempCOUNT--;
3244          }
3245          break;
3246       case 2:
3247          while (tempCOUNT >= 17) tempCOUNT -= 17;
3248          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3249          of        = ((arg >> 15) ^ cf) & 1;
3250          while (tempCOUNT > 0) {
3251             tempcf = arg & 1;
3252             arg    = ((arg >> 1) & 0x7FFFULL) | (cf << 15);
3253             cf     = tempcf;
3254             tempCOUNT--;
3255          }
3256          break;
3257       case 1:
3258          while (tempCOUNT >= 9) tempCOUNT -= 9;
3259          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3260          of        = ((arg >> 7) ^ cf) & 1;
3261          while (tempCOUNT > 0) {
3262             tempcf = arg & 1;
3263             arg    = ((arg >> 1) & 0x7FULL) | (cf << 7);
3264             cf     = tempcf;
3265             tempCOUNT--;
3266          }
3267          break;
3268       default:
3269          vpanic("calculate_RCR(amd64g): invalid size");
3270    }
3271 
3272    cf &= 1;
3273    of &= 1;
3274    rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
3275    rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
3276 
3277    /* caller can ask to have back either the resulting flags or
3278       resulting value, but not both */
3279    return wantRflags ? rflags_in : arg;
3280 }
3281 
3282 ULong amd64g_calculate_RCL ( ULong arg,
3283                              ULong rot_amt,
3284                              ULong rflags_in,
3285                              Long  szIN )
3286 {
3287    Bool  wantRflags = toBool(szIN < 0);
3288    ULong sz         = wantRflags ? (-szIN) : szIN;
3289    ULong tempCOUNT  = rot_amt & (sz == 8 ? 0x3F : 0x1F);
3290    ULong cf=0, of=0, tempcf;
3291 
3292    switch (sz) {
3293       case 8:
3294          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3295          while (tempCOUNT > 0) {
3296             tempcf = (arg >> 63) & 1;
3297             arg    = (arg << 1) | (cf & 1);
3298             cf     = tempcf;
3299             tempCOUNT--;
3300          }
3301          of = ((arg >> 63) ^ cf) & 1;
3302          break;
3303       case 4:
3304          while (tempCOUNT >= 33) tempCOUNT -= 33;
3305          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3306          while (tempCOUNT > 0) {
3307             tempcf = (arg >> 31) & 1;
3308             arg    = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1));
3309             cf     = tempcf;
3310             tempCOUNT--;
3311          }
3312          of = ((arg >> 31) ^ cf) & 1;
3313          break;
3314       case 2:
3315          while (tempCOUNT >= 17) tempCOUNT -= 17;
3316          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3317          while (tempCOUNT > 0) {
3318             tempcf = (arg >> 15) & 1;
3319             arg    = 0xFFFFULL & ((arg << 1) | (cf & 1));
3320             cf     = tempcf;
3321             tempCOUNT--;
3322          }
3323          of = ((arg >> 15) ^ cf) & 1;
3324          break;
3325       case 1:
3326          while (tempCOUNT >= 9) tempCOUNT -= 9;
3327          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3328          while (tempCOUNT > 0) {
3329             tempcf = (arg >> 7) & 1;
3330             arg    = 0xFFULL & ((arg << 1) | (cf & 1));
3331             cf     = tempcf;
3332             tempCOUNT--;
3333          }
3334          of = ((arg >> 7) ^ cf) & 1;
3335          break;
3336       default:
3337          vpanic("calculate_RCL(amd64g): invalid size");
3338    }
3339 
3340    cf &= 1;
3341    of &= 1;
3342    rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
3343    rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
3344 
3345    return wantRflags ? rflags_in : arg;
3346 }
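
/* Usage sketch (not part of VEX, hence the #if 0): the szIN convention
   above means a caller wanting both the rotated value and the updated
   %rflags makes two calls, one with +size and one with -size.  The
   function and parameter names are illustrative only. */
#if 0
static void example_rcl8_value_and_flags ( ULong arg, ULong amt,
                                           ULong rflags_in,
                                           /*OUT*/ULong* value,
                                           /*OUT*/ULong* rflags_out )
{
   *value      = amd64g_calculate_RCL(arg, amt, rflags_in,  8);
   *rflags_out = amd64g_calculate_RCL(arg, amt, rflags_in, -8);
}
#endif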
3347 
3348 /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+)
3349  * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25
3350  */
3351 ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which)
3352 {
3353     ULong hi, lo, tmp, A[16];
3354 
3355    A[0] = 0;            A[1] = a;
3356    A[2] = A[1] << 1;    A[3] = A[2] ^ a;
3357    A[4] = A[2] << 1;    A[5] = A[4] ^ a;
3358    A[6] = A[3] << 1;    A[7] = A[6] ^ a;
3359    A[8] = A[4] << 1;    A[9] = A[8] ^ a;
3360    A[10] = A[5] << 1;   A[11] = A[10] ^ a;
3361    A[12] = A[6] << 1;   A[13] = A[12] ^ a;
3362    A[14] = A[7] << 1;   A[15] = A[14] ^ a;
3363 
3364    lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15];
3365    hi = lo >> 56;
3366    lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15];
3367    hi = (hi << 8) | (lo >> 56);
3368    lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15];
3369    hi = (hi << 8) | (lo >> 56);
3370    lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15];
3371    hi = (hi << 8) | (lo >> 56);
3372    lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15];
3373    hi = (hi << 8) | (lo >> 56);
3374    lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15];
3375    hi = (hi << 8) | (lo >> 56);
3376    lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15];
3377    hi = (hi << 8) | (lo >> 56);
3378    lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15];
3379 
3380    ULong m0 = -1;
3381    m0 /= 255;
3382    tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp;
3383    tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp;
3384    tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp;
3385    tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp;
3386    tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp;
3387    tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp;
3388    tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp;
3389 
3390    return which ? hi : lo;
3391 }
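
/* Cross-check sketch (not part of VEX, hence the #if 0): a bit-serial
   carry-less multiply that should agree with the table-driven routine
   above.  As there, 'which' selects the high or low 64 bits of the
   128-bit product. */
#if 0
static ULong example_ref_clmul ( ULong a, ULong b, ULong which )
{
   ULong lo = 0, hi = 0;
   UInt  i;
   for (i = 0; i < 64; i++) {
      if ((b >> i) & 1) {
         lo ^= a << i;
         if (i > 0)
            hi ^= a >> (64 - i);
      }
   }
   return which ? hi : lo;
}
#endif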
3392 
3393 
3394 /* CALLED FROM GENERATED CODE */
3395 /* DIRTY HELPER (non-referentially-transparent) */
3396 /* Horrible hack.  On non-amd64 platforms, return 1. */
3397 ULong amd64g_dirtyhelper_RDTSC ( void )
3398 {
3399 #  if defined(__x86_64__)
3400    UInt  eax, edx;
3401    __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx));
3402    return (((ULong)edx) << 32) | ((ULong)eax);
3403 #  else
3404    return 1ULL;
3405 #  endif
3406 }
3407 
3408 /* CALLED FROM GENERATED CODE */
3409 /* DIRTY HELPER (non-referentially-transparent) */
3410 /* Horrible hack.  On non-amd64 platforms, return 1. */
3411 /* This uses a different calling convention from _RDTSC just above
3412    only because of the difficulty of returning 96 bits from a C
3413    function -- RDTSC returns 64 bits and so, on amd64, is simple by
3414    comparison. */
3415 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st )
3416 {
3417 #  if defined(__x86_64__)
3418    UInt eax, ecx, edx;
3419    __asm__ __volatile__("rdtscp" : "=a" (eax), "=d" (edx), "=c" (ecx));
3420    st->guest_RAX = (ULong)eax;
3421    st->guest_RCX = (ULong)ecx;
3422    st->guest_RDX = (ULong)edx;
3423 #  else
3424    /* Do nothing. */
3425 #  endif
3426 }
3427 
3428 /* CALLED FROM GENERATED CODE */
3429 /* DIRTY HELPER (non-referentially-transparent) */
3430 /* Horrible hack.  On non-amd64 platforms, return 0. */
3431 ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ )
3432 {
3433 #  if defined(__x86_64__)
3434    ULong r = 0;
3435    portno &= 0xFFFF;
3436    switch (sz) {
3437       case 4:
3438          __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0"
3439                               : "=a" (r) : "Nd" (portno));
3440 	 break;
3441       case 2:
3442          __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0"
3443                               : "=a" (r) : "Nd" (portno));
3444 	 break;
3445       case 1:
3446          __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0"
3447                               : "=a" (r) : "Nd" (portno));
3448 	 break;
3449       default:
3450          break; /* note: no 64-bit version of insn exists */
3451    }
3452    return r;
3453 #  else
3454    return 0;
3455 #  endif
3456 }
3457 
3458 
3459 /* CALLED FROM GENERATED CODE */
3460 /* DIRTY HELPER (non-referentially-transparent) */
3461 /* Horrible hack.  On non-amd64 platforms, do nothing. */
3462 void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ )
3463 {
3464 #  if defined(__x86_64__)
3465    portno &= 0xFFFF;
3466    switch (sz) {
3467       case 4:
3468          __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1"
3469                               : : "a" (data), "Nd" (portno));
3470 	 break;
3471       case 2:
3472          __asm__ __volatile__("outw %w0, %w1"
3473                               : : "a" (data), "Nd" (portno));
3474 	 break;
3475       case 1:
3476          __asm__ __volatile__("outb %b0, %w1"
3477                               : : "a" (data), "Nd" (portno));
3478 	 break;
3479       default:
3480          break; /* note: no 64-bit version of insn exists */
3481    }
3482 #  else
3483    /* do nothing */
3484 #  endif
3485 }
3486 
3487 /* CALLED FROM GENERATED CODE */
3488 /* DIRTY HELPER (non-referentially-transparent) */
3489 /* Horrible hack.  On non-amd64 platforms, do nothing. */
3490 /* op = 0: call the native SGDT instruction.
3491    op = 1: call the native SIDT instruction.
3492 */
3493 void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) {
3494 #  if defined(__x86_64__)
3495    switch (op) {
3496       case 0:
3497          __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
3498          break;
3499       case 1:
3500          __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
3501          break;
3502       default:
3503          vpanic("amd64g_dirtyhelper_SxDT");
3504    }
3505 #  else
3506    /* Not amd64: just hand back a zeroed-out 10-byte descriptor. */
3507    UChar* p = (UChar*)address;
3508    p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
3509    p[6] = p[7] = p[8] = p[9] = 0;
3510 #  endif
3511 }
3512 
3513 /*---------------------------------------------------------------*/
3514 /*--- Helpers for MMX/SSE/SSE2.                               ---*/
3515 /*---------------------------------------------------------------*/
3516 
3517 static inline UChar abdU8 ( UChar xx, UChar yy ) {
3518    return toUChar(xx>yy ? xx-yy : yy-xx);
3519 }
3520 
3521 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
3522    return (((ULong)w1) << 32) | ((ULong)w0);
3523 }
3524 
3525 static inline UShort sel16x4_3 ( ULong w64 ) {
3526    UInt hi32 = toUInt(w64 >> 32);
3527    return toUShort(hi32 >> 16);
3528 }
3529 static inline UShort sel16x4_2 ( ULong w64 ) {
3530    UInt hi32 = toUInt(w64 >> 32);
3531    return toUShort(hi32);
3532 }
3533 static inline UShort sel16x4_1 ( ULong w64 ) {
3534    UInt lo32 = toUInt(w64);
3535    return toUShort(lo32 >> 16);
3536 }
3537 static inline UShort sel16x4_0 ( ULong w64 ) {
3538    UInt lo32 = toUInt(w64);
3539    return toUShort(lo32);
3540 }
3541 
3542 static inline UChar sel8x8_7 ( ULong w64 ) {
3543    UInt hi32 = toUInt(w64 >> 32);
3544    return toUChar(hi32 >> 24);
3545 }
3546 static inline UChar sel8x8_6 ( ULong w64 ) {
3547    UInt hi32 = toUInt(w64 >> 32);
3548    return toUChar(hi32 >> 16);
3549 }
3550 static inline UChar sel8x8_5 ( ULong w64 ) {
3551    UInt hi32 = toUInt(w64 >> 32);
3552    return toUChar(hi32 >> 8);
3553 }
3554 static inline UChar sel8x8_4 ( ULong w64 ) {
3555    UInt hi32 = toUInt(w64 >> 32);
3556    return toUChar(hi32 >> 0);
3557 }
3558 static inline UChar sel8x8_3 ( ULong w64 ) {
3559    UInt lo32 = toUInt(w64);
3560    return toUChar(lo32 >> 24);
3561 }
3562 static inline UChar sel8x8_2 ( ULong w64 ) {
3563    UInt lo32 = toUInt(w64);
3564    return toUChar(lo32 >> 16);
3565 }
3566 static inline UChar sel8x8_1 ( ULong w64 ) {
3567    UInt lo32 = toUInt(w64);
3568    return toUChar(lo32 >> 8);
3569 }
3570 static inline UChar sel8x8_0 ( ULong w64 ) {
3571    UInt lo32 = toUInt(w64);
3572    return toUChar(lo32 >> 0);
3573 }
3574 
3575 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3576 ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
3577 {
3578    return
3579       mk32x2(
3580          (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
3581             + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
3582          (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
3583             + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
3584       );
3585 }
3586 
3587 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3588 ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
3589 {
3590    UInt t = 0;
3591    t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
3592    t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
3593    t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
3594    t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
3595    t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
3596    t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
3597    t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
3598    t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
3599    t &= 0xFFFF;
3600    return (ULong)t;
3601 }
3602 
3603 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3604 ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi )
3605 {
3606    UShort t, min;
3607    UInt   idx;
3608    t = sel16x4_0(sLo); if (True)    { min = t; idx = 0; }
3609    t = sel16x4_1(sLo); if (t < min) { min = t; idx = 1; }
3610    t = sel16x4_2(sLo); if (t < min) { min = t; idx = 2; }
3611    t = sel16x4_3(sLo); if (t < min) { min = t; idx = 3; }
3612    t = sel16x4_0(sHi); if (t < min) { min = t; idx = 4; }
3613    t = sel16x4_1(sHi); if (t < min) { min = t; idx = 5; }
3614    t = sel16x4_2(sHi); if (t < min) { min = t; idx = 6; }
3615    t = sel16x4_3(sHi); if (t < min) { min = t; idx = 7; }
3616    return ((ULong)(idx << 16)) | ((ULong)min);
3617 }
3618 
3619 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3620 ULong amd64g_calc_crc32b ( ULong crcIn, ULong b )
3621 {
3622    UInt  i;
3623    ULong crc = (b & 0xFFULL) ^ crcIn;
3624    for (i = 0; i < 8; i++)
3625       crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
3626    return crc;
3627 }
3628 
3629 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3630 ULong amd64g_calc_crc32w ( ULong crcIn, ULong w )
3631 {
3632    UInt  i;
3633    ULong crc = (w & 0xFFFFULL) ^ crcIn;
3634    for (i = 0; i < 16; i++)
3635       crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
3636    return crc;
3637 }
3638 
3639 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3640 ULong amd64g_calc_crc32l ( ULong crcIn, ULong l )
3641 {
3642    UInt i;
3643    ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn;
3644    for (i = 0; i < 32; i++)
3645       crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
3646    return crc;
3647 }
3648 
3649 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3650 ULong amd64g_calc_crc32q ( ULong crcIn, ULong q )
3651 {
3652    ULong crc = amd64g_calc_crc32l(crcIn, q);
3653    return amd64g_calc_crc32l(crc, q >> 32);
3654 }
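
/* Usage sketch (not part of VEX, hence the #if 0): folding a byte
   buffer through the CRC32-C helper above, as guest code executing a
   run of CRC32 r/m8 instructions would.  The 0xFFFFFFFF pre/post
   inversion is the usual software convention (e.g. iSCSI CRC32-C); the
   instruction itself just transforms whatever accumulator value it is
   given.  Names are illustrative only. */
#if 0
static ULong example_crc32c_of_buffer ( const UChar* buf, ULong len )
{
   ULong crc = 0xFFFFFFFFULL;
   ULong i;
   for (i = 0; i < len; i++)
      crc = amd64g_calc_crc32b(crc, buf[i]);
   return crc ^ 0xFFFFFFFFULL;
}
#endif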
3655 
3656 
3657 /* .. helper for next fn .. */
3658 static inline ULong sad_8x4 ( ULong xx, ULong yy )
3659 {
3660    UInt t = 0;
3661    t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
3662    t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
3663    t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
3664    t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
3665    return (ULong)t;
3666 }
3667 
3668 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3669 ULong amd64g_calc_mpsadbw ( ULong sHi, ULong sLo,
3670                             ULong dHi, ULong dLo,
3671                             ULong imm_and_return_control_bit )
3672 {
3673    UInt imm8     = imm_and_return_control_bit & 7;
3674    Bool calcHi   = (imm_and_return_control_bit >> 7) & 1;
3675    UInt srcOffsL = imm8 & 3; /* src offs in 32-bit (L) chunks */
3676    UInt dstOffsL = (imm8 >> 2) & 1; /* dst offs in ditto chunks */
3677    /* For src we only need 32 bits, so get them into the
3678       lower half of a 64 bit word. */
3679    ULong src = ((srcOffsL & 2) ? sHi : sLo) >> (32 * (srcOffsL & 1));
3680    /* For dst we need to get hold of 56 bits (7 bytes) from a total of
3681       11 bytes.  If calculating the low part of the result, need bytes
3682       dstOffsL * 4 + (0 .. 6); if calculating the high part,
3683       dstOffsL * 4 + (4 .. 10). */
3684    ULong dst;
3685    /* dstOffL = 0, Lo  ->  0 .. 6
3686       dstOffL = 1, Lo  ->  4 .. 10
3687       dstOffL = 0, Hi  ->  4 .. 10
3688       dstOffL = 1, Hi  ->  8 .. 14
3689    */
3690    if (calcHi && dstOffsL) {
3691       /* 8 .. 14 */
3692       dst = dHi & 0x00FFFFFFFFFFFFFFULL;
3693    }
3694    else if (!calcHi && !dstOffsL) {
3695       /* 0 .. 6 */
3696       dst = dLo & 0x00FFFFFFFFFFFFFFULL;
3697    }
3698    else {
3699       /* 4 .. 10 */
3700       dst = (dLo >> 32) | ((dHi & 0x00FFFFFFULL) << 32);
3701    }
3702    ULong r0  = sad_8x4( dst >>  0, src );
3703    ULong r1  = sad_8x4( dst >>  8, src );
3704    ULong r2  = sad_8x4( dst >> 16, src );
3705    ULong r3  = sad_8x4( dst >> 24, src );
3706    ULong res = (r3 << 48) | (r2 << 32) | (r1 << 16) | r0;
3707    return res;
3708 }
3709 
3710 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3711 ULong amd64g_calculate_pext ( ULong src_masked, ULong mask )
3712 {
3713    ULong dst = 0;
3714    ULong src_bit;
3715    ULong dst_bit = 1;
3716    for (src_bit = 1; src_bit; src_bit <<= 1) {
3717       if (mask & src_bit) {
3718          if (src_masked & src_bit) dst |= dst_bit;
3719          dst_bit <<= 1;
3720       }
3721    }
3722    return dst;
3723 }
3724 
3725 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3726 ULong amd64g_calculate_pdep ( ULong src, ULong mask )
3727 {
3728    ULong dst = 0;
3729    ULong dst_bit;
3730    ULong src_bit = 1;
3731    for (dst_bit = 1; dst_bit; dst_bit <<= 1) {
3732       if (mask & dst_bit) {
3733          if (src & src_bit) dst |= dst_bit;
3734          src_bit <<= 1;
3735       }
3736    }
3737    return dst;
3738 }
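
/* Sanity-check sketch (not part of VEX, hence the #if 0): with
   mask = 0xFF00, PEXT gathers bits 8..15 down into bits 0..7 and PDEP
   scatters them back.  Note that amd64g_calculate_pext expects its
   first argument to have been ANDed with the mask already. */
#if 0
static void example_pext_pdep_checks ( void )
{
   vassert(amd64g_calculate_pext(0xAB00ULL, 0xFF00ULL) == 0xABULL);
   vassert(amd64g_calculate_pdep(0xABULL,   0xFF00ULL) == 0xAB00ULL);
}
#endif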
3739 
3740 /*---------------------------------------------------------------*/
3741 /*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M}                    ---*/
3742 /*---------------------------------------------------------------*/
3743 
3744 static UInt zmask_from_V128 ( V128* arg )
3745 {
3746    UInt i, res = 0;
3747    for (i = 0; i < 16; i++) {
3748       res |=  ((arg->w8[i] == 0) ? 1 : 0) << i;
3749    }
3750    return res;
3751 }
3752 
3753 static UInt zmask_from_V128_wide ( V128* arg )
3754 {
3755    UInt i, res = 0;
3756    for (i = 0; i < 8; i++) {
3757       res |=  ((arg->w16[i] == 0) ? 1 : 0) << i;
3758    }
3759    return res;
3760 }
3761 
3762 /* Helps with PCMP{I,E}STR{I,M}.
3763 
3764    CALLED FROM GENERATED CODE: DIRTY HELPER(s).  (Not really dirty,
3765    though: it could be a clean helper, except that we can't pass 2 x
3766    V128 by value to a clean helper, nor have one returned.)
3767    Reads guest state, writes to guest state for the xSTRM cases, no
3768    accesses of memory, is a pure function.
3769 
3770    opc_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so
3771    the callee knows which I/E and I/M variant it is dealing with and
3772    what the specific operation is.  4th byte of opcode is in the range
3773    0x60 to 0x63:
3774        istri  66 0F 3A 63
3775        istrm  66 0F 3A 62
3776        estri  66 0F 3A 61
3777        estrm  66 0F 3A 60
3778 
3779    gstOffL and gstOffR are the guest state offsets for the two XMM
3780    register inputs.  We never have to deal with the memory case since
3781    that is handled by pre-loading the relevant value into the fake
3782    XMM16 register.
3783 
3784    For ESTRx variants, edxIN and eaxIN hold the values of those two
3785    registers.
3786 
3787    In all cases, the bottom 16 bits of the result contain the new
3788    OSZACP %rflags values.  For xSTRI variants, bits[31:16] of the
3789    result hold the new %ecx value.  For xSTRM variants, the helper
3790    writes the result directly to the guest XMM0.
3791 
3792    Declarable side effects: in all cases, reads guest state at
3793    [gstOffL, +16) and [gstOffR, +16).  For xSTRM variants, also writes
3794    guest_XMM0.
3795 
3796    Is expected to be called with opc_and_imm combinations which have
3797    actually been validated, and will assert otherwise.  The front
3798    end should ensure we're only called with verified values.
3799 */
3800 ULong amd64g_dirtyhelper_PCMPxSTRx (
3801           VexGuestAMD64State* gst,
3802           HWord opc4_and_imm,
3803           HWord gstOffL, HWord gstOffR,
3804           HWord edxIN, HWord eaxIN
3805        )
3806 {
3807    HWord opc4 = (opc4_and_imm >> 8) & 0xFF;
3808    HWord imm8 = opc4_and_imm & 0xFF;
3809    HWord isISTRx = opc4 & 2;
3810    HWord isxSTRM = (opc4 & 1) ^ 1;
3811    vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */
3812    HWord wide = (imm8 & 1);
3813 
3814    // where the args are
3815    V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
3816    V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
3817 
3818    /* Create the arg validity masks, either from the vectors
3819       themselves or from the supplied edx/eax values. */
3820    // FIXME: this is only right for the 8-bit data cases.
3821    // At least that is asserted above.
3822    UInt zmaskL, zmaskR;
3823 
3824    // temp spot for the resulting flags and vector.
3825    V128 resV;
3826    UInt resOSZACP;
3827 
3828    // for checking whether case was handled
3829    Bool ok = False;
3830 
3831    if (wide) {
3832       if (isISTRx) {
3833          zmaskL = zmask_from_V128_wide(argL);
3834          zmaskR = zmask_from_V128_wide(argR);
3835       } else {
3836          Int tmp;
3837          tmp = edxIN & 0xFFFFFFFF;
3838          if (tmp < -8) tmp = -8;
3839          if (tmp > 8)  tmp = 8;
3840          if (tmp < 0)  tmp = -tmp;
3841          vassert(tmp >= 0 && tmp <= 8);
3842          zmaskL = (1 << tmp) & 0xFF;
3843          tmp = eaxIN & 0xFFFFFFFF;
3844          if (tmp < -8) tmp = -8;
3845          if (tmp > 8)  tmp = 8;
3846          if (tmp < 0)  tmp = -tmp;
3847          vassert(tmp >= 0 && tmp <= 8);
3848          zmaskR = (1 << tmp) & 0xFF;
3849       }
3850       // do the math
3851       ok = compute_PCMPxSTRx_wide (
3852               &resV, &resOSZACP, argL, argR,
3853               zmaskL, zmaskR, imm8, (Bool)isxSTRM
3854            );
3855    } else {
3856       if (isISTRx) {
3857          zmaskL = zmask_from_V128(argL);
3858          zmaskR = zmask_from_V128(argR);
3859       } else {
3860          Int tmp;
3861          tmp = edxIN & 0xFFFFFFFF;
3862          if (tmp < -16) tmp = -16;
3863          if (tmp > 16)  tmp = 16;
3864          if (tmp < 0)   tmp = -tmp;
3865          vassert(tmp >= 0 && tmp <= 16);
3866          zmaskL = (1 << tmp) & 0xFFFF;
3867          tmp = eaxIN & 0xFFFFFFFF;
3868          if (tmp < -16) tmp = -16;
3869          if (tmp > 16)  tmp = 16;
3870          if (tmp < 0)   tmp = -tmp;
3871          vassert(tmp >= 0 && tmp <= 16);
3872          zmaskR = (1 << tmp) & 0xFFFF;
3873       }
3874       // do the math
3875       ok = compute_PCMPxSTRx (
3876               &resV, &resOSZACP, argL, argR,
3877               zmaskL, zmaskR, imm8, (Bool)isxSTRM
3878            );
3879    }
3880 
3881    // front end shouldn't pass us any imm8 variants we can't
3882    // handle.  Hence:
3883    vassert(ok);
3884 
3885    // So, finally we need to get the results back to the caller.
3886    // In all cases, the new OSZACP value is the lowest 16 of
3887    // the return value.
3888    if (isxSTRM) {
3889       gst->guest_YMM0[0] = resV.w32[0];
3890       gst->guest_YMM0[1] = resV.w32[1];
3891       gst->guest_YMM0[2] = resV.w32[2];
3892       gst->guest_YMM0[3] = resV.w32[3];
3893       return resOSZACP & 0x8D5;
3894    } else {
3895       UInt newECX = resV.w32[0] & 0xFFFF;
3896       return (newECX << 16) | (resOSZACP & 0x8D5);
3897    }
3898 }
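
/* Caller-side sketch (not part of VEX, hence the #if 0): per the
   comment above, for the xSTRI variants the helper packs the new
   OSZACP flags into bits 15:0 of its result and the new %ecx into bits
   31:16.  The 0x8D5 mask is the same OSZACP bit set the helper itself
   uses.  Function and parameter names are illustrative only. */
#if 0
static void example_unpack_pcmpistri_result ( ULong res,
                                              /*OUT*/UInt* newECX,
                                              /*OUT*/UInt* newOSZACP )
{
   *newOSZACP = (UInt)(res & 0x8D5);
   *newECX    = (UInt)((res >> 16) & 0xFFFF);
}
#endif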
3899 
3900 /*---------------------------------------------------------------*/
3901 /*--- AES primitives and helpers                              ---*/
3902 /*---------------------------------------------------------------*/
3903 /* a 16 x 16 matrix */
3904 static const UChar sbox[256] = {                   // row nr
3905    0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, // 1
3906    0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
3907    0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, // 2
3908    0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
3909    0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, // 3
3910    0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
3911    0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, // 4
3912    0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
3913    0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, // 5
3914    0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
3915    0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, // 6
3916    0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
3917    0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, // 7
3918    0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
3919    0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, // 8
3920    0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
3921    0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, // 9
3922    0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
3923    0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, //10
3924    0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
3925    0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, //11
3926    0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
3927    0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, //12
3928    0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
3929    0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, //13
3930    0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
3931    0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, //14
3932    0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
3933    0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, //15
3934    0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
3935    0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, //16
3936    0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
3937 };
3938 static void SubBytes (V128* v)
3939 {
3940    V128 r;
3941    UInt i;
3942    for (i = 0; i < 16; i++)
3943       r.w8[i] = sbox[v->w8[i]];
3944    *v = r;
3945 }
3946 
3947 /* a 16 x 16 matrix */
3948 static const UChar invsbox[256] = {                // row nr
3949    0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, // 1
3950    0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
3951    0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, // 2
3952    0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
3953    0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, // 3
3954    0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
3955    0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, // 4
3956    0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
3957    0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, // 5
3958    0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
3959    0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, // 6
3960    0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
3961    0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, // 7
3962    0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
3963    0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, // 8
3964    0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
3965    0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, // 9
3966    0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
3967    0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, //10
3968    0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
3969    0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, //11
3970    0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
3971    0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, //12
3972    0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
3973    0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, //13
3974    0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
3975    0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, //14
3976    0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
3977    0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, //15
3978    0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
3979    0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, //16
3980    0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
3981 };
3982 static void InvSubBytes (V128* v)
3983 {
3984    V128 r;
3985    UInt i;
3986    for (i = 0; i < 16; i++)
3987       r.w8[i] = invsbox[v->w8[i]];
3988    *v = r;
3989 }
3990 
3991 static const UChar ShiftRows_op[16] =
3992    {11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0};
3993 static void ShiftRows (V128* v)
3994 {
3995    V128 r;
3996    UInt i;
3997    for (i = 0; i < 16; i++)
3998       r.w8[i] = v->w8[ShiftRows_op[15-i]];
3999    *v = r;
4000 }
4001 
4002 static const UChar InvShiftRows_op[16] =
4003    {3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0};
4004 static void InvShiftRows (V128* v)
4005 {
4006    V128 r;
4007    UInt i;
4008    for (i = 0; i < 16; i++)
4009       r.w8[i] = v->w8[InvShiftRows_op[15-i]];
4010    *v = r;
4011 }
4012 
4013 /* Multiplication of the finite fields elements of AES.
4014    See "A Specification for The AES Algorithm Rijndael
4015         (by Joan Daemen & Vincent Rijmen)"
4016         Dr. Brian Gladman, v3.1, 3rd March 2001. */
4017 /* N values so that (hex) xy = 0x03^N.
4018    0x00 cannot be used. We put 0xff for this value.*/
4019 /* a 16 x 16 matrix */
4020 static const UChar Nxy[256] = {                    // row nr
4021    0xff, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, // 1
4022    0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
4023    0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, // 2
4024    0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
4025    0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, // 3
4026    0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
4027    0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, // 4
4028    0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
4029    0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, // 5
4030    0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
4031    0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, // 6
4032    0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
4033    0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, // 7
4034    0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
4035    0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, // 8
4036    0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
4037    0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, // 9
4038    0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
4039    0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, //10
4040    0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
4041    0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, //11
4042    0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
4043    0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, //12
4044    0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
4045    0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, //13
4046    0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
4047    0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, //14
4048    0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
4049    0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, //15
4050    0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
4051    0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, //16
4052    0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07
4053 };
4054 
4055 /* E values so that E = 0x03^xy. */
4056 static const UChar Exy[256] = {                    // row nr
4057    0x01, 0x03, 0x05, 0x0f, 0x11, 0x33, 0x55, 0xff, // 1
4058    0x1a, 0x2e, 0x72, 0x96, 0xa1, 0xf8, 0x13, 0x35,
4059    0x5f, 0xe1, 0x38, 0x48, 0xd8, 0x73, 0x95, 0xa4, // 2
4060    0xf7, 0x02, 0x06, 0x0a, 0x1e, 0x22, 0x66, 0xaa,
4061    0xe5, 0x34, 0x5c, 0xe4, 0x37, 0x59, 0xeb, 0x26, // 3
4062    0x6a, 0xbe, 0xd9, 0x70, 0x90, 0xab, 0xe6, 0x31,
4063    0x53, 0xf5, 0x04, 0x0c, 0x14, 0x3c, 0x44, 0xcc, // 4
4064    0x4f, 0xd1, 0x68, 0xb8, 0xd3, 0x6e, 0xb2, 0xcd,
4065    0x4c, 0xd4, 0x67, 0xa9, 0xe0, 0x3b, 0x4d, 0xd7, // 5
4066    0x62, 0xa6, 0xf1, 0x08, 0x18, 0x28, 0x78, 0x88,
4067    0x83, 0x9e, 0xb9, 0xd0, 0x6b, 0xbd, 0xdc, 0x7f, // 6
4068    0x81, 0x98, 0xb3, 0xce, 0x49, 0xdb, 0x76, 0x9a,
4069    0xb5, 0xc4, 0x57, 0xf9, 0x10, 0x30, 0x50, 0xf0, // 7
4070    0x0b, 0x1d, 0x27, 0x69, 0xbb, 0xd6, 0x61, 0xa3,
4071    0xfe, 0x19, 0x2b, 0x7d, 0x87, 0x92, 0xad, 0xec, // 8
4072    0x2f, 0x71, 0x93, 0xae, 0xe9, 0x20, 0x60, 0xa0,
4073    0xfb, 0x16, 0x3a, 0x4e, 0xd2, 0x6d, 0xb7, 0xc2, // 9
4074    0x5d, 0xe7, 0x32, 0x56, 0xfa, 0x15, 0x3f, 0x41,
4075    0xc3, 0x5e, 0xe2, 0x3d, 0x47, 0xc9, 0x40, 0xc0, //10
4076    0x5b, 0xed, 0x2c, 0x74, 0x9c, 0xbf, 0xda, 0x75,
4077    0x9f, 0xba, 0xd5, 0x64, 0xac, 0xef, 0x2a, 0x7e, //11
4078    0x82, 0x9d, 0xbc, 0xdf, 0x7a, 0x8e, 0x89, 0x80,
4079    0x9b, 0xb6, 0xc1, 0x58, 0xe8, 0x23, 0x65, 0xaf, //12
4080    0xea, 0x25, 0x6f, 0xb1, 0xc8, 0x43, 0xc5, 0x54,
4081    0xfc, 0x1f, 0x21, 0x63, 0xa5, 0xf4, 0x07, 0x09, //13
4082    0x1b, 0x2d, 0x77, 0x99, 0xb0, 0xcb, 0x46, 0xca,
4083    0x45, 0xcf, 0x4a, 0xde, 0x79, 0x8b, 0x86, 0x91, //14
4084    0xa8, 0xe3, 0x3e, 0x42, 0xc6, 0x51, 0xf3, 0x0e,
4085    0x12, 0x36, 0x5a, 0xee, 0x29, 0x7b, 0x8d, 0x8c, //15
4086    0x8f, 0x8a, 0x85, 0x94, 0xa7, 0xf2, 0x0d, 0x17,
4087    0x39, 0x4b, 0xdd, 0x7c, 0x84, 0x97, 0xa2, 0xfd, //16
4088    0x1c, 0x24, 0x6c, 0xb4, 0xc7, 0x52, 0xf6, 0x01};
4089 
4090 static inline UChar ff_mul(UChar u1, UChar u2)
4091 {
4092    if ((u1 > 0) && (u2 > 0)) {
4093       UInt ui = Nxy[u1] + Nxy[u2];
4094       if (ui >= 255)
4095          ui = ui - 255;
4096       return Exy[ui];
4097    } else {
4098       return 0;
4099    };
4100 }
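
/* Sanity-check sketch (not part of VEX, hence the #if 0): ff_mul
   should reproduce GF(2^8) products over the AES polynomial 0x11B,
   e.g. the {57}x{83} = {C1} example from the Rijndael specification,
   and {02}x{80} = {1B}, which exercises the reduction step. */
#if 0
static void example_ff_mul_checks ( void )
{
   vassert(ff_mul(0x57, 0x83) == 0xC1);
   vassert(ff_mul(0x02, 0x80) == 0x1B);
   vassert(ff_mul(0x00, 0x35) == 0x00);
}
#endif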
4101 
4102 static void MixColumns (V128* v)
4103 {
4104    V128 r;
4105    Int j;
4106 #define P(x,row,col) (x)->w8[((row)*4+(col))]
4107    for (j = 0; j < 4; j++) {
4108       P(&r,j,0) = ff_mul(0x02, P(v,j,0)) ^ ff_mul(0x03, P(v,j,1))
4109          ^ P(v,j,2) ^ P(v,j,3);
4110       P(&r,j,1) = P(v,j,0) ^ ff_mul( 0x02, P(v,j,1) )
4111          ^ ff_mul(0x03, P(v,j,2) ) ^ P(v,j,3);
4112       P(&r,j,2) = P(v,j,0) ^ P(v,j,1) ^ ff_mul( 0x02, P(v,j,2) )
4113          ^ ff_mul(0x03, P(v,j,3) );
4114       P(&r,j,3) = ff_mul(0x03, P(v,j,0) ) ^ P(v,j,1) ^ P(v,j,2)
4115          ^ ff_mul( 0x02, P(v,j,3) );
4116    }
4117    *v = r;
4118 #undef P
4119 }
4120 
4121 static void InvMixColumns (V128* v)
4122 {
4123    V128 r;
4124    Int j;
4125 #define P(x,row,col) (x)->w8[((row)*4+(col))]
4126    for (j = 0; j < 4; j++) {
4127       P(&r,j,0) = ff_mul(0x0e, P(v,j,0) ) ^ ff_mul(0x0b, P(v,j,1) )
4128          ^ ff_mul(0x0d,P(v,j,2) ) ^ ff_mul(0x09, P(v,j,3) );
4129       P(&r,j,1) = ff_mul(0x09, P(v,j,0) ) ^ ff_mul(0x0e, P(v,j,1) )
4130          ^ ff_mul(0x0b,P(v,j,2) ) ^ ff_mul(0x0d, P(v,j,3) );
4131       P(&r,j,2) = ff_mul(0x0d, P(v,j,0) ) ^ ff_mul(0x09, P(v,j,1) )
4132          ^ ff_mul(0x0e,P(v,j,2) ) ^ ff_mul(0x0b, P(v,j,3) );
4133       P(&r,j,3) = ff_mul(0x0b, P(v,j,0) ) ^ ff_mul(0x0d, P(v,j,1) )
4134          ^ ff_mul(0x09,P(v,j,2) ) ^ ff_mul(0x0e, P(v,j,3) );
4135    }
4136    *v = r;
4137 #undef P
4138 
4139 }
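/* Illustrative sketch, not part of the build: InvMixColumns uses the
   GF(2^8) inverse of the MixColumns matrix ({0e,0b,0d,09} versus
   {02,03,01,01}), so applying one after the other to any 128-bit
   state is the identity.  check_mix_roundtrip is a hypothetical name
   used only for this illustration. */
#if 0
static void check_mix_roundtrip ( void )
{
   V128 v, orig;
   Int  i;
   for (i = 0; i < 16; i++)
      v.w8[i] = (UChar)(17 * i + 3); /* arbitrary test pattern */
   orig = v;
   MixColumns(&v);
   InvMixColumns(&v);
   for (i = 0; i < 16; i++)
      vassert(v.w8[i] == orig.w8[i]);
}
#endif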
4140 
4141 /* For description, see definition in guest_amd64_defs.h */
4142 void amd64g_dirtyhelper_AES (
4143           VexGuestAMD64State* gst,
4144           HWord opc4, HWord gstOffD,
4145           HWord gstOffL, HWord gstOffR
4146        )
4147 {
4148    // Work out where the args are, within the guest state.
4149    V128* argD = (V128*)( ((UChar*)gst) + gstOffD );
4150    V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
4151    V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
4152    V128  r;
4153 
4154    switch (opc4) {
4155       case 0xDC: /* AESENC */
4156       case 0xDD: /* AESENCLAST */
4157          r = *argR;
4158          ShiftRows (&r);
4159          SubBytes  (&r);
4160          if (opc4 == 0xDC)
4161             MixColumns (&r);
4162          argD->w64[0] = r.w64[0] ^ argL->w64[0];
4163          argD->w64[1] = r.w64[1] ^ argL->w64[1];
4164          break;
4165 
4166       case 0xDE: /* AESDEC */
4167       case 0xDF: /* AESDECLAST */
4168          r = *argR;
4169          InvShiftRows (&r);
4170          InvSubBytes (&r);
4171          if (opc4 == 0xDE)
4172             InvMixColumns (&r);
4173          argD->w64[0] = r.w64[0] ^ argL->w64[0];
4174          argD->w64[1] = r.w64[1] ^ argL->w64[1];
4175          break;
4176 
4177       case 0xDB: /* AESIMC */
4178          *argD = *argL;
4179          InvMixColumns (argD);
4180          break;
4181       default: vassert(0);
4182    }
4183 }
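/* Informal note: this follows the Intel SDM definitions, where AESENC
   computes MixColumns(SubBytes(ShiftRows(state))) ^ roundkey and
   AESENCLAST omits the MixColumns step; AESDEC/AESDECLAST use the
   inverse transforms.  In this helper the state being transformed is
   *argR and the value XORed in at the end is *argL; which guest
   registers those correspond to is determined by the offsets passed
   in from the IR generated by toIR.c. */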
4184 
4185 static inline UInt RotWord (UInt   w32)
4186 {
4187    return ((w32 >> 8) | (w32 << 24));
4188 }
4189 
4190 static inline UInt SubWord (UInt   w32)
4191 {
4192    UChar *w8;
4193    UChar *r8;
4194    UInt res;
4195    w8 = (UChar*) &w32;
4196    r8 = (UChar*) &res;
4197    r8[0] = sbox[w8[0]];
4198    r8[1] = sbox[w8[1]];
4199    r8[2] = sbox[w8[2]];
4200    r8[3] = sbox[w8[3]];
4201    return res;
4202 }
4203 
4204 /* For description, see definition in guest_amd64_defs.h */
4205 extern void amd64g_dirtyhelper_AESKEYGENASSIST (
4206           VexGuestAMD64State* gst,
4207           HWord imm8,
4208           HWord gstOffL, HWord gstOffR
4209        )
4210 {
4211    // Work out where the args are, within the guest state.
4212    V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
4213    V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
4214 
4215    // We have to create the result in a temporary in the
4216    // case where the src and dst regs are the same.  See #341698.
4217    V128 tmp;
4218 
4219    tmp.w32[3] = RotWord (SubWord (argL->w32[3])) ^ imm8;
4220    tmp.w32[2] = SubWord (argL->w32[3]);
4221    tmp.w32[1] = RotWord (SubWord (argL->w32[1])) ^ imm8;
4222    tmp.w32[0] = SubWord (argL->w32[1]);
4223 
4224    argR->w32[3] = tmp.w32[3];
4225    argR->w32[2] = tmp.w32[2];
4226    argR->w32[1] = tmp.w32[1];
4227    argR->w32[0] = tmp.w32[0];
4228 }
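/* Illustrative sketch, not part of the build: with an all-zero source
   register and imm8 == 0x01, every SubWord yields 0x63636363 (since
   sbox[0x00] == 0x63) and every RotWord(...) ^ imm8 yields 0x63636362,
   matching the SDM definition of AESKEYGENASSIST.  The name
   check_keygenassist and the choice of YMM1/YMM2 as scratch slots are
   assumptions made only for this illustration. */
#if 0
static void check_keygenassist ( void )
{
   VexGuestAMD64State gst;
   HWord offL = offsetof(VexGuestAMD64State, guest_YMM1);
   HWord offR = offsetof(VexGuestAMD64State, guest_YMM2);
   V128* dst  = (V128*)( ((UChar*)&gst) + offR );
   LibVEX_GuestAMD64_initialise(&gst); /* zeroes the vector state */
   amd64g_dirtyhelper_AESKEYGENASSIST(&gst, 0x01, offL, offR);
   vassert(dst->w32[0] == 0x63636363);
   vassert(dst->w32[1] == 0x63636362);
   vassert(dst->w32[2] == 0x63636363);
   vassert(dst->w32[3] == 0x63636362);
}
#endif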
4229 
4230 
4231 
4232 /*---------------------------------------------------------------*/
4233 /*--- Helpers for dealing with, and describing,               ---*/
4234 /*--- guest state as a whole.                                 ---*/
4235 /*---------------------------------------------------------------*/
4236 
4237 /* Initialise the entire amd64 guest state. */
4238 /* VISIBLE TO LIBVEX CLIENT */
4239 void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state )
4240 {
4241    vex_state->host_EvC_FAILADDR = 0;
4242    vex_state->host_EvC_COUNTER = 0;
4243    vex_state->pad0 = 0;
4244 
4245    vex_state->guest_RAX = 0;
4246    vex_state->guest_RCX = 0;
4247    vex_state->guest_RDX = 0;
4248    vex_state->guest_RBX = 0;
4249    vex_state->guest_RSP = 0;
4250    vex_state->guest_RBP = 0;
4251    vex_state->guest_RSI = 0;
4252    vex_state->guest_RDI = 0;
4253    vex_state->guest_R8  = 0;
4254    vex_state->guest_R9  = 0;
4255    vex_state->guest_R10 = 0;
4256    vex_state->guest_R11 = 0;
4257    vex_state->guest_R12 = 0;
4258    vex_state->guest_R13 = 0;
4259    vex_state->guest_R14 = 0;
4260    vex_state->guest_R15 = 0;
4261 
4262    vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
4263    vex_state->guest_CC_DEP1 = 0;
4264    vex_state->guest_CC_DEP2 = 0;
4265    vex_state->guest_CC_NDEP = 0;
4266 
4267    vex_state->guest_DFLAG   = 1; /* forwards */
4268    vex_state->guest_IDFLAG  = 0;
4269    vex_state->guest_ACFLAG  = 0;
4270 
4271    /* HACK: represent the offset associated with a constant %fs.
4272       Typically, on linux, this assumes that %fs is only ever zero (main
4273       thread) or 0x63. */
4274    vex_state->guest_FS_CONST = 0;
4275 
4276    vex_state->guest_RIP = 0;
4277 
4278    /* Initialise the simulated FPU */
4279    amd64g_dirtyhelper_FINIT( vex_state );
4280 
4281    /* Initialise the AVX state. */
4282 #  define AVXZERO(_ymm) \
4283       do { _ymm[0]=_ymm[1]=_ymm[2]=_ymm[3] = 0; \
4284            _ymm[4]=_ymm[5]=_ymm[6]=_ymm[7] = 0; \
4285       } while (0)
4286    vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST;
4287    AVXZERO(vex_state->guest_YMM0);
4288    AVXZERO(vex_state->guest_YMM1);
4289    AVXZERO(vex_state->guest_YMM2);
4290    AVXZERO(vex_state->guest_YMM3);
4291    AVXZERO(vex_state->guest_YMM4);
4292    AVXZERO(vex_state->guest_YMM5);
4293    AVXZERO(vex_state->guest_YMM6);
4294    AVXZERO(vex_state->guest_YMM7);
4295    AVXZERO(vex_state->guest_YMM8);
4296    AVXZERO(vex_state->guest_YMM9);
4297    AVXZERO(vex_state->guest_YMM10);
4298    AVXZERO(vex_state->guest_YMM11);
4299    AVXZERO(vex_state->guest_YMM12);
4300    AVXZERO(vex_state->guest_YMM13);
4301    AVXZERO(vex_state->guest_YMM14);
4302    AVXZERO(vex_state->guest_YMM15);
4303    AVXZERO(vex_state->guest_YMM16);
4304 
4305 #  undef AVXZERO
4306 
4307    vex_state->guest_EMNOTE = EmNote_NONE;
4308 
4309    /* These should never be either read or written, but we
4310       initialise them anyway. */
4311    vex_state->guest_CMSTART = 0;
4312    vex_state->guest_CMLEN   = 0;
4313 
4314    vex_state->guest_NRADDR   = 0;
4315    vex_state->guest_SC_CLASS = 0;
4316    vex_state->guest_GS_CONST = 0;
4317 
4318    vex_state->guest_IP_AT_SYSCALL = 0;
4319    vex_state->pad1 = 0;
4320 }
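/* Illustrative sketch, not part of the build: a LibVEX client would
   typically zero the state with this function and then set the
   initial instruction and stack pointers itself.  The function name
   and argument names below are placeholders, not part of any real
   API. */
#if 0
static void example_setup_guest ( VexGuestAMD64State* st,
                                  ULong entry_rip, ULong stack_top )
{
   LibVEX_GuestAMD64_initialise(st);
   st->guest_RIP = entry_rip;   /* where guest execution should begin */
   st->guest_RSP = stack_top;   /* initial guest stack pointer */
}
#endif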
4321 
4322 
4323 /* Figure out if any part of the guest state contained in minoff
4324    .. maxoff requires precise memory exceptions.  If in doubt return
4325    True (but this generates significantly slower code).
4326 
4327    By default we enforce precise exns for guest %RSP, %RBP and %RIP
4328    only.  These are the minimum needed to extract correct stack
4329    backtraces from amd64 code.
4330 
4331    Only %RSP is needed in mode VexRegUpdSpAtMemAccess.
4332 */
4333 Bool guest_amd64_state_requires_precise_mem_exns (
4334         Int minoff, Int maxoff, VexRegisterUpdates pxControl
4335      )
4336 {
4337    Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP);
4338    Int rbp_max = rbp_min + 8 - 1;
4339    Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP);
4340    Int rsp_max = rsp_min + 8 - 1;
4341    Int rip_min = offsetof(VexGuestAMD64State, guest_RIP);
4342    Int rip_max = rip_min + 8 - 1;
4343 
4344    if (maxoff < rsp_min || minoff > rsp_max) {
4345       /* no overlap with rsp */
4346       if (pxControl == VexRegUpdSpAtMemAccess)
4347          return False; // We only need to check the stack pointer.
4348    } else {
4349       return True;
4350    }
4351 
4352    if (maxoff < rbp_min || minoff > rbp_max) {
4353       /* no overlap with rbp */
4354    } else {
4355       return True;
4356    }
4357 
4358    if (maxoff < rip_min || minoff > rip_max) {
4359       /* no overlap with rip */
4360    } else {
4361       return True;
4362    }
4363 
4364    return False;
4365 }
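/* Illustrative sketch, not part of the build: under the
   VexRegUpdSpAtMemAccess policy only ranges overlapping %RSP force
   precise exceptions, so a query covering just %RAX comes back False
   while one covering %RSP comes back True.  The function name below
   is hypothetical. */
#if 0
static void example_precise_exn_queries ( void )
{
   Int rax = offsetof(VexGuestAMD64State, guest_RAX);
   Int rsp = offsetof(VexGuestAMD64State, guest_RSP);
   vassert( !guest_amd64_state_requires_precise_mem_exns
               (rax, rax + 7, VexRegUpdSpAtMemAccess) );
   vassert(  guest_amd64_state_requires_precise_mem_exns
               (rsp, rsp + 7, VexRegUpdSpAtMemAccess) );
}
#endif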
4366 
4367 
4368 #define ALWAYSDEFD(field)                             \
4369     { offsetof(VexGuestAMD64State, field),            \
4370       (sizeof ((VexGuestAMD64State*)0)->field) }
4371 
4372 VexGuestLayout
4373    amd64guest_layout
4374       = {
4375           /* Total size of the guest state, in bytes. */
4376           .total_sizeB = sizeof(VexGuestAMD64State),
4377 
4378           /* Describe the stack pointer. */
4379           .offset_SP = offsetof(VexGuestAMD64State,guest_RSP),
4380           .sizeof_SP = 8,
4381 
4382           /* Describe the frame pointer. */
4383           .offset_FP = offsetof(VexGuestAMD64State,guest_RBP),
4384           .sizeof_FP = 8,
4385 
4386           /* Describe the instruction pointer. */
4387           .offset_IP = offsetof(VexGuestAMD64State,guest_RIP),
4388           .sizeof_IP = 8,
4389 
4390           /* Describe any sections to be regarded by Memcheck as
4391              'always-defined'. */
4392           .n_alwaysDefd = 16,
4393 
4394           /* flags thunk: OP and NDEP are always defd, whereas DEP1
4395              and DEP2 have to be tracked.  See detailed comment in
4396              gdefs.h on meaning of thunk fields. */
4397           .alwaysDefd
4398              = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
4399                  /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
4400                  /*  2 */ ALWAYSDEFD(guest_DFLAG),
4401                  /*  3 */ ALWAYSDEFD(guest_IDFLAG),
4402                  /*  4 */ ALWAYSDEFD(guest_RIP),
4403                  /*  5 */ ALWAYSDEFD(guest_FS_CONST),
4404                  /*  6 */ ALWAYSDEFD(guest_FTOP),
4405                  /*  7 */ ALWAYSDEFD(guest_FPTAG),
4406                  /*  8 */ ALWAYSDEFD(guest_FPROUND),
4407                  /*  9 */ ALWAYSDEFD(guest_FC3210),
4408                  // /* */ ALWAYSDEFD(guest_CS),
4409                  // /* */ ALWAYSDEFD(guest_DS),
4410                  // /* */ ALWAYSDEFD(guest_ES),
4411                  // /* */ ALWAYSDEFD(guest_FS),
4412                  // /* */ ALWAYSDEFD(guest_GS),
4413                  // /* */ ALWAYSDEFD(guest_SS),
4414                  // /* */ ALWAYSDEFD(guest_LDT),
4415                  // /* */ ALWAYSDEFD(guest_GDT),
4416                  /* 10 */ ALWAYSDEFD(guest_EMNOTE),
4417                  /* 11 */ ALWAYSDEFD(guest_SSEROUND),
4418                  /* 12 */ ALWAYSDEFD(guest_CMSTART),
4419                  /* 13 */ ALWAYSDEFD(guest_CMLEN),
4420                  /* 14 */ ALWAYSDEFD(guest_SC_CLASS),
4421                  /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
4422                }
4423         };
4424 
4425 
4426 /*---------------------------------------------------------------*/
4427 /*--- end                               guest_amd64_helpers.c ---*/
4428 /*---------------------------------------------------------------*/
4429