1
2 /*---------------------------------------------------------------*/
3 /*--- begin guest_amd64_helpers.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2015 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex_emnote.h"
38 #include "libvex_guest_amd64.h"
39 #include "libvex_ir.h"
40 #include "libvex.h"
41
42 #include "main_util.h"
43 #include "main_globals.h"
44 #include "guest_generic_bb_to_IR.h"
45 #include "guest_amd64_defs.h"
46 #include "guest_generic_x87.h"
47
48
49 /* This file contains helper functions for amd64 guest code.
50 Calls to these functions are generated by the back end.
51 These calls are of course in the host machine code and
52 this file will be compiled to host machine code, so that
53 all makes sense.
54
55 Only change the signatures of these helper functions very
56 carefully. If you change a signature here, you'll also have to
57 change the parameters passed in the corresponding IR calls
58 constructed by guest-amd64/toIR.c.
59
60 The convention used is that all functions called from generated
61 code are named amd64g_<something>, and any function whose name lacks
62 that prefix is not called from generated code. Note that some
63 LibVEX_* functions can however be called by VEX's client, but that
64 is not the same as calling them from VEX-generated code.
65 */
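
/* Reader's orientation, added here and hedged -- this summary is not
   part of the original header.  The %rflags helpers below do not read
   a stored flags word.  Instead the guest state carries a "thunk" of
   four fields: CC_OP records which kind of operation most recently set
   the flags, and CC_DEP1/CC_DEP2/CC_NDEP hold just enough of its
   operands for any individual flag to be recomputed on demand.  As a
   hypothetical illustration: after a guest "cmpq %rsi,%rdi", toIR.c
   would leave something like CC_OP = AMD64G_CC_OP_SUBQ, CC_DEP1 = the
   value of %rdi (left operand) and CC_DEP2 = the value of %rsi (right
   operand); a following "jb" then becomes a call
   amd64g_calculate_condition(AMD64CondB, CC_OP, CC_DEP1, CC_DEP2,
   CC_NDEP), which the specialiser later in this file may fold down to
   a plain unsigned comparison of the two operands. */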
66
67
68 /* Set to 1 to get detailed profiling info about use of the flag
69 machinery. */
70 #define PROFILE_RFLAGS 0
71
72
73 /*---------------------------------------------------------------*/
74 /*--- %rflags run-time helpers. ---*/
75 /*---------------------------------------------------------------*/
76
77 /* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
78 after imulq/mulq. */
79
80 static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
81 {
82 const Long halfMask = 0xFFFFFFFFLL;
83 ULong u0, v0, w0;
84 Long u1, v1, w1, w2, t;
85 u0 = u & halfMask;
86 u1 = u >> 32;
87 v0 = v & halfMask;
88 v1 = v >> 32;
89 w0 = u0 * v0;
90 t = u1 * v0 + (w0 >> 32);
91 w1 = t & halfMask;
92 w2 = t >> 32;
93 w1 = u0 * v1 + w1;
94 *rHi = u1 * v1 + w2 + (w1 >> 32);
95 *rLo = (Long)((ULong)u * (ULong)v);
96 }
97
98 static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
99 {
100 const ULong halfMask = 0xFFFFFFFFULL;
101 ULong u0, v0, w0;
102 ULong u1, v1, w1, w2, t;
103 u0 = u & halfMask;
104 u1 = u >> 32;
105 v0 = v & halfMask;
106 v1 = v >> 32;
107 w0 = u0 * v0;
108 t = u1 * v0 + (w0 >> 32);
109 w1 = t & halfMask;
110 w2 = t >> 32;
111 w1 = u0 * v1 + w1;
112 *rHi = u1 * v1 + w2 + (w1 >> 32);
113 *rLo = u * v;
114 }
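
/* Note added for clarity (a sketch, not from the original): both
   routines use the schoolbook decomposition
      u = u1*2^32 + u0,   v = v1*2^32 + v0
      u*v = u1*v1*2^64 + (u1*v0 + u0*v1)*2^32 + u0*v0
   t accumulates u1*v0 plus the carry out of u0*v0; w1/w2 split that
   into 32-bit halves, u0*v1 is folded into w1, and the high 64 bits
   come out as u1*v1 + w2 + (w1 >> 32).  The low 64 bits are just the
   wrapped 64-bit product.  mullS64 differs only in doing the
   high-order partial products on signed halves. */
#if 0
/* Hypothetical spot-check (not compiled): */
static void mull_spot_check ( void )
{
   ULong hi, lo;
   mullU64(0xFFFFFFFFFFFFFFFFULL, 2ULL, &hi, &lo);
   vassert(hi == 1 && lo == 0xFFFFFFFFFFFFFFFEULL);
}
#endif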
115
116
117 static const UChar parity_table[256] = {
118 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
119 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
120 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
121 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
122 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
123 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
124 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
125 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
126 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
127 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
128 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
129 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
130 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
131 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
132 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
133 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
134 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
135 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
136 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
137 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
138 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
139 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
140 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
141 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
142 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
143 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
144 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
145 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
146 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
147 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
148 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
149 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
150 };
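
/* Note added for clarity (not from the original): parity_table[b] is
   AMD64G_CC_MASK_P exactly when the byte b contains an even number of
   1 bits, which is the x86 definition of PF (PF looks only at the
   least significant byte of a result). */
#if 0
/* Hypothetical generator for one entry (not compiled), showing the
   rule the table encodes: parity_of(0x03) == AMD64G_CC_MASK_P (two
   bits set), parity_of(0x07) == 0 (three bits set). */
static UChar parity_of ( UChar b )
{
   UChar even = 1;
   while (b) { even ^= (b & 1); b >>= 1; }
   return even ? AMD64G_CC_MASK_P : 0;
}
#endif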
151
152 /* generalised left-shifter */
153 static inline Long lshift ( Long x, Int n )
154 {
155 if (n >= 0)
156 return (ULong)x << n;
157 else
158 return x >> (-n);
159 }
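
/* Note added for clarity (not from the original): a negative count
   makes lshift shift right instead, which is how the ACTIONS_* macros
   below position a result's sign bit.  For example, with DATA_BITS ==
   32, lshift(res, 8 - 32) is res >> 24, moving bit 31 into bit 7 --
   the SF position in %rflags -- and the "12 - DATA_BITS" variants park
   the interesting bit at bit 11, the OF position. */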
160
161 /* identity on ULong */
162 static inline ULong idULong ( ULong x )
163 {
164 return x;
165 }
166
167
168 #define PREAMBLE(__data_bits) \
169 /* const */ ULong DATA_MASK \
170 = __data_bits==8 \
171 ? 0xFFULL \
172 : (__data_bits==16 \
173 ? 0xFFFFULL \
174 : (__data_bits==32 \
175 ? 0xFFFFFFFFULL \
176 : 0xFFFFFFFFFFFFFFFFULL)); \
177 /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1); \
178 /* const */ ULong CC_DEP1 = cc_dep1_formal; \
179 /* const */ ULong CC_DEP2 = cc_dep2_formal; \
180 /* const */ ULong CC_NDEP = cc_ndep_formal; \
181 /* Four bogus assignments, which hopefully gcc can */ \
182 /* optimise away, and which stop it complaining about */ \
183 /* unused variables. */ \
184 SIGN_MASK = SIGN_MASK; \
185 DATA_MASK = DATA_MASK; \
186 CC_DEP2 = CC_DEP2; \
187 CC_NDEP = CC_NDEP;
188
189
190 /*-------------------------------------------------------------*/
191
192 #define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \
193 { \
194 PREAMBLE(DATA_BITS); \
195 { ULong cf, pf, af, zf, sf, of; \
196 ULong argL, argR, res; \
197 argL = CC_DEP1; \
198 argR = CC_DEP2; \
199 res = argL + argR; \
200 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
201 pf = parity_table[(UChar)res]; \
202 af = (res ^ argL ^ argR) & 0x10; \
203 zf = ((DATA_UTYPE)res == 0) << 6; \
204 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
205 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
206 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
207 return cf | pf | af | zf | sf | of; \
208 } \
209 }
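
/* Note added for clarity (not from the original): ACTIONS_ADD builds
   each flag directly in its %rflags bit position --
      cf : bit 0, set on unsigned wrap-around (res < argL);
      af : bit 4 of (res ^ argL ^ argR), i.e. the carry out of bit 3;
      zf : bit 6;   sf : bit 7 (via lshift, see above);
      of : bit 11, set when argL and argR have the same sign but the
           result's sign differs, i.e. the top bit of
           (argL ^ argR ^ -1) & (argL ^ res), moved into place by
           lshift(..., 12 - DATA_BITS). */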
210
211 /*-------------------------------------------------------------*/
212
213 #define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \
214 { \
215 PREAMBLE(DATA_BITS); \
216 { ULong cf, pf, af, zf, sf, of; \
217 ULong argL, argR, res; \
218 argL = CC_DEP1; \
219 argR = CC_DEP2; \
220 res = argL - argR; \
221 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
222 pf = parity_table[(UChar)res]; \
223 af = (res ^ argL ^ argR) & 0x10; \
224 zf = ((DATA_UTYPE)res == 0) << 6; \
225 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
226 of = lshift((argL ^ argR) & (argL ^ res), \
227 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
228 return cf | pf | af | zf | sf | of; \
229 } \
230 }
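
/* Note added for clarity (not from the original): ACTIONS_SUB follows
   the same scheme; cf becomes the borrow (argL <u argR), and the
   overflow term uses (argL ^ argR) & (argL ^ res) -- the operands had
   different signs and the result's sign differs from argL's. */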
231
232 /*-------------------------------------------------------------*/
233
234 #define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \
235 { \
236 PREAMBLE(DATA_BITS); \
237 { ULong cf, pf, af, zf, sf, of; \
238 ULong argL, argR, oldC, res; \
239 oldC = CC_NDEP & AMD64G_CC_MASK_C; \
240 argL = CC_DEP1; \
241 argR = CC_DEP2 ^ oldC; \
242 res = (argL + argR) + oldC; \
243 if (oldC) \
244 cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \
245 else \
246 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
247 pf = parity_table[(UChar)res]; \
248 af = (res ^ argL ^ argR) & 0x10; \
249 zf = ((DATA_UTYPE)res == 0) << 6; \
250 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
251 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
252 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
253 return cf | pf | af | zf | sf | of; \
254 } \
255 }
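
/* Note added for clarity (a hedged reading of the code, not from the
   original): the thunk's DEP2 field for ADC evidently holds
   (argR ^ oldC) rather than argR itself, so "CC_DEP2 ^ oldC" above
   recovers the real right-hand operand before redoing the addition.
   The carry test must be <= rather than < when a carry came in,
   because argL + argR + 1 can wrap around to exactly argL (take argR
   == all-ones). */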
256
257 /*-------------------------------------------------------------*/
258
259 #define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \
260 { \
261 PREAMBLE(DATA_BITS); \
262 { ULong cf, pf, af, zf, sf, of; \
263 ULong argL, argR, oldC, res; \
264 oldC = CC_NDEP & AMD64G_CC_MASK_C; \
265 argL = CC_DEP1; \
266 argR = CC_DEP2 ^ oldC; \
267 res = (argL - argR) - oldC; \
268 if (oldC) \
269 cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR; \
270 else \
271 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
272 pf = parity_table[(UChar)res]; \
273 af = (res ^ argL ^ argR) & 0x10; \
274 zf = ((DATA_UTYPE)res == 0) << 6; \
275 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
276 of = lshift((argL ^ argR) & (argL ^ res), \
277 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
278 return cf | pf | af | zf | sf | of; \
279 } \
280 }
281
282 /*-------------------------------------------------------------*/
283
284 #define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \
285 { \
286 PREAMBLE(DATA_BITS); \
287 { ULong cf, pf, af, zf, sf, of; \
288 cf = 0; \
289 pf = parity_table[(UChar)CC_DEP1]; \
290 af = 0; \
291 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
292 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
293 of = 0; \
294 return cf | pf | af | zf | sf | of; \
295 } \
296 }
297
298 /*-------------------------------------------------------------*/
299
300 #define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \
301 { \
302 PREAMBLE(DATA_BITS); \
303 { ULong cf, pf, af, zf, sf, of; \
304 ULong argL, argR, res; \
305 res = CC_DEP1; \
306 argL = res - 1; \
307 argR = 1; \
308 cf = CC_NDEP & AMD64G_CC_MASK_C; \
309 pf = parity_table[(UChar)res]; \
310 af = (res ^ argL ^ argR) & 0x10; \
311 zf = ((DATA_UTYPE)res == 0) << 6; \
312 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
313 of = ((res & DATA_MASK) == SIGN_MASK) << 11; \
314 return cf | pf | af | zf | sf | of; \
315 } \
316 }
317
318 /*-------------------------------------------------------------*/
319
320 #define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \
321 { \
322 PREAMBLE(DATA_BITS); \
323 { ULong cf, pf, af, zf, sf, of; \
324 ULong argL, argR, res; \
325 res = CC_DEP1; \
326 argL = res + 1; \
327 argR = 1; \
328 cf = CC_NDEP & AMD64G_CC_MASK_C; \
329 pf = parity_table[(UChar)res]; \
330 af = (res ^ argL ^ argR) & 0x10; \
331 zf = ((DATA_UTYPE)res == 0) << 6; \
332 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
333 of = ((res & DATA_MASK) \
334 == ((ULong)SIGN_MASK - 1)) << 11; \
335 return cf | pf | af | zf | sf | of; \
336 } \
337 }
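
/* Note added for clarity (not from the original): INC and DEC leave CF
   unchanged, which is why the old carry arrives via CC_NDEP instead of
   being recomputed.  Their OF rule is special-cased too: INC overflows
   exactly when the result equals SIGN_MASK (the most negative value),
   and DEC overflows exactly when the result equals SIGN_MASK - 1 (the
   most positive value); hence the equality tests shifted up to bit 11. */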
338
339 /*-------------------------------------------------------------*/
340
341 #define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \
342 { \
343 PREAMBLE(DATA_BITS); \
344 { ULong cf, pf, af, zf, sf, of; \
345 cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C; \
346 pf = parity_table[(UChar)CC_DEP1]; \
347 af = 0; /* undefined */ \
348 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
349 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
350 /* of is defined if shift count == 1 */ \
351 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
352 & AMD64G_CC_MASK_O; \
353 return cf | pf | af | zf | sf | of; \
354 } \
355 }
356
357 /*-------------------------------------------------------------*/
358
359 #define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \
360 { \
361 PREAMBLE(DATA_BITS); \
362 { ULong cf, pf, af, zf, sf, of; \
363 cf = CC_DEP2 & 1; \
364 pf = parity_table[(UChar)CC_DEP1]; \
365 af = 0; /* undefined */ \
366 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
367 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
368 /* of is defined if shift count == 1 */ \
369 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
370 & AMD64G_CC_MASK_O; \
371 return cf | pf | af | zf | sf | of; \
372 } \
373 }
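
/* Note added for clarity (a hedged reading of the code, not from the
   original): for SHL/SHR the thunk's DEP1 is the final result, and
   DEP2 appears to be the value after all but the last one-bit shift,
   so the bit about to fall off -- the new CF -- is DEP2's top bit for
   SHL and its bottom bit for SHR.  The OF expression xors the result
   against that undershifted value and keeps the bit that lands at
   position 11. */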
374
375 /*-------------------------------------------------------------*/
376
377 /* ROL: cf' = lsb(result). of' = msb(result) ^ lsb(result). */
378 /* DEP1 = result, NDEP = old flags */
379 #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \
380 { \
381 PREAMBLE(DATA_BITS); \
382 { ULong fl \
383 = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \
384 | (AMD64G_CC_MASK_C & CC_DEP1) \
385 | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \
386 11-(DATA_BITS-1)) \
387 ^ lshift(CC_DEP1, 11))); \
388 return fl; \
389 } \
390 }
391
392 /*-------------------------------------------------------------*/
393
394 /* ROR: cf' = msb(result). of' = msb(result) ^ msb-1(result). */
395 /* DEP1 = result, NDEP = old flags */
396 #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \
397 { \
398 PREAMBLE(DATA_BITS); \
399 { ULong fl \
400 = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \
401 | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \
402 | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \
403 11-(DATA_BITS-1)) \
404 ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \
405 return fl; \
406 } \
407 }
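
/* Note added for clarity (not from the original): CF is bit 0 of
   %rflags and OF is bit 11, which is what the two rotate macros are
   aiming at.  Taking DATA_BITS == 8 as a worked case: for ROL,
   lshift(result, 11-7) puts the result's msb at bit 11 and
   lshift(result, 11) puts its lsb (the new CF) there as well, so their
   xor is msb ^ lsb sitting in the OF position; for ROR the two shifts
   line up bits 7 and 6 of the result instead, giving msb ^ msb-1. */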
408
409 /*-------------------------------------------------------------*/
410
411 #define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU, \
412 DATA_U2TYPE, NARROWto2U) \
413 { \
414 PREAMBLE(DATA_BITS); \
415 { ULong cf, pf, af, zf, sf, of; \
416 DATA_UTYPE hi; \
417 DATA_UTYPE lo \
418 = NARROWtoU( ((DATA_UTYPE)CC_DEP1) \
419 * ((DATA_UTYPE)CC_DEP2) ); \
420 DATA_U2TYPE rr \
421 = NARROWto2U( \
422 ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1)) \
423 * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) ); \
424 hi = NARROWtoU(rr >>/*u*/ DATA_BITS); \
425 cf = (hi != 0); \
426 pf = parity_table[(UChar)lo]; \
427 af = 0; /* undefined */ \
428 zf = (lo == 0) << 6; \
429 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
430 of = cf << 11; \
431 return cf | pf | af | zf | sf | of; \
432 } \
433 }
434
435 /*-------------------------------------------------------------*/
436
437 #define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS, \
438 DATA_S2TYPE, NARROWto2S) \
439 { \
440 PREAMBLE(DATA_BITS); \
441 { ULong cf, pf, af, zf, sf, of; \
442 DATA_STYPE hi; \
443 DATA_STYPE lo \
444 = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1) \
445 * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) ); \
446 DATA_S2TYPE rr \
447 = NARROWto2S( \
448 ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \
449 * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) ); \
450 hi = NARROWtoS(rr >>/*s*/ DATA_BITS); \
451 cf = (hi != (lo >>/*s*/ (DATA_BITS-1))); \
452 pf = parity_table[(UChar)lo]; \
453 af = 0; /* undefined */ \
454 zf = (lo == 0) << 6; \
455 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
456 of = cf << 11; \
457 return cf | pf | af | zf | sf | of; \
458 } \
459 }
460
461 /*-------------------------------------------------------------*/
462
463 #define ACTIONS_UMULQ \
464 { \
465 PREAMBLE(64); \
466 { ULong cf, pf, af, zf, sf, of; \
467 ULong lo, hi; \
468 mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo ); \
469 cf = (hi != 0); \
470 pf = parity_table[(UChar)lo]; \
471 af = 0; /* undefined */ \
472 zf = (lo == 0) << 6; \
473 sf = lshift(lo, 8 - 64) & 0x80; \
474 of = cf << 11; \
475 return cf | pf | af | zf | sf | of; \
476 } \
477 }
478
479 /*-------------------------------------------------------------*/
480
481 #define ACTIONS_SMULQ \
482 { \
483 PREAMBLE(64); \
484 { ULong cf, pf, af, zf, sf, of; \
485 Long lo, hi; \
486 mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo ); \
487 cf = (hi != (lo >>/*s*/ (64-1))); \
488 pf = parity_table[(UChar)lo]; \
489 af = 0; /* undefined */ \
490 zf = (lo == 0) << 6; \
491 sf = lshift(lo, 8 - 64) & 0x80; \
492 of = cf << 11; \
493 return cf | pf | af | zf | sf | of; \
494 } \
495 }
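
/* Note added for clarity (not from the original): for the widening
   multiplies CF and OF always agree.  Unsigned (UMUL*): set iff the
   high half of the double-length product is nonzero, i.e. the result
   did not fit in DATA_BITS bits.  Signed (SMUL*): set iff the high
   half differs from the sign extension of the low half, i.e. the
   truncated result is not the true product.  SF/ZF/PF come from the
   low half, and AF is left at 0. */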
496
497 /*-------------------------------------------------------------*/
498
499 #define ACTIONS_ANDN(DATA_BITS,DATA_UTYPE) \
500 { \
501 PREAMBLE(DATA_BITS); \
502 { ULong cf, pf, af, zf, sf, of; \
503 cf = 0; \
504 pf = 0; \
505 af = 0; \
506 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
507 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
508 of = 0; \
509 return cf | pf | af | zf | sf | of; \
510 } \
511 }
512
513 /*-------------------------------------------------------------*/
514
515 #define ACTIONS_BLSI(DATA_BITS,DATA_UTYPE) \
516 { \
517 PREAMBLE(DATA_BITS); \
518 { ULong cf, pf, af, zf, sf, of; \
519 cf = ((DATA_UTYPE)CC_DEP2 != 0); \
520 pf = 0; \
521 af = 0; \
522 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
523 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
524 of = 0; \
525 return cf | pf | af | zf | sf | of; \
526 } \
527 }
528
529 /*-------------------------------------------------------------*/
530
531 #define ACTIONS_BLSMSK(DATA_BITS,DATA_UTYPE) \
532 { \
533 PREAMBLE(DATA_BITS); \
534 { Long cf, pf, af, zf, sf, of; \
535 cf = ((DATA_UTYPE)CC_DEP2 == 0); \
536 pf = 0; \
537 af = 0; \
538 zf = 0; \
539 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
540 of = 0; \
541 return cf | pf | af | zf | sf | of; \
542 } \
543 }
544
545 /*-------------------------------------------------------------*/
546
547 #define ACTIONS_BLSR(DATA_BITS,DATA_UTYPE) \
548 { \
549 PREAMBLE(DATA_BITS); \
550 { ULong cf, pf, af, zf, sf, of; \
551 cf = ((DATA_UTYPE)CC_DEP2 == 0); \
552 pf = 0; \
553 af = 0; \
554 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
555 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
556 of = 0; \
557 return cf | pf | af | zf | sf | of; \
558 } \
559 }
560
561 /*-------------------------------------------------------------*/
562
563
564 #if PROFILE_RFLAGS
565
566 static Bool initted = False;
567
568 /* C flag, fast route */
569 static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
570 /* C flag, slow route */
571 static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
572 /* table for calculate_cond */
573 static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
574 /* total entry counts for calc_all, calc_c, calc_cond. */
575 static UInt n_calc_all = 0;
576 static UInt n_calc_c = 0;
577 static UInt n_calc_cond = 0;
578
579 #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
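/* With that mask, counts are printed once every 0x400000 (4194304)
   combined calls -- a note added for clarity, not from the original. */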
580
581
582 static void showCounts ( void )
583 {
584 Int op, co;
585 HChar ch;
586 vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n",
587 n_calc_all, n_calc_cond, n_calc_c);
588
589 vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE"
590 " S NS P NP L NL LE NLE\n");
591 vex_printf(" -----------------------------------------------------"
592 "----------------------------------------\n");
593 for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
594
595 ch = ' ';
596 if (op > 0 && (op-1) % 4 == 0)
597 ch = 'B';
598 if (op > 0 && (op-1) % 4 == 1)
599 ch = 'W';
600 if (op > 0 && (op-1) % 4 == 2)
601 ch = 'L';
602 if (op > 0 && (op-1) % 4 == 3)
603 ch = 'Q';
604
605 vex_printf("%2d%c: ", op, ch);
606 vex_printf("%6u ", tabc_slow[op]);
607 vex_printf("%6u ", tabc_fast[op]);
608 for (co = 0; co < 16; co++) {
609 Int n = tab_cond[op][co];
610 if (n >= 1000) {
611 vex_printf(" %3dK", n / 1000);
612 } else
613 if (n >= 0) {
614 vex_printf(" %3d ", n );
615 } else {
616 vex_printf(" ");
617 }
618 }
619 vex_printf("\n");
620 }
621 vex_printf("\n");
622 }
623
624 static void initCounts ( void )
625 {
626 Int op, co;
627 initted = True;
628 for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
629 tabc_fast[op] = tabc_slow[op] = 0;
630 for (co = 0; co < 16; co++)
631 tab_cond[op][co] = 0;
632 }
633 }
634
635 #endif /* PROFILE_RFLAGS */
636
637
638 /* Worker function for the clean helpers that follow. */
639 /* Calculate all the 6 flags from the supplied thunk parameters.
640 Not directly called from generated code. */
641 static
642 ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
643 ULong cc_dep1_formal,
644 ULong cc_dep2_formal,
645 ULong cc_ndep_formal )
646 {
647 switch (cc_op) {
648 case AMD64G_CC_OP_COPY:
649 return cc_dep1_formal
650 & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
651 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);
652
653 case AMD64G_CC_OP_ADDB: ACTIONS_ADD( 8, UChar );
654 case AMD64G_CC_OP_ADDW: ACTIONS_ADD( 16, UShort );
655 case AMD64G_CC_OP_ADDL: ACTIONS_ADD( 32, UInt );
656 case AMD64G_CC_OP_ADDQ: ACTIONS_ADD( 64, ULong );
657
658 case AMD64G_CC_OP_ADCB: ACTIONS_ADC( 8, UChar );
659 case AMD64G_CC_OP_ADCW: ACTIONS_ADC( 16, UShort );
660 case AMD64G_CC_OP_ADCL: ACTIONS_ADC( 32, UInt );
661 case AMD64G_CC_OP_ADCQ: ACTIONS_ADC( 64, ULong );
662
663 case AMD64G_CC_OP_SUBB: ACTIONS_SUB( 8, UChar );
664 case AMD64G_CC_OP_SUBW: ACTIONS_SUB( 16, UShort );
665 case AMD64G_CC_OP_SUBL: ACTIONS_SUB( 32, UInt );
666 case AMD64G_CC_OP_SUBQ: ACTIONS_SUB( 64, ULong );
667
668 case AMD64G_CC_OP_SBBB: ACTIONS_SBB( 8, UChar );
669 case AMD64G_CC_OP_SBBW: ACTIONS_SBB( 16, UShort );
670 case AMD64G_CC_OP_SBBL: ACTIONS_SBB( 32, UInt );
671 case AMD64G_CC_OP_SBBQ: ACTIONS_SBB( 64, ULong );
672
673 case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC( 8, UChar );
674 case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
675 case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt );
676 case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong );
677
678 case AMD64G_CC_OP_INCB: ACTIONS_INC( 8, UChar );
679 case AMD64G_CC_OP_INCW: ACTIONS_INC( 16, UShort );
680 case AMD64G_CC_OP_INCL: ACTIONS_INC( 32, UInt );
681 case AMD64G_CC_OP_INCQ: ACTIONS_INC( 64, ULong );
682
683 case AMD64G_CC_OP_DECB: ACTIONS_DEC( 8, UChar );
684 case AMD64G_CC_OP_DECW: ACTIONS_DEC( 16, UShort );
685 case AMD64G_CC_OP_DECL: ACTIONS_DEC( 32, UInt );
686 case AMD64G_CC_OP_DECQ: ACTIONS_DEC( 64, ULong );
687
688 case AMD64G_CC_OP_SHLB: ACTIONS_SHL( 8, UChar );
689 case AMD64G_CC_OP_SHLW: ACTIONS_SHL( 16, UShort );
690 case AMD64G_CC_OP_SHLL: ACTIONS_SHL( 32, UInt );
691 case AMD64G_CC_OP_SHLQ: ACTIONS_SHL( 64, ULong );
692
693 case AMD64G_CC_OP_SHRB: ACTIONS_SHR( 8, UChar );
694 case AMD64G_CC_OP_SHRW: ACTIONS_SHR( 16, UShort );
695 case AMD64G_CC_OP_SHRL: ACTIONS_SHR( 32, UInt );
696 case AMD64G_CC_OP_SHRQ: ACTIONS_SHR( 64, ULong );
697
698 case AMD64G_CC_OP_ROLB: ACTIONS_ROL( 8, UChar );
699 case AMD64G_CC_OP_ROLW: ACTIONS_ROL( 16, UShort );
700 case AMD64G_CC_OP_ROLL: ACTIONS_ROL( 32, UInt );
701 case AMD64G_CC_OP_ROLQ: ACTIONS_ROL( 64, ULong );
702
703 case AMD64G_CC_OP_RORB: ACTIONS_ROR( 8, UChar );
704 case AMD64G_CC_OP_RORW: ACTIONS_ROR( 16, UShort );
705 case AMD64G_CC_OP_RORL: ACTIONS_ROR( 32, UInt );
706 case AMD64G_CC_OP_RORQ: ACTIONS_ROR( 64, ULong );
707
708 case AMD64G_CC_OP_UMULB: ACTIONS_UMUL( 8, UChar, toUChar,
709 UShort, toUShort );
710 case AMD64G_CC_OP_UMULW: ACTIONS_UMUL( 16, UShort, toUShort,
711 UInt, toUInt );
712 case AMD64G_CC_OP_UMULL: ACTIONS_UMUL( 32, UInt, toUInt,
713 ULong, idULong );
714
715 case AMD64G_CC_OP_UMULQ: ACTIONS_UMULQ;
716
717 case AMD64G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, toUChar,
718 Short, toUShort );
719 case AMD64G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, toUShort,
720 Int, toUInt );
721 case AMD64G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, toUInt,
722 Long, idULong );
723
724 case AMD64G_CC_OP_SMULQ: ACTIONS_SMULQ;
725
726 case AMD64G_CC_OP_ANDN32: ACTIONS_ANDN( 32, UInt );
727 case AMD64G_CC_OP_ANDN64: ACTIONS_ANDN( 64, ULong );
728
729 case AMD64G_CC_OP_BLSI32: ACTIONS_BLSI( 32, UInt );
730 case AMD64G_CC_OP_BLSI64: ACTIONS_BLSI( 64, ULong );
731
732 case AMD64G_CC_OP_BLSMSK32: ACTIONS_BLSMSK( 32, UInt );
733 case AMD64G_CC_OP_BLSMSK64: ACTIONS_BLSMSK( 64, ULong );
734
735 case AMD64G_CC_OP_BLSR32: ACTIONS_BLSR( 32, UInt );
736 case AMD64G_CC_OP_BLSR64: ACTIONS_BLSR( 64, ULong );
737
738 default:
739 /* shouldn't really make these calls from generated code */
740 vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
741 "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
742 cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
743 vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
744 }
745 }
746
747
748 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
749 /* Calculate all the 6 flags from the supplied thunk parameters. */
750 ULong amd64g_calculate_rflags_all ( ULong cc_op,
751 ULong cc_dep1,
752 ULong cc_dep2,
753 ULong cc_ndep )
754 {
755 # if PROFILE_RFLAGS
756 if (!initted) initCounts();
757 n_calc_all++;
758 if (SHOW_COUNTS_NOW) showCounts();
759 # endif
760 return
761 amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
762 }
763
764
765 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
766 /* Calculate just the carry flag from the supplied thunk parameters. */
767 ULong amd64g_calculate_rflags_c ( ULong cc_op,
768 ULong cc_dep1,
769 ULong cc_dep2,
770 ULong cc_ndep )
771 {
772 # if PROFILE_RFLAGS
773 if (!initted) initCounts();
774 n_calc_c++;
775 tabc_fast[cc_op]++;
776 if (SHOW_COUNTS_NOW) showCounts();
777 # endif
778
779 /* Fast-case some common ones. */
780 switch (cc_op) {
781 case AMD64G_CC_OP_COPY:
782 return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
783 case AMD64G_CC_OP_LOGICQ:
784 case AMD64G_CC_OP_LOGICL:
785 case AMD64G_CC_OP_LOGICW:
786 case AMD64G_CC_OP_LOGICB:
787 return 0;
788 // case AMD64G_CC_OP_SUBL:
789 // return ((UInt)cc_dep1) < ((UInt)cc_dep2)
790 // ? AMD64G_CC_MASK_C : 0;
791 // case AMD64G_CC_OP_SUBW:
792 // return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
793 // ? AMD64G_CC_MASK_C : 0;
794 // case AMD64G_CC_OP_SUBB:
795 // return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
796 // ? AMD64G_CC_MASK_C : 0;
797 // case AMD64G_CC_OP_INCL:
798 // case AMD64G_CC_OP_DECL:
799 // return cc_ndep & AMD64G_CC_MASK_C;
800 default:
801 break;
802 }
803
804 # if PROFILE_RFLAGS
805 tabc_fast[cc_op]--;
806 tabc_slow[cc_op]++;
807 # endif
808
809 return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
810 & AMD64G_CC_MASK_C;
811 }
812
813
814 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
815 /* returns 1 or 0 */
816 ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
817 ULong cc_op,
818 ULong cc_dep1,
819 ULong cc_dep2,
820 ULong cc_ndep )
821 {
822 ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
823 cc_dep2, cc_ndep);
824 ULong of,sf,zf,cf,pf;
825 ULong inv = cond & 1;
826
827 # if PROFILE_RFLAGS
828 if (!initted) initCounts();
829 tab_cond[cc_op][cond]++;
830 n_calc_cond++;
831 if (SHOW_COUNTS_NOW) showCounts();
832 # endif
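   /* Reader's note (not from the original): AMD64Condcode values come
      in even/odd pairs whose odd member is the negation of the even
      one (O/NO, B/NB, Z/NZ, ...), so "inv = cond & 1" above picks out
      the negated variant and lets each pair below share one flag
      computation. */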
833
834 switch (cond) {
835 case AMD64CondNO:
836 case AMD64CondO: /* OF == 1 */
837 of = rflags >> AMD64G_CC_SHIFT_O;
838 return 1 & (inv ^ of);
839
840 case AMD64CondNZ:
841 case AMD64CondZ: /* ZF == 1 */
842 zf = rflags >> AMD64G_CC_SHIFT_Z;
843 return 1 & (inv ^ zf);
844
845 case AMD64CondNB:
846 case AMD64CondB: /* CF == 1 */
847 cf = rflags >> AMD64G_CC_SHIFT_C;
848 return 1 & (inv ^ cf);
849 break;
850
851 case AMD64CondNBE:
852 case AMD64CondBE: /* (CF or ZF) == 1 */
853 cf = rflags >> AMD64G_CC_SHIFT_C;
854 zf = rflags >> AMD64G_CC_SHIFT_Z;
855 return 1 & (inv ^ (cf | zf));
856 break;
857
858 case AMD64CondNS:
859 case AMD64CondS: /* SF == 1 */
860 sf = rflags >> AMD64G_CC_SHIFT_S;
861 return 1 & (inv ^ sf);
862
863 case AMD64CondNP:
864 case AMD64CondP: /* PF == 1 */
865 pf = rflags >> AMD64G_CC_SHIFT_P;
866 return 1 & (inv ^ pf);
867
868 case AMD64CondNL:
869 case AMD64CondL: /* (SF xor OF) == 1 */
870 sf = rflags >> AMD64G_CC_SHIFT_S;
871 of = rflags >> AMD64G_CC_SHIFT_O;
872 return 1 & (inv ^ (sf ^ of));
873 break;
874
875 case AMD64CondNLE:
876 case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */
877 sf = rflags >> AMD64G_CC_SHIFT_S;
878 of = rflags >> AMD64G_CC_SHIFT_O;
879 zf = rflags >> AMD64G_CC_SHIFT_Z;
880 return 1 & (inv ^ ((sf ^ of) | zf));
881 break;
882
883 default:
884 /* shouldn't really make these calls from generated code */
885 vex_printf("amd64g_calculate_condition"
886 "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
887 cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
888 vpanic("amd64g_calculate_condition");
889 }
890 }
891
892
893 /* VISIBLE TO LIBVEX CLIENT */
894 ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/const VexGuestAMD64State* vex_state )
895 {
896 ULong rflags = amd64g_calculate_rflags_all_WRK(
897 vex_state->guest_CC_OP,
898 vex_state->guest_CC_DEP1,
899 vex_state->guest_CC_DEP2,
900 vex_state->guest_CC_NDEP
901 );
902 Long dflag = vex_state->guest_DFLAG;
903 vassert(dflag == 1 || dflag == -1);
904 if (dflag == -1)
905 rflags |= (1<<10);
906 if (vex_state->guest_IDFLAG == 1)
907 rflags |= (1<<21);
908 if (vex_state->guest_ACFLAG == 1)
909 rflags |= (1<<18);
910
911 return rflags;
912 }
913
914 /* VISIBLE TO LIBVEX CLIENT */
915 void
916 LibVEX_GuestAMD64_put_rflags ( ULong rflags,
917 /*MOD*/VexGuestAMD64State* vex_state )
918 {
919 /* D flag */
920 if (rflags & AMD64G_CC_MASK_D) {
921 vex_state->guest_DFLAG = -1;
922 rflags &= ~AMD64G_CC_MASK_D;
923 }
924 else
925 vex_state->guest_DFLAG = 1;
926
927 /* ID flag */
928 if (rflags & AMD64G_CC_MASK_ID) {
929 vex_state->guest_IDFLAG = 1;
930 rflags &= ~AMD64G_CC_MASK_ID;
931 }
932 else
933 vex_state->guest_IDFLAG = 0;
934
935 /* AC flag */
936 if (rflags & AMD64G_CC_MASK_AC) {
937 vex_state->guest_ACFLAG = 1;
938 rflags &= ~AMD64G_CC_MASK_AC;
939 }
940 else
941 vex_state->guest_ACFLAG = 0;
942
943 UInt cc_mask = AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z |
944 AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P;
945 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
946 vex_state->guest_CC_DEP1 = rflags & cc_mask;
947 vex_state->guest_CC_DEP2 = 0;
948 vex_state->guest_CC_NDEP = 0;
949 }
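
/* Note added for clarity (not from the original): rather than storing
   a flags word, put_rflags re-expresses the OSZACP bits as an
   already-computed thunk (AMD64G_CC_OP_COPY with the bits parked in
   CC_DEP1), which the COPY case of the worker above simply masks back
   out. */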
950
951 /* VISIBLE TO LIBVEX CLIENT */
952 void
953 LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag,
954 /*MOD*/VexGuestAMD64State* vex_state )
955 {
956 ULong oszacp = amd64g_calculate_rflags_all_WRK(
957 vex_state->guest_CC_OP,
958 vex_state->guest_CC_DEP1,
959 vex_state->guest_CC_DEP2,
960 vex_state->guest_CC_NDEP
961 );
962 if (new_carry_flag & 1) {
963 oszacp |= AMD64G_CC_MASK_C;
964 } else {
965 oszacp &= ~AMD64G_CC_MASK_C;
966 }
967 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
968 vex_state->guest_CC_DEP1 = oszacp;
969 vex_state->guest_CC_DEP2 = 0;
970 vex_state->guest_CC_NDEP = 0;
971 }
972
973
974 /*---------------------------------------------------------------*/
975 /*--- %rflags translation-time function specialisers. ---*/
976 /*--- These help iropt specialise calls the above run-time ---*/
977 /*--- %rflags functions. ---*/
978 /*---------------------------------------------------------------*/
979
980 /* Used by the optimiser to try specialisations. Returns an
981 equivalent expression, or NULL if none. */
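
/* A hedged illustration, added for clarity (not from the original):
   "specialising" here means spotting a call whose cond/cc_op arguments
   are known constants and rewriting it as plain IR.  For instance, per
   the SUBQ/Z case below, a call of the form
      amd64g_calculate_condition(AMD64CondZ, AMD64G_CC_OP_SUBQ,
                                 dep1, dep2, ndep)
   can be handed back as
      Iop_1Uto64(Iop_CmpEQ64(dep1, dep2))
   i.e. "the subtraction gave zero iff the operands were equal". */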
982
983 static Bool isU64 ( IRExpr* e, ULong n )
984 {
985 return toBool( e->tag == Iex_Const
986 && e->Iex.Const.con->tag == Ico_U64
987 && e->Iex.Const.con->Ico.U64 == n );
988 }
989
990 IRExpr* guest_amd64_spechelper ( const HChar* function_name,
991 IRExpr** args,
992 IRStmt** precedingStmts,
993 Int n_precedingStmts )
994 {
995 # define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
996 # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
997 # define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
998 # define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
999 # define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
1000
1001 Int i, arity = 0;
1002 for (i = 0; args[i]; i++)
1003 arity++;
1004 # if 0
1005 vex_printf("spec request:\n");
1006 vex_printf(" %s ", function_name);
1007 for (i = 0; i < arity; i++) {
1008 vex_printf(" ");
1009 ppIRExpr(args[i]);
1010 }
1011 vex_printf("\n");
1012 # endif
1013
1014 /* --------- specialising "amd64g_calculate_condition" --------- */
1015
1016 if (vex_streq(function_name, "amd64g_calculate_condition")) {
1017 /* specialise calls to above "calculate condition" function */
1018 IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
1019 vassert(arity == 5);
1020 cond = args[0];
1021 cc_op = args[1];
1022 cc_dep1 = args[2];
1023 cc_dep2 = args[3];
1024
1025 /*---------------- ADDQ ----------------*/
1026
1027 if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
1028 /* long long add, then Z --> test (dst+src == 0) */
1029 return unop(Iop_1Uto64,
1030 binop(Iop_CmpEQ64,
1031 binop(Iop_Add64, cc_dep1, cc_dep2),
1032 mkU64(0)));
1033 }
1034
1035 /*---------------- ADDL ----------------*/
1036
1037 if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondO)) {
1038 /* This is very commonly generated by Javascript JITs, for
1039 the idiom "do a 32-bit add and jump to out-of-line code if
1040 an overflow occurs". */
1041 /* long add, then O (overflow)
1042 --> ((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 + dep2)))[31]
1043 --> (((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
1044 --> (((not(dep1 ^ dep2)) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
1045 */
1046 vassert(isIRAtom(cc_dep1));
1047 vassert(isIRAtom(cc_dep2));
1048 return
1049 binop(Iop_And64,
1050 binop(Iop_Shr64,
1051 binop(Iop_And64,
1052 unop(Iop_Not64,
1053 binop(Iop_Xor64, cc_dep1, cc_dep2)),
1054 binop(Iop_Xor64,
1055 cc_dep1,
1056 binop(Iop_Add64, cc_dep1, cc_dep2))),
1057 mkU8(31)),
1058 mkU64(1));
1059
1060 }
1061
1062 /*---------------- SUBQ ----------------*/
1063
1064 /* 0, */
1065 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondO)) {
1066 /* long long sub/cmp, then O (overflow)
1067 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[63]
1068 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2))) >>u 63
1069 */
1070 vassert(isIRAtom(cc_dep1));
1071 vassert(isIRAtom(cc_dep2));
1072 return binop(Iop_Shr64,
1073 binop(Iop_And64,
1074 binop(Iop_Xor64, cc_dep1, cc_dep2),
1075 binop(Iop_Xor64,
1076 cc_dep1,
1077 binop(Iop_Sub64, cc_dep1, cc_dep2))),
1078 mkU8(63));
1079 }
1080 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNO)) {
1081 /* No action. Never yet found a test case. */
1082 }
1083
1084 /* 2, 3 */
1085 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
1086 /* long long sub/cmp, then B (unsigned less than)
1087 --> test dst <u src */
1088 return unop(Iop_1Uto64,
1089 binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
1090 }
1091 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
1092 /* long long sub/cmp, then NB (unsigned greater than or equal)
1093 --> test src <=u dst */
1094 /* Note, args are opposite way round from the usual */
1095 return unop(Iop_1Uto64,
1096 binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
1097 }
1098
1099 /* 4, 5 */
1100 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
1101 /* long long sub/cmp, then Z --> test dst==src */
1102 return unop(Iop_1Uto64,
1103 binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
1104 }
1105 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
1106 /* long long sub/cmp, then NZ --> test dst!=src */
1107 return unop(Iop_1Uto64,
1108 binop(Iop_CmpNE64,cc_dep1,cc_dep2));
1109 }
1110
1111 /* 6, 7 */
1112 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
1113 /* long long sub/cmp, then BE (unsigned less than or equal)
1114 --> test dst <=u src */
1115 return unop(Iop_1Uto64,
1116 binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
1117 }
1118 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNBE)) {
1119 /* long long sub/cmp, then NBE (unsigned greater than)
1120 --> test !(dst <=u src) */
1121 return binop(Iop_Xor64,
1122 unop(Iop_1Uto64,
1123 binop(Iop_CmpLE64U, cc_dep1, cc_dep2)),
1124 mkU64(1));
1125 }
1126
1127 /* 8, 9 */
1128 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondS)) {
1129 /* long long sub/cmp, then S (negative)
1130 --> (dst-src)[63]
1131 --> (dst-src) >>u 63 */
1132 return binop(Iop_Shr64,
1133 binop(Iop_Sub64, cc_dep1, cc_dep2),
1134 mkU8(63));
1135 }
1136 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNS)) {
1137 /* long long sub/cmp, then NS (not negative)
1138 --> (dst-src)[63] ^ 1
1139 --> ((dst-src) >>u 63) ^ 1 */
1140 return binop(Iop_Xor64,
1141 binop(Iop_Shr64,
1142 binop(Iop_Sub64, cc_dep1, cc_dep2),
1143 mkU8(63)),
1144 mkU64(1));
1145 }
1146
1147 /* 12, 13 */
1148 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
1149 /* long long sub/cmp, then L (signed less than)
1150 --> test dst <s src */
1151 return unop(Iop_1Uto64,
1152 binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
1153 }
1154 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNL)) {
1155 /* long long sub/cmp, then NL (signed greater than or equal)
1156 --> test dst >=s src
1157 --> test src <=s dst */
1158 return unop(Iop_1Uto64,
1159 binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
1160 }
1161
1162 /* 14, 15 */
1163 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondLE)) {
1164 /* long long sub/cmp, then LE (signed less than or equal)
1165 --> test dst <=s src */
1166 return unop(Iop_1Uto64,
1167 binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
1168 }
1169 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNLE)) {
1170 /* long long sub/cmp, then NLE (signed greater than)
1171 --> test !(dst <=s src)
1172 --> test (dst >s src)
1173 --> test (src <s dst) */
1174 return unop(Iop_1Uto64,
1175 binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
1176
1177 }
1178
1179 /*---------------- SUBL ----------------*/
1180
1181 /* 0, */
1182 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondO)) {
1183 /* This is very commonly generated by Javascript JITs, for
1184 the idiom "do a 32-bit subtract and jump to out-of-line
1185 code if an overflow occurs". */
1186 /* long sub/cmp, then O (overflow)
1187 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[31]
1188 --> (((dep1 ^ dep2) & (dep1 ^ (dep1 -64 dep2))) >>u 31) & 1
1189 */
1190 vassert(isIRAtom(cc_dep1));
1191 vassert(isIRAtom(cc_dep2));
1192 return
1193 binop(Iop_And64,
1194 binop(Iop_Shr64,
1195 binop(Iop_And64,
1196 binop(Iop_Xor64, cc_dep1, cc_dep2),
1197 binop(Iop_Xor64,
1198 cc_dep1,
1199 binop(Iop_Sub64, cc_dep1, cc_dep2))),
1200 mkU8(31)),
1201 mkU64(1));
1202 }
1203 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNO)) {
1204 /* No action. Never yet found a test case. */
1205 }
1206
1207 /* 2, 3 */
1208 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) {
1209 /* long sub/cmp, then B (unsigned less than)
1210 --> test dst <u src */
1211 return unop(Iop_1Uto64,
1212 binop(Iop_CmpLT32U,
1213 unop(Iop_64to32, cc_dep1),
1214 unop(Iop_64to32, cc_dep2)));
1215 }
1216 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNB)) {
1217 /* long sub/cmp, then NB (unsigned greater than or equal)
1218 --> test src <=u dst */
1219 /* Note, args are opposite way round from the usual */
1220 return unop(Iop_1Uto64,
1221 binop(Iop_CmpLE32U,
1222 unop(Iop_64to32, cc_dep2),
1223 unop(Iop_64to32, cc_dep1)));
1224 }
1225
1226 /* 4, 5 */
1227 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
1228 /* long sub/cmp, then Z --> test dst==src */
1229 return unop(Iop_1Uto64,
1230 binop(Iop_CmpEQ32,
1231 unop(Iop_64to32, cc_dep1),
1232 unop(Iop_64to32, cc_dep2)));
1233 }
1234 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
1235 /* long sub/cmp, then NZ --> test dst!=src */
1236 return unop(Iop_1Uto64,
1237 binop(Iop_CmpNE32,
1238 unop(Iop_64to32, cc_dep1),
1239 unop(Iop_64to32, cc_dep2)));
1240 }
1241
1242 /* 6, 7 */
1243 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
1244 /* long sub/cmp, then BE (unsigned less than or equal)
1245 --> test dst <=u src */
1246 return unop(Iop_1Uto64,
1247 binop(Iop_CmpLE32U,
1248 unop(Iop_64to32, cc_dep1),
1249 unop(Iop_64to32, cc_dep2)));
1250 }
1251 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
1252 /* long sub/cmp, then NBE (unsigned greater than)
1253 --> test src <u dst */
1254 /* Note, args are opposite way round from the usual */
1255 return unop(Iop_1Uto64,
1256 binop(Iop_CmpLT32U,
1257 unop(Iop_64to32, cc_dep2),
1258 unop(Iop_64to32, cc_dep1)));
1259 }
1260
1261 /* 8, 9 */
1262 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
1263 /* long sub/cmp, then S (negative)
1264 --> (dst-src)[31]
1265 --> ((dst -64 src) >>u 31) & 1
1266 Pointless to narrow the args to 32 bit before the subtract. */
1267 return binop(Iop_And64,
1268 binop(Iop_Shr64,
1269 binop(Iop_Sub64, cc_dep1, cc_dep2),
1270 mkU8(31)),
1271 mkU64(1));
1272 }
1273 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNS)) {
1274 /* long sub/cmp, then NS (not negative)
1275 --> (dst-src)[31] ^ 1
1276 --> (((dst -64 src) >>u 31) & 1) ^ 1
1277 Pointless to narrow the args to 32 bit before the subtract. */
1278 return binop(Iop_Xor64,
1279 binop(Iop_And64,
1280 binop(Iop_Shr64,
1281 binop(Iop_Sub64, cc_dep1, cc_dep2),
1282 mkU8(31)),
1283 mkU64(1)),
1284 mkU64(1));
1285 }
1286
1287 /* 12, 13 */
1288 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
1289 /* long sub/cmp, then L (signed less than)
1290 --> test dst <s src */
1291 return unop(Iop_1Uto64,
1292 binop(Iop_CmpLT32S,
1293 unop(Iop_64to32, cc_dep1),
1294 unop(Iop_64to32, cc_dep2)));
1295 }
1296 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNL)) {
1297 /* long sub/cmp, then NL (signed greater than or equal)
1298 --> test dst >=s src
1299 --> test src <=s dst */
1300 return unop(Iop_1Uto64,
1301 binop(Iop_CmpLE32S,
1302 unop(Iop_64to32, cc_dep2),
1303 unop(Iop_64to32, cc_dep1)));
1304 }
1305
1306 /* 14, 15 */
1307 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
1308 /* long sub/cmp, then LE (signed less than or equal)
1309 --> test dst <=s src */
1310 return unop(Iop_1Uto64,
1311 binop(Iop_CmpLE32S,
1312 unop(Iop_64to32, cc_dep1),
1313 unop(Iop_64to32, cc_dep2)));
1314
1315 }
1316 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
1317 /* long sub/cmp, then NLE (signed greater than)
1318 --> test !(dst <=s src)
1319 --> test (dst >s src)
1320 --> test (src <s dst) */
1321 return unop(Iop_1Uto64,
1322 binop(Iop_CmpLT32S,
1323 unop(Iop_64to32, cc_dep2),
1324 unop(Iop_64to32, cc_dep1)));
1325
1326 }
1327
1328 /*---------------- SUBW ----------------*/
1329
1330 /* 4, 5 */
1331 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
1332 /* word sub/cmp, then Z --> test dst==src */
1333 return unop(Iop_1Uto64,
1334 binop(Iop_CmpEQ16,
1335 unop(Iop_64to16,cc_dep1),
1336 unop(Iop_64to16,cc_dep2)));
1337 }
1338 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
1339 /* word sub/cmp, then NZ --> test dst!=src */
1340 return unop(Iop_1Uto64,
1341 binop(Iop_CmpNE16,
1342 unop(Iop_64to16,cc_dep1),
1343 unop(Iop_64to16,cc_dep2)));
1344 }
1345
1346 /* 6, */
1347 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondBE)) {
1348 /* word sub/cmp, then BE (unsigned less than or equal)
1349 --> test dst <=u src */
1350 return unop(Iop_1Uto64,
1351 binop(Iop_CmpLE64U,
1352 binop(Iop_Shl64, cc_dep1, mkU8(48)),
1353 binop(Iop_Shl64, cc_dep2, mkU8(48))));
1354 }
1355
1356 /* 14, */
1357 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
1358 /* word sub/cmp, then LE (signed less than or equal)
1359 --> test dst <=s src */
1360 return unop(Iop_1Uto64,
1361 binop(Iop_CmpLE64S,
1362 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1363 binop(Iop_Shl64,cc_dep2,mkU8(48))));
1364
1365 }
1366
1367 /*---------------- SUBB ----------------*/
1368
1369 /* 2, 3 */
1370 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondB)) {
1371 /* byte sub/cmp, then B (unsigned less than)
1372 --> test dst <u src */
1373 return unop(Iop_1Uto64,
1374 binop(Iop_CmpLT64U,
1375 binop(Iop_And64, cc_dep1, mkU64(0xFF)),
1376 binop(Iop_And64, cc_dep2, mkU64(0xFF))));
1377 }
1378 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNB)) {
1379 /* byte sub/cmp, then NB (unsigned greater than or equal)
1380 --> test src <=u dst */
1381 /* Note, args are opposite way round from the usual */
1382 return unop(Iop_1Uto64,
1383 binop(Iop_CmpLE64U,
1384 binop(Iop_And64, cc_dep2, mkU64(0xFF)),
1385 binop(Iop_And64, cc_dep1, mkU64(0xFF))));
1386 }
1387
1388 /* 4, 5 */
1389 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
1390 /* byte sub/cmp, then Z --> test dst==src */
1391 return unop(Iop_1Uto64,
1392 binop(Iop_CmpEQ8,
1393 unop(Iop_64to8,cc_dep1),
1394 unop(Iop_64to8,cc_dep2)));
1395 }
1396 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
1397 /* byte sub/cmp, then NZ --> test dst!=src */
1398 return unop(Iop_1Uto64,
1399 binop(Iop_CmpNE8,
1400 unop(Iop_64to8,cc_dep1),
1401 unop(Iop_64to8,cc_dep2)));
1402 }
1403
1404 /* 6, */
1405 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) {
1406 /* byte sub/cmp, then BE (unsigned less than or equal)
1407 --> test dst <=u src */
1408 return unop(Iop_1Uto64,
1409 binop(Iop_CmpLE64U,
1410 binop(Iop_And64, cc_dep1, mkU64(0xFF)),
1411 binop(Iop_And64, cc_dep2, mkU64(0xFF))));
1412 }
1413
1414 /* 8, 9 */
1415 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
1416 && isU64(cc_dep2, 0)) {
1417 /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
1418 --> test dst <s 0
1419 --> (ULong)dst[7]
1420 This is yet another scheme by which gcc figures out if the
1421 top bit of a byte is 1 or 0. See also LOGICB/CondS below. */
1422 /* Note: isU64(cc_dep2, 0) is correct, even though this is
1423 for an 8-bit comparison, since the args to the helper
1424 function are always U64s. */
1425 return binop(Iop_And64,
1426 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1427 mkU64(1));
1428 }
1429 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
1430 && isU64(cc_dep2, 0)) {
1431 /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
1432 --> test !(dst <s 0)
1433 --> (ULong) !dst[7]
1434 */
1435 return binop(Iop_Xor64,
1436 binop(Iop_And64,
1437 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1438 mkU64(1)),
1439 mkU64(1));
1440 }
1441
1442 /*---------------- LOGICQ ----------------*/
1443
1444 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
1445 /* long long and/or/xor, then Z --> test dst==0 */
1446 return unop(Iop_1Uto64,
1447 binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1448 }
1449 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) {
1450 /* long long and/or/xor, then NZ --> test dst!=0 */
1451 return unop(Iop_1Uto64,
1452 binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1453 }
1454
1455 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
1456 /* long long and/or/xor, then L
1457 LOGIC sets SF and ZF according to the
1458 result and makes OF be zero. L computes SF ^ OF, but
1459 OF is zero, so this reduces to SF -- which will be 1 iff
1460 the result is < signed 0. Hence ...
1461 */
1462 return unop(Iop_1Uto64,
1463 binop(Iop_CmpLT64S,
1464 cc_dep1,
1465 mkU64(0)));
1466 }
1467
1468 /*---------------- LOGICL ----------------*/
1469
1470 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
1471 /* long and/or/xor, then Z --> test dst==0 */
1472 return unop(Iop_1Uto64,
1473 binop(Iop_CmpEQ32,
1474 unop(Iop_64to32, cc_dep1),
1475 mkU32(0)));
1476 }
1477 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
1478 /* long and/or/xor, then NZ --> test dst!=0 */
1479 return unop(Iop_1Uto64,
1480 binop(Iop_CmpNE32,
1481 unop(Iop_64to32, cc_dep1),
1482 mkU32(0)));
1483 }
1484
1485 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
1486 /* long and/or/xor, then LE
1487 This is pretty subtle. LOGIC sets SF and ZF according to the
1488 result and makes OF be zero. LE computes (SF ^ OF) | ZF, but
1489 OF is zero, so this reduces to SF | ZF -- which will be 1 iff
1490 the result is <=signed 0. Hence ...
1491 */
1492 return unop(Iop_1Uto64,
1493 binop(Iop_CmpLE32S,
1494 unop(Iop_64to32, cc_dep1),
1495 mkU32(0)));
1496 }
1497
1498 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) {
1499 /* long and/or/xor, then S --> (ULong)result[31] */
1500 return binop(Iop_And64,
1501 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1502 mkU64(1));
1503 }
1504 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) {
1505 /* long and/or/xor, then NS --> (ULong) ~result[31] */
1506 return binop(Iop_Xor64,
1507 binop(Iop_And64,
1508 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1509 mkU64(1)),
1510 mkU64(1));
1511 }
1512
1513 /*---------------- LOGICW ----------------*/
1514
1515 if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondZ)) {
1516 /* word and/or/xor, then Z --> test dst==0 */
1517 return unop(Iop_1Uto64,
1518 binop(Iop_CmpEQ64,
1519 binop(Iop_And64, cc_dep1, mkU64(0xFFFF)),
1520 mkU64(0)));
1521 }
1522 if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondNZ)) {
1523 /* word and/or/xor, then NZ --> test dst!=0 */
1524 return unop(Iop_1Uto64,
1525 binop(Iop_CmpNE64,
1526 binop(Iop_And64, cc_dep1, mkU64(0xFFFF)),
1527 mkU64(0)));
1528 }
1529
1530 /*---------------- LOGICB ----------------*/
1531
1532 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
1533 /* byte and/or/xor, then Z --> test dst==0 */
1534 return unop(Iop_1Uto64,
1535 binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)),
1536 mkU64(0)));
1537 }
1538 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) {
1539 /* byte and/or/xor, then NZ --> test dst!=0 */
1540 return unop(Iop_1Uto64,
1541 binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)),
1542 mkU64(0)));
1543 }
1544
1545 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
1546 /* this is an idiom gcc sometimes uses to find out if the top
1547 bit of a byte register is set: eg testb %al,%al; js ..
1548 Since it just depends on the top bit of the byte, extract
1549 that bit and explicitly get rid of all the rest. This
1550 helps memcheck avoid false positives in the case where any
1551 of the other bits in the byte are undefined. */
1552 /* byte and/or/xor, then S --> (ULong)result[7] */
1553 return binop(Iop_And64,
1554 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1555 mkU64(1));
1556 }
1557 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) {
1558 /* byte and/or/xor, then NS --> (ULong)!result[7] */
1559 return binop(Iop_Xor64,
1560 binop(Iop_And64,
1561 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1562 mkU64(1)),
1563 mkU64(1));
1564 }
1565
1566 /*---------------- INCB ----------------*/
1567
1568 if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
1569 /* 8-bit inc, then LE --> sign bit of the arg */
1570 return binop(Iop_And64,
1571 binop(Iop_Shr64,
1572 binop(Iop_Sub64, cc_dep1, mkU64(1)),
1573 mkU8(7)),
1574 mkU64(1));
1575 }
1576
1577 /*---------------- INCW ----------------*/
1578
1579 if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
1580 /* 16-bit inc, then Z --> test dst == 0 */
1581 return unop(Iop_1Uto64,
1582 binop(Iop_CmpEQ64,
1583 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1584 mkU64(0)));
1585 }
1586
1587 /*---------------- DECL ----------------*/
1588
1589 if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
1590 /* dec L, then Z --> test dst == 0 */
1591 return unop(Iop_1Uto64,
1592 binop(Iop_CmpEQ32,
1593 unop(Iop_64to32, cc_dep1),
1594 mkU32(0)));
1595 }
1596
1597 /*---------------- DECW ----------------*/
1598
1599 if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
1600 /* 16-bit dec, then NZ --> test dst != 0 */
1601 return unop(Iop_1Uto64,
1602 binop(Iop_CmpNE64,
1603 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1604 mkU64(0)));
1605 }
1606
1607 /*---------------- COPY ----------------*/
1608 /* This can happen, as a result of amd64 FP compares: "comisd ... ;
1609 jbe" for example. */
1610
1611 if (isU64(cc_op, AMD64G_CC_OP_COPY) &&
1612 (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
1613 /* COPY, then BE --> extract C and Z from dep1, and test (C
1614 or Z == 1). */
1615 /* COPY, then NBE --> extract C and Z from dep1, and test (C
1616 or Z == 0). */
1617 ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
1618 return
1619 unop(
1620 Iop_1Uto64,
1621 binop(
1622 Iop_CmpEQ64,
1623 binop(
1624 Iop_And64,
1625 binop(
1626 Iop_Or64,
1627 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
1628 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))
1629 ),
1630 mkU64(1)
1631 ),
1632 mkU64(nnn)
1633 )
1634 );
1635 }
1636
1637 if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) {
1638 /* COPY, then B --> extract C dep1, and test (C == 1). */
1639 return
1640 unop(
1641 Iop_1Uto64,
1642 binop(
1643 Iop_CmpNE64,
1644 binop(
1645 Iop_And64,
1646 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
1647 mkU64(1)
1648 ),
1649 mkU64(0)
1650 )
1651 );
1652 }
1653
1654 if (isU64(cc_op, AMD64G_CC_OP_COPY)
1655 && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
1656 /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
1657 /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
1658 UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
1659 return
1660 unop(
1661 Iop_1Uto64,
1662 binop(
1663 Iop_CmpEQ64,
1664 binop(
1665 Iop_And64,
1666 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
1667 mkU64(1)
1668 ),
1669 mkU64(nnn)
1670 )
1671 );
1672 }
1673
1674 if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) {
1675 /* COPY, then P --> extract P from dep1, and test (P == 1). */
1676 return
1677 unop(
1678 Iop_1Uto64,
1679 binop(
1680 Iop_CmpNE64,
1681 binop(
1682 Iop_And64,
1683 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
1684 mkU64(1)
1685 ),
1686 mkU64(0)
1687 )
1688 );
1689 }
1690
1691 return NULL;
1692 }
1693
1694 /* --------- specialising "amd64g_calculate_rflags_c" --------- */
1695
1696 if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
1697 /* specialise calls to above "calculate_rflags_c" function */
1698 IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
1699 vassert(arity == 4);
1700 cc_op = args[0];
1701 cc_dep1 = args[1];
1702 cc_dep2 = args[2];
1703 cc_ndep = args[3];
1704
1705 if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
1706 /* C after sub denotes unsigned less than */
1707 return unop(Iop_1Uto64,
1708 binop(Iop_CmpLT64U,
1709 cc_dep1,
1710 cc_dep2));
1711 }
1712 if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
1713 /* C after sub denotes unsigned less than */
1714 return unop(Iop_1Uto64,
1715 binop(Iop_CmpLT32U,
1716 unop(Iop_64to32, cc_dep1),
1717 unop(Iop_64to32, cc_dep2)));
1718 }
1719 if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
1720 /* C after sub denotes unsigned less than */
1721 return unop(Iop_1Uto64,
1722 binop(Iop_CmpLT64U,
1723 binop(Iop_And64,cc_dep1,mkU64(0xFF)),
1724 binop(Iop_And64,cc_dep2,mkU64(0xFF))));
1725 }
1726 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
1727 || isU64(cc_op, AMD64G_CC_OP_LOGICL)
1728 || isU64(cc_op, AMD64G_CC_OP_LOGICW)
1729 || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
1730 /* cflag after logic is zero */
1731 return mkU64(0);
1732 }
1733 if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL)
1734 || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
1735 /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
1736 return cc_ndep;
1737 }
1738
1739 # if 0
1740 if (cc_op->tag == Iex_Const) {
1741 vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
1742 }
1743 # endif
1744
1745 return NULL;
1746 }
1747
1748 # undef unop
1749 # undef binop
1750 # undef mkU64
1751 # undef mkU32
1752 # undef mkU8
1753
1754 return NULL;
1755 }
1756
1757
1758 /*---------------------------------------------------------------*/
1759 /*--- Supporting functions for x87 FPU activities. ---*/
1760 /*---------------------------------------------------------------*/
1761
1762 static inline Bool host_is_little_endian ( void )
1763 {
1764 UInt x = 0x76543210;
1765 UChar* p = (UChar*)(&x);
1766 return toBool(*p == 0x10);
1767 }
1768
1769 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
1770 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
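/* FXAM classifies the value into the FPU condition codes C3,C2,C0 (C1
   receives the sign bit).  The encodings produced below are the
   architected ones:
      C3 C2 C0
       1  0  1   empty register
       1  0  0   zero
       1  1  0   denormal
       0  1  1   infinity
       0  0  1   NaN
       0  1  0   normal finite number                                  */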
1771 ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl )
1772 {
1773 Bool mantissaIsZero;
1774 Int bexp;
1775 UChar sign;
1776 UChar* f64;
1777
1778 vassert(host_is_little_endian());
1779
1780 /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
1781
1782 f64 = (UChar*)(&dbl);
1783 sign = toUChar( (f64[7] >> 7) & 1 );
1784
1785 /* First off, if the tag indicates the register was empty,
1786 return 1,0,sign,1 */
1787 if (tag == 0) {
1788 /* vex_printf("Empty\n"); */
1789 return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1)
1790 | AMD64G_FC_MASK_C0;
1791 }
1792
1793 bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
1794 bexp &= 0x7FF;
1795
1796 mantissaIsZero
1797 = toBool(
1798 (f64[6] & 0x0F) == 0
1799 && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
1800 );
1801
1802 /* If both exponent and mantissa are zero, the value is zero.
1803 Return 1,0,sign,0. */
1804 if (bexp == 0 && mantissaIsZero) {
1805 /* vex_printf("Zero\n"); */
1806 return AMD64G_FC_MASK_C3 | 0
1807 | (sign << AMD64G_FC_SHIFT_C1) | 0;
1808 }
1809
1810 /* If exponent is zero but mantissa isn't, it's a denormal.
1811 Return 1,1,sign,0. */
1812 if (bexp == 0 && !mantissaIsZero) {
1813 /* vex_printf("Denormal\n"); */
1814 return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2
1815 | (sign << AMD64G_FC_SHIFT_C1) | 0;
1816 }
1817
1818 /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
1819 Return 0,1,sign,1. */
1820 if (bexp == 0x7FF && mantissaIsZero) {
1821 /* vex_printf("Inf\n"); */
1822 return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1)
1823 | AMD64G_FC_MASK_C0;
1824 }
1825
1826 /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
1827 Return 0,0,sign,1. */
1828 if (bexp == 0x7FF && !mantissaIsZero) {
1829 /* vex_printf("NaN\n"); */
1830 return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
1831 }
1832
1833 /* Uh, ok, we give up. It must be a normal finite number.
1834 Return 0,1,sign,0.
1835 */
1836 /* vex_printf("normal\n"); */
1837 return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
1838 }
1839
1840
1841 /* This is used to implement both 'frstor' and 'fldenv'. The latter
1842 appears to differ from the former only in that the 8 FP registers
1843 themselves are not transferred into the guest state. */
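/* Note on tags: the x87 tag word carries 2 bits per register
   (00 = valid, 01 = zero, 10 = special, 11 = empty), whereas the guest
   state keeps only a 1-bit tag per register (0 = empty, 1 = occupied).
   Hence the conversion below collapses any non-11 tag to "occupied". */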
1844 static
1845 VexEmNote do_put_x87 ( Bool moveRegs,
1846 /*IN*/UChar* x87_state,
1847 /*OUT*/VexGuestAMD64State* vex_state )
1848 {
1849 Int stno, preg;
1850 UInt tag;
1851 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1852 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1853 Fpu_State* x87 = (Fpu_State*)x87_state;
1854 UInt ftop = (x87->env[FP_ENV_STAT] >> 11) & 7;
1855 UInt tagw = x87->env[FP_ENV_TAG];
1856 UInt fpucw = x87->env[FP_ENV_CTRL];
1857 UInt c3210 = x87->env[FP_ENV_STAT] & 0x4700;
1858 VexEmNote ew;
1859 UInt fpround;
1860 ULong pair;
1861
1862 /* Copy registers and tags */
1863 for (stno = 0; stno < 8; stno++) {
1864 preg = (stno + ftop) & 7;
1865 tag = (tagw >> (2*preg)) & 3;
1866 if (tag == 3) {
1867 /* register is empty */
1868 /* hmm, if it's empty, does it still get written? Probably
1869 safer to say it does. If we don't, memcheck could get out
1870 of sync, in that it thinks all FP registers are defined by
1871 this helper, but in reality some have not been updated. */
1872 if (moveRegs)
1873 vexRegs[preg] = 0; /* IEEE754 64-bit zero */
1874 vexTags[preg] = 0;
1875 } else {
1876 /* register is non-empty */
1877 if (moveRegs)
1878 convert_f80le_to_f64le( &x87->reg[10*stno],
1879 (UChar*)&vexRegs[preg] );
1880 vexTags[preg] = 1;
1881 }
1882 }
1883
1884 /* stack pointer */
1885 vex_state->guest_FTOP = ftop;
1886
1887 /* status word */
1888 vex_state->guest_FC3210 = c3210;
1889
1890 /* handle the control word, setting FPROUND and detecting any
1891 emulation warnings. */
1892 pair = amd64g_check_fldcw ( (ULong)fpucw );
1893 fpround = (UInt)pair & 0xFFFFFFFFULL;
1894 ew = (VexEmNote)(pair >> 32);
1895
1896 vex_state->guest_FPROUND = fpround & 3;
1897
1898 /* emulation warnings --> caller */
1899 return ew;
1900 }
1901
1902
1903 /* Create an x87 FPU state from the guest state, as close as
1904 we can approximate it. */
1905 static
1906 void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
1907 /*OUT*/UChar* x87_state )
1908 {
1909 Int i, stno, preg;
1910 UInt tagw;
1911 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1912 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1913 Fpu_State* x87 = (Fpu_State*)x87_state;
1914 UInt ftop = vex_state->guest_FTOP;
1915 UInt c3210 = vex_state->guest_FC3210;
1916
1917 for (i = 0; i < 14; i++)
1918 x87->env[i] = 0;
1919
1920 x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
1921 x87->env[FP_ENV_STAT]
1922 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
1923 x87->env[FP_ENV_CTRL]
1924 = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
1925
1926 /* Dump the register stack in ST order. */
1927 tagw = 0;
1928 for (stno = 0; stno < 8; stno++) {
1929 preg = (stno + ftop) & 7;
1930 if (vexTags[preg] == 0) {
1931 /* register is empty */
1932 tagw |= (3 << (2*preg));
1933 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1934 &x87->reg[10*stno] );
1935 } else {
1936 /* register is full. */
1937 tagw |= (0 << (2*preg));
1938 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1939 &x87->reg[10*stno] );
1940 }
1941 }
1942 x87->env[FP_ENV_TAG] = toUShort(tagw);
1943 }
1944
1945
1946 /*---------------------------------------------------------------*/
1947 /*--- Supporting functions for XSAVE/FXSAVE. ---*/
1948 /*---------------------------------------------------------------*/
1949
1950 /* CALLED FROM GENERATED CODE */
1951 /* DIRTY HELPER (reads guest state, writes guest mem) */
1952 /* XSAVE component 0 is the x87 FPU state. */
1953 void amd64g_dirtyhelper_XSAVE_COMPONENT_0
1954 ( VexGuestAMD64State* gst, HWord addr )
1955 {
1956 /* Derived from values obtained from
1957 vendor_id : AuthenticAMD
1958 cpu family : 15
1959 model : 12
1960 model name : AMD Athlon(tm) 64 Processor 3200+
1961 stepping : 0
1962 cpu MHz : 2200.000
1963 cache size : 512 KB
1964 */
1965 /* Somewhat roundabout, but at least it's simple. */
1966 Fpu_State tmp;
1967 UShort* addrS = (UShort*)addr;
1968 UChar* addrC = (UChar*)addr;
1969 UShort fp_tags;
1970 UInt summary_tags;
1971 Int r, stno;
1972 UShort *srcS, *dstS;
1973
1974 do_get_x87( gst, (UChar*)&tmp );
1975
1976 /* Now build the proper fxsave x87 image from the fsave x87 image
1977 we just made. */
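/* Layout of the region written below (byte offsets into 'addr', as per
   the architected FXSAVE image): 0 FCW, 2 FSW, 4 abridged FTW, 6 FOP,
   8..15 FPU instruction pointer, 16..23 FPU data pointer, 24..27 MXCSR,
   28..31 MXCSR_MASK, then the eight 80-bit ST registers at 32 + 16*stno,
   each padded out to 16 bytes. */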
1978
1979 addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
1980    addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */
1981
1982 /* set addrS[2] in an endian-independent way */
1983 summary_tags = 0;
1984 fp_tags = tmp.env[FP_ENV_TAG];
1985 for (r = 0; r < 8; r++) {
1986 if ( ((fp_tags >> (2*r)) & 3) != 3 )
1987 summary_tags |= (1 << r);
1988 }
1989 addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */
1990 addrC[5] = 0; /* pad */
1991
1992 /* FOP: faulting fpu opcode. From experimentation, the real CPU
1993 does not write this field. (?!) */
1994 addrS[3] = 0; /* BOGUS */
1995
1996 /* RIP (Last x87 instruction pointer). From experimentation, the
1997 real CPU does not write this field. (?!) */
1998 addrS[4] = 0; /* BOGUS */
1999 addrS[5] = 0; /* BOGUS */
2000 addrS[6] = 0; /* BOGUS */
2001 addrS[7] = 0; /* BOGUS */
2002
2003 /* RDP (Last x87 data pointer). From experimentation, the real CPU
2004 does not write this field. (?!) */
2005 addrS[8] = 0; /* BOGUS */
2006 addrS[9] = 0; /* BOGUS */
2007 addrS[10] = 0; /* BOGUS */
2008 addrS[11] = 0; /* BOGUS */
2009
2010 /* addrS[13,12] are MXCSR -- not written */
2011 /* addrS[15,14] are MXCSR_MASK -- not written */
2012
2013 /* Copy in the FP registers, in ST order. */
2014 for (stno = 0; stno < 8; stno++) {
2015 srcS = (UShort*)(&tmp.reg[10*stno]);
2016 dstS = (UShort*)(&addrS[16 + 8*stno]);
2017 dstS[0] = srcS[0];
2018 dstS[1] = srcS[1];
2019 dstS[2] = srcS[2];
2020 dstS[3] = srcS[3];
2021 dstS[4] = srcS[4];
2022 dstS[5] = 0;
2023 dstS[6] = 0;
2024 dstS[7] = 0;
2025 }
2026 }
2027
2028
2029 /* CALLED FROM GENERATED CODE */
2030 /* DIRTY HELPER (reads guest state, writes guest mem) */
2031 /* XSAVE component 1 is the SSE state. */
2032 void amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
2033 ( VexGuestAMD64State* gst, HWord addr )
2034 {
2035 UShort* addrS = (UShort*)addr;
2036 UInt mxcsr;
2037
2038 /* The only non-register parts of the SSE state are MXCSR and
2039 MXCSR_MASK. */
2040 mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );
2041
2042 addrS[12] = toUShort(mxcsr); /* MXCSR */
2043 addrS[13] = toUShort(mxcsr >> 16);
2044
2045 addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
2046 addrS[15] = 0x0000; /* MXCSR mask (hi16) */
2047 }
2048
2049
2050 /* VISIBLE TO LIBVEX CLIENT */
2051 /* Do FXSAVE from the supplied VexGuestAMD64State structure and store
2052 the result at the given address which represents a buffer of at
2053 least 416 bytes.
2054
2055 This function is not called from generated code. FXSAVE is dealt
2056 with by the amd64 front end by calling the XSAVE_COMPONENT_{0,1}
2057 functions above plus some in-line IR. This function is merely a
2058 convenience function for VEX's users.
2059 */
2060 void LibVEX_GuestAMD64_fxsave ( /*IN*/VexGuestAMD64State* gst,
2061 /*OUT*/HWord fp_state )
2062 {
2063 /* Do the x87 part */
2064 amd64g_dirtyhelper_XSAVE_COMPONENT_0(gst, fp_state);
2065
2066 /* And now the SSE part, except for the registers themselves. */
2067 amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS(gst, fp_state);
2068
2069 /* That's the first 160 bytes of the image done. */
2070 /* Now only %xmm0 .. %xmm15 remain to be copied. If the host is
2071 big-endian, these need to be byte-swapped. */
2072 U128 *xmm = (U128 *)(fp_state + 160);
2073 vassert(host_is_little_endian());
2074
2075 # define COPY_U128(_dst,_src) \
2076 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
2077 _dst[2] = _src[2]; _dst[3] = _src[3]; } \
2078 while (0)
2079
2080 COPY_U128( xmm[0], gst->guest_YMM0 );
2081 COPY_U128( xmm[1], gst->guest_YMM1 );
2082 COPY_U128( xmm[2], gst->guest_YMM2 );
2083 COPY_U128( xmm[3], gst->guest_YMM3 );
2084 COPY_U128( xmm[4], gst->guest_YMM4 );
2085 COPY_U128( xmm[5], gst->guest_YMM5 );
2086 COPY_U128( xmm[6], gst->guest_YMM6 );
2087 COPY_U128( xmm[7], gst->guest_YMM7 );
2088 COPY_U128( xmm[8], gst->guest_YMM8 );
2089 COPY_U128( xmm[9], gst->guest_YMM9 );
2090 COPY_U128( xmm[10], gst->guest_YMM10 );
2091 COPY_U128( xmm[11], gst->guest_YMM11 );
2092 COPY_U128( xmm[12], gst->guest_YMM12 );
2093 COPY_U128( xmm[13], gst->guest_YMM13 );
2094 COPY_U128( xmm[14], gst->guest_YMM14 );
2095 COPY_U128( xmm[15], gst->guest_YMM15 );
2096 # undef COPY_U128
2097 }
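/* Illustrative use only (not part of VEX itself): a client that owns a
   VexGuestAMD64State 'gst' could snapshot its FP/SSE state with

      UChar buf[512] __attribute__((aligned(16)));
      LibVEX_GuestAMD64_fxsave(&gst, (HWord)&buf[0]);

   Only the first 416 bytes of 'buf' are actually written, as stated in
   the comment preceding the function; the 16-byte alignment mirrors the
   real FXSAVE instruction but is not required by this software
   implementation. */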
2098
2099
2100 /*---------------------------------------------------------------*/
2101 /*--- Supporting functions for XRSTOR/FXRSTOR. ---*/
2102 /*---------------------------------------------------------------*/
2103
2104 /* CALLED FROM GENERATED CODE */
2105 /* DIRTY HELPER (writes guest state, reads guest mem) */
2106 VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_0
2107 ( VexGuestAMD64State* gst, HWord addr )
2108 {
2109 Fpu_State tmp;
2110 UShort* addrS = (UShort*)addr;
2111 UChar* addrC = (UChar*)addr;
2112 UShort fp_tags;
2113 Int r, stno, i;
2114
2115 /* Copy the x87 registers out of the image, into a temporary
2116 Fpu_State struct. */
2117 for (i = 0; i < 14; i++) tmp.env[i] = 0;
2118 for (i = 0; i < 80; i++) tmp.reg[i] = 0;
2119 /* fill in tmp.reg[0..7] */
2120 for (stno = 0; stno < 8; stno++) {
2121 UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
2122 UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
2123 dstS[0] = srcS[0];
2124 dstS[1] = srcS[1];
2125 dstS[2] = srcS[2];
2126 dstS[3] = srcS[3];
2127 dstS[4] = srcS[4];
2128 }
2129 /* fill in tmp.env[0..13] */
2130 tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
2131    tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */
2132
2133 fp_tags = 0;
2134 for (r = 0; r < 8; r++) {
2135 if (addrC[4] & (1<<r))
2136          fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
2137 else
2138          fp_tags |= (3 << (2*r)); /* EMPTY */
2139 }
2140 tmp.env[FP_ENV_TAG] = fp_tags;
2141
2142 /* Now write 'tmp' into the guest state. */
2143 VexEmNote warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );
2144
2145 return warnX87;
2146 }
2147
2148
2149 /* CALLED FROM GENERATED CODE */
2150 /* DIRTY HELPER (writes guest state, reads guest mem) */
2151 VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
2152 ( VexGuestAMD64State* gst, HWord addr )
2153 {
2154 UShort* addrS = (UShort*)addr;
2155 UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
2156 | ((((UInt)addrS[13]) & 0xFFFF) << 16);
2157 ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 );
2158
2159 VexEmNote warnXMM = (VexEmNote)(w64 >> 32);
2160
2161 gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
2162 return warnXMM;
2163 }
2164
2165
2166 /* VISIBLE TO LIBVEX CLIENT */
2167 /* Do FXRSTOR from the supplied address, storing the values read into the
2168    given VexGuestAMD64State structure.
2169
2170 This function is not called from generated code. FXRSTOR is dealt
2171 with by the amd64 front end by calling the XRSTOR_COMPONENT_{0,1}
2172 functions above plus some in-line IR. This function is merely a
2173 convenience function for VEX's users.
2174 */
2175 VexEmNote LibVEX_GuestAMD64_fxrstor ( /*IN*/HWord fp_state,
2176 /*MOD*/VexGuestAMD64State* gst )
2177 {
2178 /* Restore %xmm0 .. %xmm15. If the host is big-endian, these need
2179 to be byte-swapped. */
2180 U128 *xmm = (U128 *)(fp_state + 160);
2181
2182 vassert(host_is_little_endian());
2183
2184 # define COPY_U128(_dst,_src) \
2185 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
2186 _dst[2] = _src[2]; _dst[3] = _src[3]; } \
2187 while (0)
2188
2189 COPY_U128( gst->guest_YMM0, xmm[0] );
2190 COPY_U128( gst->guest_YMM1, xmm[1] );
2191 COPY_U128( gst->guest_YMM2, xmm[2] );
2192 COPY_U128( gst->guest_YMM3, xmm[3] );
2193 COPY_U128( gst->guest_YMM4, xmm[4] );
2194 COPY_U128( gst->guest_YMM5, xmm[5] );
2195 COPY_U128( gst->guest_YMM6, xmm[6] );
2196 COPY_U128( gst->guest_YMM7, xmm[7] );
2197 COPY_U128( gst->guest_YMM8, xmm[8] );
2198 COPY_U128( gst->guest_YMM9, xmm[9] );
2199 COPY_U128( gst->guest_YMM10, xmm[10] );
2200 COPY_U128( gst->guest_YMM11, xmm[11] );
2201 COPY_U128( gst->guest_YMM12, xmm[12] );
2202 COPY_U128( gst->guest_YMM13, xmm[13] );
2203 COPY_U128( gst->guest_YMM14, xmm[14] );
2204 COPY_U128( gst->guest_YMM15, xmm[15] );
2205
2206 # undef COPY_U128
2207
2208 VexEmNote warnXMM
2209 = amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS(gst, fp_state);
2210 VexEmNote warnX87
2211 = amd64g_dirtyhelper_XRSTOR_COMPONENT_0(gst, fp_state);
2212
2213 /* Prefer an X87 emwarn over an XMM one, if both exist. */
2214 if (warnX87 != EmNote_NONE)
2215 return warnX87;
2216 else
2217 return warnXMM;
2218 }
2219
2220
2221 /*---------------------------------------------------------------*/
2222 /*--- Supporting functions for FSAVE/FRSTOR ---*/
2223 /*---------------------------------------------------------------*/
2224
2225 /* DIRTY HELPER (writes guest state) */
2226 /* Initialise the x87 FPU state as per 'finit'. */
2227 void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
2228 {
2229 Int i;
2230 gst->guest_FTOP = 0;
2231 for (i = 0; i < 8; i++) {
2232 gst->guest_FPTAG[i] = 0; /* empty */
2233 gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
2234 }
2235 gst->guest_FPROUND = (ULong)Irrm_NEAREST;
2236 gst->guest_FC3210 = 0;
2237 }
2238
2239
2240 /* CALLED FROM GENERATED CODE */
2241 /* DIRTY HELPER (reads guest memory) */
2242 ULong amd64g_dirtyhelper_loadF80le ( Addr addrU )
2243 {
2244 ULong f64;
2245 convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
2246 return f64;
2247 }
2248
2249 /* CALLED FROM GENERATED CODE */
2250 /* DIRTY HELPER (writes guest memory) */
2251 void amd64g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
2252 {
2253 convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
2254 }
2255
2256
2257 /* CALLED FROM GENERATED CODE */
2258 /* CLEAN HELPER */
2259 /* mxcsr[15:0] contains a SSE native format MXCSR value.
2260 Extract from it the required SSEROUND value and any resulting
2261 emulation warning, and return (warn << 32) | sseround value.
2262 */
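/* Relevant MXCSR bits: [5:0] exception flags, [6] DAZ, [12:7] exception
   masks (so 0x1F80 means "all exceptions masked, nothing else set"),
   [14:13] rounding control, [15] FZ (flush-to-zero). */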
2263 ULong amd64g_check_ldmxcsr ( ULong mxcsr )
2264 {
2265 /* Decide on a rounding mode. mxcsr[14:13] holds it. */
2266 /* NOTE, encoded exactly as per enum IRRoundingMode. */
2267 ULong rmode = (mxcsr >> 13) & 3;
2268
2269 /* Detect any required emulation warnings. */
2270 VexEmNote ew = EmNote_NONE;
2271
2272 if ((mxcsr & 0x1F80) != 0x1F80) {
2273 /* unmasked exceptions! */
2274 ew = EmWarn_X86_sseExns;
2275 }
2276 else
2277 if (mxcsr & (1<<15)) {
2278 /* FZ is set */
2279 ew = EmWarn_X86_fz;
2280 }
2281 else
2282 if (mxcsr & (1<<6)) {
2283 /* DAZ is set */
2284 ew = EmWarn_X86_daz;
2285 }
2286
2287 return (((ULong)ew) << 32) | ((ULong)rmode);
2288 }
2289
2290
2291 /* CALLED FROM GENERATED CODE */
2292 /* CLEAN HELPER */
2293 /* Given sseround as an IRRoundingMode value, create a suitable SSE
2294 native format MXCSR value. */
2295 ULong amd64g_create_mxcsr ( ULong sseround )
2296 {
2297 sseround &= 3;
2298 return 0x1F80 | (sseround << 13);
2299 }
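/* For example, Irrm_NEAREST (0) yields 0x1F80 and Irrm_ZERO (3) yields
   0x7F80: all exceptions masked, DAZ/FZ clear, with only the rounding
   control field varying. */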
2300
2301
2302 /* CLEAN HELPER */
2303 /* fpucw[15:0] contains a x87 native format FPU control word.
2304 Extract from it the required FPROUND value and any resulting
2305 emulation warning, and return (warn << 32) | fpround value.
2306 */
2307 ULong amd64g_check_fldcw ( ULong fpucw )
2308 {
2309 /* Decide on a rounding mode. fpucw[11:10] holds it. */
2310 /* NOTE, encoded exactly as per enum IRRoundingMode. */
2311 ULong rmode = (fpucw >> 10) & 3;
2312
2313 /* Detect any required emulation warnings. */
2314 VexEmNote ew = EmNote_NONE;
2315
2316 if ((fpucw & 0x3F) != 0x3F) {
2317 /* unmasked exceptions! */
2318 ew = EmWarn_X86_x87exns;
2319 }
2320 else
2321 if (((fpucw >> 8) & 3) != 3) {
2322 /* unsupported precision */
2323 ew = EmWarn_X86_x87precision;
2324 }
2325
2326 return (((ULong)ew) << 32) | ((ULong)rmode);
2327 }
2328
2329
2330 /* CLEAN HELPER */
2331 /* Given fpround as an IRRoundingMode value, create a suitable x87
2332 native format FPU control word. */
2333 ULong amd64g_create_fpucw ( ULong fpround )
2334 {
2335 fpround &= 3;
2336 return 0x037F | (fpround << 10);
2337 }
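/* For example, Irrm_NEAREST (0) yields 0x037F, which is also the value
   'finit' establishes: all exceptions masked, 64-bit precision,
   round-to-nearest. */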
2338
2339
2340 /* This is used to implement 'fldenv'.
2341 Reads 28 bytes at x87_state[0 .. 27]. */
2342 /* CALLED FROM GENERATED CODE */
2343 /* DIRTY HELPER */
2344 VexEmNote amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state,
2345 /*IN*/HWord x87_state)
2346 {
2347 return do_put_x87( False, (UChar*)x87_state, vex_state );
2348 }
2349
2350
2351 /* CALLED FROM GENERATED CODE */
2352 /* DIRTY HELPER */
2353 /* Create an x87 FPU env from the guest state, as close as we can
2354 approximate it. Writes 28 bytes at x87_state[0..27]. */
2355 void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state,
2356 /*OUT*/HWord x87_state )
2357 {
2358 Int i, stno, preg;
2359 UInt tagw;
2360 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2361 Fpu_State* x87 = (Fpu_State*)x87_state;
2362 UInt ftop = vex_state->guest_FTOP;
2363 ULong c3210 = vex_state->guest_FC3210;
2364
2365 for (i = 0; i < 14; i++)
2366 x87->env[i] = 0;
2367
2368 x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
2369 x87->env[FP_ENV_STAT]
2370 = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) ));
2371 x87->env[FP_ENV_CTRL]
2372 = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) ));
2373
2374 /* Compute the x87 tag word. */
2375 tagw = 0;
2376 for (stno = 0; stno < 8; stno++) {
2377 preg = (stno + ftop) & 7;
2378 if (vexTags[preg] == 0) {
2379 /* register is empty */
2380 tagw |= (3 << (2*preg));
2381 } else {
2382 /* register is full. */
2383 tagw |= (0 << (2*preg));
2384 }
2385 }
2386 x87->env[FP_ENV_TAG] = toUShort(tagw);
2387
2388 /* We don't dump the x87 registers, tho. */
2389 }
2390
2391
2392 /* This is used to implement 'fnsave'.
2393 Writes 108 bytes at x87_state[0 .. 107]. */
2394 /* CALLED FROM GENERATED CODE */
2395 /* DIRTY HELPER */
2396 void amd64g_dirtyhelper_FNSAVE ( /*IN*/VexGuestAMD64State* vex_state,
2397 /*OUT*/HWord x87_state)
2398 {
2399 do_get_x87( vex_state, (UChar*)x87_state );
2400 }
2401
2402
2403 /* This is used to implement 'fnsaves'.
2404 Writes 94 bytes at x87_state[0 .. 93]. */
2405 /* CALLED FROM GENERATED CODE */
2406 /* DIRTY HELPER */
2407 void amd64g_dirtyhelper_FNSAVES ( /*IN*/VexGuestAMD64State* vex_state,
2408 /*OUT*/HWord x87_state)
2409 {
2410 Int i, stno, preg;
2411 UInt tagw;
2412 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2413 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2414 Fpu_State_16* x87 = (Fpu_State_16*)x87_state;
2415 UInt ftop = vex_state->guest_FTOP;
2416 UInt c3210 = vex_state->guest_FC3210;
2417
2418 for (i = 0; i < 7; i++)
2419 x87->env[i] = 0;
2420
2421 x87->env[FPS_ENV_STAT]
2422 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
2423 x87->env[FPS_ENV_CTRL]
2424 = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
2425
2426 /* Dump the register stack in ST order. */
2427 tagw = 0;
2428 for (stno = 0; stno < 8; stno++) {
2429 preg = (stno + ftop) & 7;
2430 if (vexTags[preg] == 0) {
2431 /* register is empty */
2432 tagw |= (3 << (2*preg));
2433 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2434 &x87->reg[10*stno] );
2435 } else {
2436 /* register is full. */
2437 tagw |= (0 << (2*preg));
2438 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2439 &x87->reg[10*stno] );
2440 }
2441 }
2442 x87->env[FPS_ENV_TAG] = toUShort(tagw);
2443 }
2444
2445
2446 /* This is used to implement 'frstor'.
2447 Reads 108 bytes at x87_state[0 .. 107]. */
2448 /* CALLED FROM GENERATED CODE */
2449 /* DIRTY HELPER */
2450 VexEmNote amd64g_dirtyhelper_FRSTOR ( /*OUT*/VexGuestAMD64State* vex_state,
2451 /*IN*/HWord x87_state)
2452 {
2453 return do_put_x87( True, (UChar*)x87_state, vex_state );
2454 }
2455
2456
2457 /* This is used to implement 'frstors'.
2458 Reads 94 bytes at x87_state[0 .. 93]. */
2459 /* CALLED FROM GENERATED CODE */
2460 /* DIRTY HELPER */
2461 VexEmNote amd64g_dirtyhelper_FRSTORS ( /*OUT*/VexGuestAMD64State* vex_state,
2462 /*IN*/HWord x87_state)
2463 {
2464 Int stno, preg;
2465 UInt tag;
2466 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2467 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2468 Fpu_State_16* x87 = (Fpu_State_16*)x87_state;
2469 UInt ftop = (x87->env[FPS_ENV_STAT] >> 11) & 7;
2470 UInt tagw = x87->env[FPS_ENV_TAG];
2471 UInt fpucw = x87->env[FPS_ENV_CTRL];
2472 UInt c3210 = x87->env[FPS_ENV_STAT] & 0x4700;
2473 VexEmNote ew;
2474 UInt fpround;
2475 ULong pair;
2476
2477 /* Copy registers and tags */
2478 for (stno = 0; stno < 8; stno++) {
2479 preg = (stno + ftop) & 7;
2480 tag = (tagw >> (2*preg)) & 3;
2481 if (tag == 3) {
2482 /* register is empty */
2483 /* hmm, if it's empty, does it still get written? Probably
2484 safer to say it does. If we don't, memcheck could get out
2485 of sync, in that it thinks all FP registers are defined by
2486 this helper, but in reality some have not been updated. */
2487 vexRegs[preg] = 0; /* IEEE754 64-bit zero */
2488 vexTags[preg] = 0;
2489 } else {
2490 /* register is non-empty */
2491 convert_f80le_to_f64le( &x87->reg[10*stno],
2492 (UChar*)&vexRegs[preg] );
2493 vexTags[preg] = 1;
2494 }
2495 }
2496
2497 /* stack pointer */
2498 vex_state->guest_FTOP = ftop;
2499
2500 /* status word */
2501 vex_state->guest_FC3210 = c3210;
2502
2503 /* handle the control word, setting FPROUND and detecting any
2504 emulation warnings. */
2505 pair = amd64g_check_fldcw ( (ULong)fpucw );
2506 fpround = (UInt)pair & 0xFFFFFFFFULL;
2507 ew = (VexEmNote)(pair >> 32);
2508
2509 vex_state->guest_FPROUND = fpround & 3;
2510
2511 /* emulation warnings --> caller */
2512 return ew;
2513 }
2514
2515
2516 /*---------------------------------------------------------------*/
2517 /*--- CPUID helpers. ---*/
2518 /*---------------------------------------------------------------*/
2519
2520 /* Claim to be the following CPU, which is probably representative of
2521 the lowliest (earliest) amd64 offerings. It can do neither sse3
2522 nor cx16.
2523
2524 vendor_id : AuthenticAMD
2525 cpu family : 15
2526 model : 5
2527 model name : AMD Opteron (tm) Processor 848
2528 stepping : 10
2529 cpu MHz : 1797.682
2530 cache size : 1024 KB
2531 fpu : yes
2532 fpu_exception : yes
2533 cpuid level : 1
2534 wp : yes
2535 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2536 mtrr pge mca cmov pat pse36 clflush mmx fxsr
2537 sse sse2 syscall nx mmxext lm 3dnowext 3dnow
2538 bogomips : 3600.62
2539 TLB size : 1088 4K pages
2540 clflush size : 64
2541 cache_alignment : 64
2542 address sizes : 40 bits physical, 48 bits virtual
2543 power management: ts fid vid ttp
2544
2545 2012-Feb-21: don't claim 3dnow or 3dnowext, since in fact
2546 we don't support them. See #291568. 3dnow is 80000001.EDX.31
2547 and 3dnowext is 80000001.EDX.30.
2548 */
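/* In this and the other CPUID helpers below, the requested leaf is taken
   from guest_RAX (and the sub-leaf from guest_RCX where it matters), and
   the result is returned by overwriting guest_RAX/RBX/RCX/RDX, just as
   the real instruction does.  For instance, leaf 0 below reports a
   maximum basic leaf of 1 and the vendor string "AuthenticAMD" split
   across EBX, EDX and ECX. */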
2549 void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
2550 {
2551 # define SET_ABCD(_a,_b,_c,_d) \
2552 do { st->guest_RAX = (ULong)(_a); \
2553 st->guest_RBX = (ULong)(_b); \
2554 st->guest_RCX = (ULong)(_c); \
2555 st->guest_RDX = (ULong)(_d); \
2556 } while (0)
2557
2558 switch (0xFFFFFFFF & st->guest_RAX) {
2559 case 0x00000000:
2560 SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
2561 break;
2562 case 0x00000001:
2563 SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
2564 break;
2565 case 0x80000000:
2566 SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
2567 break;
2568 case 0x80000001:
2569 /* Don't claim to support 3dnow or 3dnowext. 0xe1d3fbff is
2570 the original it-is-supported value that the h/w provides.
2571 See #291568. */
2572 SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, /*0xe1d3fbff*/
2573 0x21d3fbff);
2574 break;
2575 case 0x80000002:
2576 SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
2577 break;
2578 case 0x80000003:
2579 SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
2580 break;
2581 case 0x80000004:
2582 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2583 break;
2584 case 0x80000005:
2585 SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
2586 break;
2587 case 0x80000006:
2588 SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
2589 break;
2590 case 0x80000007:
2591 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
2592 break;
2593 case 0x80000008:
2594 SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
2595 break;
2596 default:
2597 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2598 break;
2599 }
2600 # undef SET_ABCD
2601 }
2602
2603
2604 /* Claim to be the following CPU (2 x ...), which is sse3 and cx16
2605 capable.
2606
2607 vendor_id : GenuineIntel
2608 cpu family : 6
2609 model : 15
2610 model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
2611 stepping : 6
2612 cpu MHz : 2394.000
2613 cache size : 4096 KB
2614 physical id : 0
2615 siblings : 2
2616 core id : 0
2617 cpu cores : 2
2618 fpu : yes
2619 fpu_exception : yes
2620 cpuid level : 10
2621 wp : yes
2622 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2623 mtrr pge mca cmov pat pse36 clflush dts acpi
2624 mmx fxsr sse sse2 ss ht tm syscall nx lm
2625 constant_tsc pni monitor ds_cpl vmx est tm2
2626 cx16 xtpr lahf_lm
2627 bogomips : 4798.78
2628 clflush size : 64
2629 cache_alignment : 64
2630 address sizes : 36 bits physical, 48 bits virtual
2631 power management:
2632 */
2633 void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st )
2634 {
2635 # define SET_ABCD(_a,_b,_c,_d) \
2636 do { st->guest_RAX = (ULong)(_a); \
2637 st->guest_RBX = (ULong)(_b); \
2638 st->guest_RCX = (ULong)(_c); \
2639 st->guest_RDX = (ULong)(_d); \
2640 } while (0)
2641
2642 switch (0xFFFFFFFF & st->guest_RAX) {
2643 case 0x00000000:
2644 SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
2645 break;
2646 case 0x00000001:
2647 SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
2648 break;
2649 case 0x00000002:
2650 SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
2651 break;
2652 case 0x00000003:
2653 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2654 break;
2655 case 0x00000004: {
2656 switch (0xFFFFFFFF & st->guest_RCX) {
2657 case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
2658 0x0000003f, 0x00000001); break;
2659 case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
2660 0x0000003f, 0x00000001); break;
2661 case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
2662 0x00000fff, 0x00000001); break;
2663 default: SET_ABCD(0x00000000, 0x00000000,
2664 0x00000000, 0x00000000); break;
2665 }
2666 break;
2667 }
2668 case 0x00000005:
2669 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
2670 break;
2671 case 0x00000006:
2672 SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
2673 break;
2674 case 0x00000007:
2675 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2676 break;
2677 case 0x00000008:
2678 SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
2679 break;
2680 case 0x00000009:
2681 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2682 break;
2683 case 0x0000000a:
2684 unhandled_eax_value:
2685 SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
2686 break;
2687 case 0x80000000:
2688 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2689 break;
2690 case 0x80000001:
2691 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
2692 break;
2693 case 0x80000002:
2694 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2695 break;
2696 case 0x80000003:
2697 SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
2698 break;
2699 case 0x80000004:
2700 SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
2701 break;
2702 case 0x80000005:
2703 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2704 break;
2705 case 0x80000006:
2706 SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
2707 break;
2708 case 0x80000007:
2709 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2710 break;
2711 case 0x80000008:
2712 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2713 break;
2714 default:
2715 goto unhandled_eax_value;
2716 }
2717 # undef SET_ABCD
2718 }
2719
2720
2721 /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16
2722 capable.
2723
2724 vendor_id : GenuineIntel
2725 cpu family : 6
2726 model : 37
2727 model name : Intel(R) Core(TM) i5 CPU 670 @ 3.47GHz
2728 stepping : 2
2729 cpu MHz : 3334.000
2730 cache size : 4096 KB
2731 physical id : 0
2732 siblings : 4
2733 core id : 0
2734 cpu cores : 2
2735 apicid : 0
2736 initial apicid : 0
2737 fpu : yes
2738 fpu_exception : yes
2739 cpuid level : 11
2740 wp : yes
2741 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2742 mtrr pge mca cmov pat pse36 clflush dts acpi
2743 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
2744 lm constant_tsc arch_perfmon pebs bts rep_good
2745 xtopology nonstop_tsc aperfmperf pni pclmulqdq
2746 dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16
2747 xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida
2748 arat tpr_shadow vnmi flexpriority ept vpid
2749 bogomips : 6957.57
2750 clflush size : 64
2751 cache_alignment : 64
2752 address sizes : 36 bits physical, 48 bits virtual
2753 power management:
2754 */
2755 void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st )
2756 {
2757 # define SET_ABCD(_a,_b,_c,_d) \
2758 do { st->guest_RAX = (ULong)(_a); \
2759 st->guest_RBX = (ULong)(_b); \
2760 st->guest_RCX = (ULong)(_c); \
2761 st->guest_RDX = (ULong)(_d); \
2762 } while (0)
2763
2764 UInt old_eax = (UInt)st->guest_RAX;
2765 UInt old_ecx = (UInt)st->guest_RCX;
2766
2767 switch (old_eax) {
2768 case 0x00000000:
2769 SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69);
2770 break;
2771 case 0x00000001:
2772 SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff, 0xbfebfbff);
2773 break;
2774 case 0x00000002:
2775 SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c);
2776 break;
2777 case 0x00000003:
2778 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2779 break;
2780 case 0x00000004:
2781 switch (old_ecx) {
2782 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
2783 0x0000003f, 0x00000000); break;
2784 case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f,
2785 0x0000007f, 0x00000000); break;
2786 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
2787 0x000001ff, 0x00000000); break;
2788 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
2789 0x00000fff, 0x00000002); break;
2790 default: SET_ABCD(0x00000000, 0x00000000,
2791 0x00000000, 0x00000000); break;
2792 }
2793 break;
2794 case 0x00000005:
2795 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
2796 break;
2797 case 0x00000006:
2798 SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000);
2799 break;
2800 case 0x00000007:
2801 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2802 break;
2803 case 0x00000008:
2804 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2805 break;
2806 case 0x00000009:
2807 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2808 break;
2809 case 0x0000000a:
2810 SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603);
2811 break;
2812 case 0x0000000b:
2813 switch (old_ecx) {
2814 case 0x00000000:
2815 SET_ABCD(0x00000001, 0x00000002,
2816 0x00000100, 0x00000000); break;
2817 case 0x00000001:
2818 SET_ABCD(0x00000004, 0x00000004,
2819 0x00000201, 0x00000000); break;
2820 default:
2821 SET_ABCD(0x00000000, 0x00000000,
2822 old_ecx, 0x00000000); break;
2823 }
2824 break;
2825 case 0x0000000c:
2826 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
2827 break;
2828 case 0x0000000d:
2829 switch (old_ecx) {
2830 case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
2831 0x00000100, 0x00000000); break;
2832 case 0x00000001: SET_ABCD(0x00000004, 0x00000004,
2833 0x00000201, 0x00000000); break;
2834 default: SET_ABCD(0x00000000, 0x00000000,
2835 old_ecx, 0x00000000); break;
2836 }
2837 break;
2838 case 0x80000000:
2839 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2840 break;
2841 case 0x80000001:
2842 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
2843 break;
2844 case 0x80000002:
2845 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2846 break;
2847 case 0x80000003:
2848 SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020);
2849 break;
2850 case 0x80000004:
2851 SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847);
2852 break;
2853 case 0x80000005:
2854 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2855 break;
2856 case 0x80000006:
2857 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
2858 break;
2859 case 0x80000007:
2860 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
2861 break;
2862 case 0x80000008:
2863 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2864 break;
2865 default:
2866 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
2867 break;
2868 }
2869 # undef SET_ABCD
2870 }
2871
2872
2873 /* Claim to be the following CPU (4 x ...), which is AVX and cx16
2874 capable. Plus (kludge!) it "supports" HTM.
2875
2876    Also with the following change: claim that XSaveOpt is not
2877    available, in that cpuid(eax=0xD,ecx=1).eax[0] returns 0, compared to 1
2878 on the real CPU. Consequently, programs that correctly observe
2879 these CPUID values should only try to use 3 of the 8 XSave-family
2880 instructions: XGETBV, XSAVE and XRSTOR. In particular this avoids
2881 having to implement the compacted or optimised save/restore
2882 variants.
2883
2884 vendor_id : GenuineIntel
2885 cpu family : 6
2886 model : 42
2887 model name : Intel(R) Core(TM) i5-2300 CPU @ 2.80GHz
2888 stepping : 7
2889 cpu MHz : 1600.000
2890 cache size : 6144 KB
2891 physical id : 0
2892 siblings : 4
2893 core id : 3
2894 cpu cores : 4
2895 apicid : 6
2896 initial apicid : 6
2897 fpu : yes
2898 fpu_exception : yes
2899 cpuid level : 13
2900 wp : yes
2901 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2902 mtrr pge mca cmov pat pse36 clflush dts acpi
2903 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
2904 lm constant_tsc arch_perfmon pebs bts rep_good
2905 nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq
2906 dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16
2907 xtpr pdcm sse4_1 sse4_2 popcnt aes xsave avx
2908 lahf_lm ida arat epb xsaveopt pln pts dts
2909 tpr_shadow vnmi flexpriority ept vpid
2910
2911 bogomips : 5768.94
2912 clflush size : 64
2913 cache_alignment : 64
2914 address sizes : 36 bits physical, 48 bits virtual
2915 power management:
2916 */
2917 void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st )
2918 {
2919 # define SET_ABCD(_a,_b,_c,_d) \
2920 do { st->guest_RAX = (ULong)(_a); \
2921 st->guest_RBX = (ULong)(_b); \
2922 st->guest_RCX = (ULong)(_c); \
2923 st->guest_RDX = (ULong)(_d); \
2924 } while (0)
2925
2926 UInt old_eax = (UInt)st->guest_RAX;
2927 UInt old_ecx = (UInt)st->guest_RCX;
2928
2929 switch (old_eax) {
2930 case 0x00000000:
2931 SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
2932 break;
2933 case 0x00000001:
2934 SET_ABCD(0x000206a7, 0x00100800, 0x1f9ae3bf, 0xbfebfbff);
2935 break;
2936 case 0x00000002:
2937 SET_ABCD(0x76035a01, 0x00f0b0ff, 0x00000000, 0x00ca0000);
2938 break;
2939 case 0x00000003:
2940 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2941 break;
2942 case 0x00000004:
2943 switch (old_ecx) {
2944 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
2945 0x0000003f, 0x00000000); break;
2946 case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f,
2947 0x0000003f, 0x00000000); break;
2948 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
2949 0x000001ff, 0x00000000); break;
2950 case 0x00000003: SET_ABCD(0x1c03c163, 0x02c0003f,
2951 0x00001fff, 0x00000006); break;
2952 default: SET_ABCD(0x00000000, 0x00000000,
2953 0x00000000, 0x00000000); break;
2954 }
2955 break;
2956 case 0x00000005:
2957 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
2958 break;
2959 case 0x00000006:
2960 SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000);
2961 break;
2962 case 0x00000007:
2963 SET_ABCD(0x00000000, 0x00000800, 0x00000000, 0x00000000);
2964 break;
2965 case 0x00000008:
2966 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2967 break;
2968 case 0x00000009:
2969 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2970 break;
2971 case 0x0000000a:
2972 SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603);
2973 break;
2974 case 0x0000000b:
2975 switch (old_ecx) {
2976 case 0x00000000:
2977 SET_ABCD(0x00000001, 0x00000001,
2978 0x00000100, 0x00000000); break;
2979 case 0x00000001:
2980 SET_ABCD(0x00000004, 0x00000004,
2981 0x00000201, 0x00000000); break;
2982 default:
2983 SET_ABCD(0x00000000, 0x00000000,
2984 old_ecx, 0x00000000); break;
2985 }
2986 break;
2987 case 0x0000000c:
2988 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2989 break;
2990 case 0x0000000d:
2991 switch (old_ecx) {
2992 case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
2993 0x00000340, 0x00000000); break;
2994 case 0x00000001: SET_ABCD(0x00000000, 0x00000000,
2995 0x00000000, 0x00000000); break;
2996 case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
2997 0x00000000, 0x00000000); break;
2998 default: SET_ABCD(0x00000000, 0x00000000,
2999 0x00000000, 0x00000000); break;
3000 }
3001 break;
3002 case 0x0000000e:
3003 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3004 break;
3005 case 0x0000000f:
3006 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3007 break;
3008 case 0x80000000:
3009 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3010 break;
3011 case 0x80000001:
3012 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
3013 break;
3014 case 0x80000002:
3015 SET_ABCD(0x20202020, 0x20202020, 0x65746e49, 0x2952286c);
3016 break;
3017 case 0x80000003:
3018 SET_ABCD(0x726f4320, 0x4d542865, 0x35692029, 0x3033322d);
3019 break;
3020 case 0x80000004:
3021 SET_ABCD(0x50432030, 0x20402055, 0x30382e32, 0x007a4847);
3022 break;
3023 case 0x80000005:
3024 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3025 break;
3026 case 0x80000006:
3027 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
3028 break;
3029 case 0x80000007:
3030 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
3031 break;
3032 case 0x80000008:
3033 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
3034 break;
3035 default:
3036 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3037 break;
3038 }
3039 # undef SET_ABCD
3040 }
3041
3042
3043 /* Claim to be the following CPU (4 x ...), which is AVX2 capable.
3044
3045    With the following change: claim that XSaveOpt is not available, in
3046    that cpuid(eax=0xD,ecx=1).eax[0] returns 0, compared to 1 on the real
3047 CPU. Consequently, programs that correctly observe these CPUID
3048 values should only try to use 3 of the 8 XSave-family instructions:
3049 XGETBV, XSAVE and XRSTOR. In particular this avoids having to
3050 implement the compacted or optimised save/restore variants.
3051
3052 vendor_id : GenuineIntel
3053 cpu family : 6
3054 model : 60
3055 model name : Intel(R) Core(TM) i7-4910MQ CPU @ 2.90GHz
3056 stepping : 3
3057 microcode : 0x1c
3058 cpu MHz : 919.957
3059 cache size : 8192 KB
3060 physical id : 0
3061 siblings : 4
3062 core id : 3
3063 cpu cores : 4
3064 apicid : 6
3065 initial apicid : 6
3066 fpu : yes
3067 fpu_exception : yes
3068 cpuid level : 13
3069 wp : yes
3070 flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca
3071 cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht
3072 tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc
3073 arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc
3074 aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl
3075 vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1
3076 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave
3077 avx f16c rdrand lahf_lm abm ida arat epb pln pts dtherm
3078 tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust
3079 bmi1 avx2 smep bmi2 erms invpcid xsaveopt
3080 bugs :
3081 bogomips : 5786.68
3082 clflush size : 64
3083 cache_alignment : 64
3084 address sizes : 39 bits physical, 48 bits virtual
3085 power management:
3086 */
3087 void amd64g_dirtyhelper_CPUID_avx2 ( VexGuestAMD64State* st )
3088 {
3089 # define SET_ABCD(_a,_b,_c,_d) \
3090 do { st->guest_RAX = (ULong)(_a); \
3091 st->guest_RBX = (ULong)(_b); \
3092 st->guest_RCX = (ULong)(_c); \
3093 st->guest_RDX = (ULong)(_d); \
3094 } while (0)
3095
3096 UInt old_eax = (UInt)st->guest_RAX;
3097 UInt old_ecx = (UInt)st->guest_RCX;
3098
3099 switch (old_eax) {
3100 case 0x00000000:
3101 SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
3102 break;
3103 case 0x00000001:
3104 /* Don't advertise RDRAND support, bit 30 in ECX. */
3105 SET_ABCD(0x000306c3, 0x02100800, 0x3ffafbff, 0xbfebfbff);
3106 break;
3107 case 0x00000002:
3108 SET_ABCD(0x76036301, 0x00f0b6ff, 0x00000000, 0x00c10000);
3109 break;
3110 case 0x00000003:
3111 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3112 break;
3113 case 0x00000004:
3114 switch (old_ecx) {
3115 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
3116 0x0000003f, 0x00000000); break;
3117 case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f,
3118 0x0000003f, 0x00000000); break;
3119 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
3120 0x000001ff, 0x00000000); break;
3121 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
3122 0x00001fff, 0x00000006); break;
3123 default: SET_ABCD(0x00000000, 0x00000000,
3124 0x00000000, 0x00000000); break;
3125 }
3126 break;
3127 case 0x00000005:
3128 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00042120);
3129 break;
3130 case 0x00000006:
3131 SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000);
3132 break;
3133 case 0x00000007:
3134 switch (old_ecx) {
3135 case 0x00000000: SET_ABCD(0x00000000, 0x000027ab,
3136 0x00000000, 0x00000000); break;
3137 default: SET_ABCD(0x00000000, 0x00000000,
3138 0x00000000, 0x00000000); break;
3139 }
3140 break;
3141 case 0x00000008:
3142 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3143 break;
3144 case 0x00000009:
3145 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3146 break;
3147 case 0x0000000a:
3148 SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603);
3149 break;
3150 case 0x0000000b:
3151 switch (old_ecx) {
3152 case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
3153 0x00000100, 0x00000002); break;
3154 case 0x00000001: SET_ABCD(0x00000004, 0x00000008,
3155 0x00000201, 0x00000002); break;
3156 default: SET_ABCD(0x00000000, 0x00000000,
3157 old_ecx, 0x00000002); break;
3158 }
3159 break;
3160 case 0x0000000c:
3161 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3162 break;
3163 case 0x0000000d:
3164 switch (old_ecx) {
3165 case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
3166 0x00000340, 0x00000000); break;
3167 case 0x00000001: SET_ABCD(0x00000000, 0x00000000,
3168 0x00000000, 0x00000000); break;
3169 case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
3170 0x00000000, 0x00000000); break;
3171 default: SET_ABCD(0x00000000, 0x00000000,
3172 0x00000000, 0x00000000); break;
3173 }
3174 break;
3175 case 0x80000000:
3176 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3177 break;
3178 case 0x80000001:
3179 SET_ABCD(0x00000000, 0x00000000, 0x00000021, 0x2c100800);
3180 break;
3181 case 0x80000002:
3182 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
3183 break;
3184 case 0x80000003:
3185 SET_ABCD(0x37692029, 0x3139342d, 0x20514d30, 0x20555043);
3186 break;
3187 case 0x80000004:
3188 SET_ABCD(0x2e322040, 0x48473039, 0x0000007a, 0x00000000);
3189 break;
3190 case 0x80000005:
3191 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3192 break;
3193 case 0x80000006:
3194 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
3195 break;
3196 case 0x80000007:
3197 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
3198 break;
3199 case 0x80000008:
3200 SET_ABCD(0x00003027, 0x00000000, 0x00000000, 0x00000000);
3201 break;
3202 default:
3203 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3204 break;
3205 }
3206 # undef SET_ABCD
3207 }
3208
3209
3210 /*---------------------------------------------------------------*/
3211 /*--- Misc integer helpers, including rotates and crypto. ---*/
3212 /*---------------------------------------------------------------*/
3213
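/* Emulations of rotate-through-carry.  A negative szIN means the caller
   wants the updated rflags back rather than the rotated value (see the
   end of each function).  As architected, the rotate count is first
   masked to 5 bits (6 bits for 64-bit operands); for 8- and 16-bit
   operands it is then reduced mod 9 and mod 17 respectively, since a
   rotation through CF of an N-bit value repeats every N+1 steps. */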
3214 ULong amd64g_calculate_RCR ( ULong arg,
3215 ULong rot_amt,
3216 ULong rflags_in,
3217 Long szIN )
3218 {
3219 Bool wantRflags = toBool(szIN < 0);
3220 ULong sz = wantRflags ? (-szIN) : szIN;
3221 ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F);
3222 ULong cf=0, of=0, tempcf;
3223
3224 switch (sz) {
3225 case 8:
3226 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3227 of = ((arg >> 63) ^ cf) & 1;
3228 while (tempCOUNT > 0) {
3229 tempcf = arg & 1;
3230 arg = (arg >> 1) | (cf << 63);
3231 cf = tempcf;
3232 tempCOUNT--;
3233 }
3234 break;
3235 case 4:
3236 while (tempCOUNT >= 33) tempCOUNT -= 33;
3237 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3238 of = ((arg >> 31) ^ cf) & 1;
3239 while (tempCOUNT > 0) {
3240 tempcf = arg & 1;
3241 arg = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31);
3242 cf = tempcf;
3243 tempCOUNT--;
3244 }
3245 break;
3246 case 2:
3247 while (tempCOUNT >= 17) tempCOUNT -= 17;
3248 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3249 of = ((arg >> 15) ^ cf) & 1;
3250 while (tempCOUNT > 0) {
3251 tempcf = arg & 1;
3252 arg = ((arg >> 1) & 0x7FFFULL) | (cf << 15);
3253 cf = tempcf;
3254 tempCOUNT--;
3255 }
3256 break;
3257 case 1:
3258 while (tempCOUNT >= 9) tempCOUNT -= 9;
3259 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3260 of = ((arg >> 7) ^ cf) & 1;
3261 while (tempCOUNT > 0) {
3262 tempcf = arg & 1;
3263 arg = ((arg >> 1) & 0x7FULL) | (cf << 7);
3264 cf = tempcf;
3265 tempCOUNT--;
3266 }
3267 break;
3268 default:
3269 vpanic("calculate_RCR(amd64g): invalid size");
3270 }
3271
3272 cf &= 1;
3273 of &= 1;
3274 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
3275 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
3276
3277 /* caller can ask to have back either the resulting flags or
3278 resulting value, but not both */
3279 return wantRflags ? rflags_in : arg;
3280 }
3281
3282 ULong amd64g_calculate_RCL ( ULong arg,
3283 ULong rot_amt,
3284 ULong rflags_in,
3285 Long szIN )
3286 {
3287 Bool wantRflags = toBool(szIN < 0);
3288 ULong sz = wantRflags ? (-szIN) : szIN;
3289 ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F);
3290 ULong cf=0, of=0, tempcf;
3291
3292 switch (sz) {
3293 case 8:
3294 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3295 while (tempCOUNT > 0) {
3296 tempcf = (arg >> 63) & 1;
3297 arg = (arg << 1) | (cf & 1);
3298 cf = tempcf;
3299 tempCOUNT--;
3300 }
3301 of = ((arg >> 63) ^ cf) & 1;
3302 break;
3303 case 4:
3304 while (tempCOUNT >= 33) tempCOUNT -= 33;
3305 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3306 while (tempCOUNT > 0) {
3307 tempcf = (arg >> 31) & 1;
3308 arg = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1));
3309 cf = tempcf;
3310 tempCOUNT--;
3311 }
3312 of = ((arg >> 31) ^ cf) & 1;
3313 break;
3314 case 2:
3315 while (tempCOUNT >= 17) tempCOUNT -= 17;
3316 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3317 while (tempCOUNT > 0) {
3318 tempcf = (arg >> 15) & 1;
3319 arg = 0xFFFFULL & ((arg << 1) | (cf & 1));
3320 cf = tempcf;
3321 tempCOUNT--;
3322 }
3323 of = ((arg >> 15) ^ cf) & 1;
3324 break;
3325 case 1:
3326 while (tempCOUNT >= 9) tempCOUNT -= 9;
3327 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3328 while (tempCOUNT > 0) {
3329 tempcf = (arg >> 7) & 1;
3330 arg = 0xFFULL & ((arg << 1) | (cf & 1));
3331 cf = tempcf;
3332 tempCOUNT--;
3333 }
3334 of = ((arg >> 7) ^ cf) & 1;
3335 break;
3336 default:
3337 vpanic("calculate_RCL(amd64g): invalid size");
3338 }
3339
3340 cf &= 1;
3341 of &= 1;
3342 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
3343 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
3344
3345 return wantRflags ? rflags_in : arg;
3346 }
3347
3348 /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+)
3349 * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25
3350 */
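/* Carry-less (GF(2)[x]) 64x64->128 multiply, as needed for PCLMULQDQ.
   A table of the xor-multiples a*0 .. a*15 is built first and 'b' is
   then consumed four bits at a time; the trailing block of mask-and-xor
   lines adds back the high-half product bits that fall off the top of
   the 64-bit table entries when the upper bits of 'a' are set.  'which'
   selects the high (non-zero) or low (zero) 64 bits of the result. */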
3351 ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which)
3352 {
3353 ULong hi, lo, tmp, A[16];
3354
3355 A[0] = 0; A[1] = a;
3356 A[2] = A[1] << 1; A[3] = A[2] ^ a;
3357 A[4] = A[2] << 1; A[5] = A[4] ^ a;
3358 A[6] = A[3] << 1; A[7] = A[6] ^ a;
3359 A[8] = A[4] << 1; A[9] = A[8] ^ a;
3360 A[10] = A[5] << 1; A[11] = A[10] ^ a;
3361 A[12] = A[6] << 1; A[13] = A[12] ^ a;
3362 A[14] = A[7] << 1; A[15] = A[14] ^ a;
3363
3364 lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15];
3365 hi = lo >> 56;
3366 lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15];
3367 hi = (hi << 8) | (lo >> 56);
3368 lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15];
3369 hi = (hi << 8) | (lo >> 56);
3370 lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15];
3371 hi = (hi << 8) | (lo >> 56);
3372 lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15];
3373 hi = (hi << 8) | (lo >> 56);
3374 lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15];
3375 hi = (hi << 8) | (lo >> 56);
3376 lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15];
3377 hi = (hi << 8) | (lo >> 56);
3378 lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15];
3379
3380 ULong m0 = -1;
3381 m0 /= 255;
3382 tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp;
3383 tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp;
3384 tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp;
3385 tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp;
3386 tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp;
3387 tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp;
3388 tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp;
3389
3390 return which ? hi : lo;
3391 }
3392
3393
3394 /* CALLED FROM GENERATED CODE */
3395 /* DIRTY HELPER (non-referentially-transparent) */
3396 /* Horrible hack. On non-amd64 platforms, return 1. */
3397 ULong amd64g_dirtyhelper_RDTSC ( void )
3398 {
3399 # if defined(__x86_64__)
3400 UInt eax, edx;
3401 __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx));
3402 return (((ULong)edx) << 32) | ((ULong)eax);
3403 # else
3404 return 1ULL;
3405 # endif
3406 }
3407
3408 /* CALLED FROM GENERATED CODE */
3409 /* DIRTY HELPER (non-referentially-transparent) */
3410 /* Horrible hack.  On non-amd64 platforms, do nothing. */
3411 /* This uses a different calling convention from _RDTSC just above
3412 only because of the difficulty of returning 96 bits from a C
3413 function -- RDTSC returns 64 bits and so is simple by comparison,
3414 on amd64. */
3415 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st )
3416 {
3417 # if defined(__x86_64__)
3418 UInt eax, ecx, edx;
3419 __asm__ __volatile__("rdtscp" : "=a" (eax), "=d" (edx), "=c" (ecx));
3420 st->guest_RAX = (ULong)eax;
3421 st->guest_RCX = (ULong)ecx;
3422 st->guest_RDX = (ULong)edx;
3423 # else
3424 /* Do nothing. */
3425 # endif
3426 }
3427
3428 /* CALLED FROM GENERATED CODE */
3429 /* DIRTY HELPER (non-referentially-transparent) */
3430 /* Horrible hack. On non-amd64 platforms, return 0. */
3431 ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ )
3432 {
3433 # if defined(__x86_64__)
3434 ULong r = 0;
3435 portno &= 0xFFFF;
3436 switch (sz) {
3437 case 4:
3438 __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0"
3439 : "=a" (r) : "Nd" (portno));
3440 break;
3441 case 2:
3442 __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0"
3443 : "=a" (r) : "Nd" (portno));
3444 break;
3445 case 1:
3446 __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0"
3447 : "=a" (r) : "Nd" (portno));
3448 break;
3449 default:
3450 break; /* note: no 64-bit version of insn exists */
3451 }
3452 return r;
3453 # else
3454 return 0;
3455 # endif
3456 }
3457
3458
3459 /* CALLED FROM GENERATED CODE */
3460 /* DIRTY HELPER (non-referentially-transparent) */
3461 /* Horrible hack. On non-amd64 platforms, do nothing. */
3462 void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ )
3463 {
3464 # if defined(__x86_64__)
3465 portno &= 0xFFFF;
3466 switch (sz) {
3467 case 4:
3468 __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1"
3469 : : "a" (data), "Nd" (portno));
3470 break;
3471 case 2:
3472 __asm__ __volatile__("outw %w0, %w1"
3473 : : "a" (data), "Nd" (portno));
3474 break;
3475 case 1:
3476 __asm__ __volatile__("outb %b0, %w1"
3477 : : "a" (data), "Nd" (portno));
3478 break;
3479 default:
3480 break; /* note: no 64-bit version of insn exists */
3481 }
3482 # else
3483 /* do nothing */
3484 # endif
3485 }
3486
3487 /* CALLED FROM GENERATED CODE */
3488 /* DIRTY HELPER (non-referentially-transparent) */
3489 /* Horrible hack. On non-amd64 platforms, just store a zeroed descriptor. */
3490 /* op = 0: call the native SGDT instruction.
3491 op = 1: call the native SIDT instruction.
3492 */
3493 void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) {
3494 # if defined(__x86_64__)
3495 switch (op) {
3496 case 0:
3497 __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
3498 break;
3499 case 1:
3500 __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
3501 break;
3502 default:
3503 vpanic("amd64g_dirtyhelper_SxDT");
3504 }
3505 # else
3506    /* Non-amd64 host: hand back a zeroed 10-byte descriptor. */
3507 UChar* p = (UChar*)address;
3508 p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
3509 p[6] = p[7] = p[8] = p[9] = 0;
3510 # endif
3511 }
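/* Illustrative sketch only (not part of VEX, not compiled): SGDT/SIDT in
   64-bit mode store a 10-byte descriptor -- a 2-byte limit followed by an
   8-byte linear base -- which is why the non-amd64 fallback above zeroes
   exactly 10 bytes.  Buffer and variable names here are hypothetical. */
#if 0
static void example_read_gdtr ( void )
{
   UChar  buf[10];
   UShort limit;
   ULong  base;
   Int    i;
   amd64g_dirtyhelper_SxDT( buf, 0 /* SGDT */ );
   limit = (UShort)(buf[0] | ((UInt)buf[1] << 8));
   base  = 0;
   for (i = 7; i >= 0; i--) base = (base << 8) | (ULong)buf[2 + i];
   (void)limit; (void)base;
}
#endif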
3512
3513 /*---------------------------------------------------------------*/
3514 /*--- Helpers for MMX/SSE/SSE2. ---*/
3515 /*---------------------------------------------------------------*/
3516
3517 static inline UChar abdU8 ( UChar xx, UChar yy ) {
3518 return toUChar(xx>yy ? xx-yy : yy-xx);
3519 }
3520
3521 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
3522 return (((ULong)w1) << 32) | ((ULong)w0);
3523 }
3524
3525 static inline UShort sel16x4_3 ( ULong w64 ) {
3526 UInt hi32 = toUInt(w64 >> 32);
3527 return toUShort(hi32 >> 16);
3528 }
3529 static inline UShort sel16x4_2 ( ULong w64 ) {
3530 UInt hi32 = toUInt(w64 >> 32);
3531 return toUShort(hi32);
3532 }
3533 static inline UShort sel16x4_1 ( ULong w64 ) {
3534 UInt lo32 = toUInt(w64);
3535 return toUShort(lo32 >> 16);
3536 }
3537 static inline UShort sel16x4_0 ( ULong w64 ) {
3538 UInt lo32 = toUInt(w64);
3539 return toUShort(lo32);
3540 }
3541
3542 static inline UChar sel8x8_7 ( ULong w64 ) {
3543 UInt hi32 = toUInt(w64 >> 32);
3544 return toUChar(hi32 >> 24);
3545 }
3546 static inline UChar sel8x8_6 ( ULong w64 ) {
3547 UInt hi32 = toUInt(w64 >> 32);
3548 return toUChar(hi32 >> 16);
3549 }
3550 static inline UChar sel8x8_5 ( ULong w64 ) {
3551 UInt hi32 = toUInt(w64 >> 32);
3552 return toUChar(hi32 >> 8);
3553 }
3554 static inline UChar sel8x8_4 ( ULong w64 ) {
3555 UInt hi32 = toUInt(w64 >> 32);
3556 return toUChar(hi32 >> 0);
3557 }
3558 static inline UChar sel8x8_3 ( ULong w64 ) {
3559 UInt lo32 = toUInt(w64);
3560 return toUChar(lo32 >> 24);
3561 }
3562 static inline UChar sel8x8_2 ( ULong w64 ) {
3563 UInt lo32 = toUInt(w64);
3564 return toUChar(lo32 >> 16);
3565 }
3566 static inline UChar sel8x8_1 ( ULong w64 ) {
3567 UInt lo32 = toUInt(w64);
3568 return toUChar(lo32 >> 8);
3569 }
3570 static inline UChar sel8x8_0 ( ULong w64 ) {
3571 UInt lo32 = toUInt(w64);
3572 return toUChar(lo32 >> 0);
3573 }
3574
3575 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3576 ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
3577 {
3578 return
3579 mk32x2(
3580 (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
3581 + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
3582 (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
3583 + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
3584 );
3585 }
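/* Worked example (illustrative only, not compiled): with the signed 16-bit
   lanes of xx = 4,3,2,1 and yy = 40,30,20,10 (lane 3 down to lane 0), the
   low 32-bit result is 2*20 + 1*10 = 50 and the high one is
   3*30 + 4*40 = 250. */
#if 0
static void example_pmaddwd ( void )
{
   ULong r = amd64g_calculate_mmx_pmaddwd( 0x0004000300020001ULL,
                                           0x0028001E0014000AULL );
   vassert(r == 0x000000FA00000032ULL);   /* mk32x2(250, 50) */
}
#endif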
3586
3587 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3588 ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
3589 {
3590 UInt t = 0;
3591 t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
3592 t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
3593 t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
3594 t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
3595 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
3596 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
3597 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
3598 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
3599 t &= 0xFFFF;
3600 return (ULong)t;
3601 }
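/* Worked example (illustrative only, not compiled): every byte pair below
   differs by 0x0F, so the sum of absolute differences is 8 * 15 = 120. */
#if 0
static void example_psadbw ( void )
{
   ULong r = amd64g_calculate_mmx_psadbw( 0x1010101010101010ULL,
                                          0x0101010101010101ULL );
   vassert(r == 120);
}
#endif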
3602
3603 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3604 ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi )
3605 {
3606 UShort t, min;
3607 UInt idx;
3608 t = sel16x4_0(sLo); if (True) { min = t; idx = 0; }
3609 t = sel16x4_1(sLo); if (t < min) { min = t; idx = 1; }
3610 t = sel16x4_2(sLo); if (t < min) { min = t; idx = 2; }
3611 t = sel16x4_3(sLo); if (t < min) { min = t; idx = 3; }
3612 t = sel16x4_0(sHi); if (t < min) { min = t; idx = 4; }
3613 t = sel16x4_1(sHi); if (t < min) { min = t; idx = 5; }
3614 t = sel16x4_2(sHi); if (t < min) { min = t; idx = 6; }
3615 t = sel16x4_3(sHi); if (t < min) { min = t; idx = 7; }
3616 return ((ULong)(idx << 16)) | ((ULong)min);
3617 }
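/* Worked example (illustrative only, not compiled): the minimum of the
   eight unsigned 16-bit lanes below is 2, found in lane 1, so the helper
   packs the index into bits above 16 and the minimum into bits 15:0. */
#if 0
static void example_phminposuw ( void )
{
   ULong r = amd64g_calculate_sse_phminposuw( 0x0009000800020005ULL,
                                              0x0007000700070007ULL );
   vassert(r == 0x10002ULL);   /* idx = 1, min = 2 */
}
#endif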
3618
3619 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3620 ULong amd64g_calc_crc32b ( ULong crcIn, ULong b )
3621 {
3622 UInt i;
3623 ULong crc = (b & 0xFFULL) ^ crcIn;
3624 for (i = 0; i < 8; i++)
3625 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
3626 return crc;
3627 }
3628
3629 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3630 ULong amd64g_calc_crc32w ( ULong crcIn, ULong w )
3631 {
3632 UInt i;
3633 ULong crc = (w & 0xFFFFULL) ^ crcIn;
3634 for (i = 0; i < 16; i++)
3635 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
3636 return crc;
3637 }
3638
3639 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3640 ULong amd64g_calc_crc32l ( ULong crcIn, ULong l )
3641 {
3642 UInt i;
3643 ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn;
3644 for (i = 0; i < 32; i++)
3645 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
3646 return crc;
3647 }
3648
3649 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3650 ULong amd64g_calc_crc32q ( ULong crcIn, ULong q )
3651 {
3652 ULong crc = amd64g_calc_crc32l(crcIn, q);
3653 return amd64g_calc_crc32l(crc, q >> 32);
3654 }
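/* Illustrative sketch only (not part of VEX, not compiled): the helpers
   above implement the raw, bit-serial CRC32-C (Castagnoli) update used by
   the CRC32 instruction, with no initial or final inversion.  Supplying the
   conventional all-ones initial value and final complement should reproduce
   the well-known CRC-32C check value 0xE3069283 for "123456789". */
#if 0
static void example_crc32c_check ( void )
{
   const UChar* s = (const UChar*)"123456789";
   ULong crc = 0xFFFFFFFFULL;
   Int i;
   for (i = 0; i < 9; i++)
      crc = amd64g_calc_crc32b( crc, s[i] );
   crc ^= 0xFFFFFFFFULL;
   vassert(crc == 0xE3069283ULL);
}
#endif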
3655
3656
3657 /* .. helper for next fn .. */
3658 static inline ULong sad_8x4 ( ULong xx, ULong yy )
3659 {
3660 UInt t = 0;
3661 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
3662 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
3663 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
3664 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
3665 return (ULong)t;
3666 }
3667
3668 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3669 ULong amd64g_calc_mpsadbw ( ULong sHi, ULong sLo,
3670 ULong dHi, ULong dLo,
3671 ULong imm_and_return_control_bit )
3672 {
3673 UInt imm8 = imm_and_return_control_bit & 7;
3674 Bool calcHi = (imm_and_return_control_bit >> 7) & 1;
3675 UInt srcOffsL = imm8 & 3; /* src offs in 32-bit (L) chunks */
3676 UInt dstOffsL = (imm8 >> 2) & 1; /* dst offs in ditto chunks */
3677 /* For src we only need 32 bits, so get them into the
3678 lower half of a 64 bit word. */
3679 ULong src = ((srcOffsL & 2) ? sHi : sLo) >> (32 * (srcOffsL & 1));
3680 /* For dst we need to get hold of 56 bits (7 bytes) from a total of
3681 11 bytes. If calculating the low part of the result, need bytes
3682 dstOffsL * 4 + (0 .. 6); if calculating the high part,
3683 dstOffsL * 4 + (4 .. 10). */
3684 ULong dst;
3685 /* dstOffL = 0, Lo -> 0 .. 6
3686 dstOffL = 1, Lo -> 4 .. 10
3687 dstOffL = 0, Hi -> 4 .. 10
3688 dstOffL = 1, Hi -> 8 .. 14
3689 */
3690 if (calcHi && dstOffsL) {
3691 /* 8 .. 14 */
3692 dst = dHi & 0x00FFFFFFFFFFFFFFULL;
3693 }
3694 else if (!calcHi && !dstOffsL) {
3695 /* 0 .. 6 */
3696 dst = dLo & 0x00FFFFFFFFFFFFFFULL;
3697 }
3698 else {
3699 /* 4 .. 10 */
3700 dst = (dLo >> 32) | ((dHi & 0x00FFFFFFULL) << 32);
3701 }
3702 ULong r0 = sad_8x4( dst >> 0, src );
3703 ULong r1 = sad_8x4( dst >> 8, src );
3704 ULong r2 = sad_8x4( dst >> 16, src );
3705 ULong r3 = sad_8x4( dst >> 24, src );
3706 ULong res = (r3 << 48) | (r2 << 32) | (r1 << 16) | r0;
3707 return res;
3708 }
3709
3710 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3711 ULong amd64g_calculate_pext ( ULong src_masked, ULong mask )
3712 {
3713 ULong dst = 0;
3714 ULong src_bit;
3715 ULong dst_bit = 1;
3716 for (src_bit = 1; src_bit; src_bit <<= 1) {
3717 if (mask & src_bit) {
3718 if (src_masked & src_bit) dst |= dst_bit;
3719 dst_bit <<= 1;
3720 }
3721 }
3722 return dst;
3723 }
3724
3725 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3726 ULong amd64g_calculate_pdep ( ULong src, ULong mask )
3727 {
3728 ULong dst = 0;
3729 ULong dst_bit;
3730 ULong src_bit = 1;
3731 for (dst_bit = 1; dst_bit; dst_bit <<= 1) {
3732 if (mask & dst_bit) {
3733 if (src & src_bit) dst |= dst_bit;
3734 src_bit <<= 1;
3735 }
3736 }
3737 return dst;
3738 }
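/* Worked example (illustrative only, not compiled): with mask 0xFF00,
   PEXT gathers bits 15:8 of the (already-masked) source down into bits 7:0,
   and PDEP scatters bits 7:0 of the source up into bits 15:8. */
#if 0
static void example_pext_pdep ( void )
{
   vassert( amd64g_calculate_pext( 0x12345678ULL & 0xFF00ULL, 0xFF00ULL )
            == 0x56ULL );
   vassert( amd64g_calculate_pdep( 0xABULL, 0xFF00ULL ) == 0xAB00ULL );
}
#endif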
3739
3740 /*---------------------------------------------------------------*/
3741 /*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M} ---*/
3742 /*---------------------------------------------------------------*/
3743
3744 static UInt zmask_from_V128 ( V128* arg )
3745 {
3746 UInt i, res = 0;
3747 for (i = 0; i < 16; i++) {
3748 res |= ((arg->w8[i] == 0) ? 1 : 0) << i;
3749 }
3750 return res;
3751 }
3752
3753 static UInt zmask_from_V128_wide ( V128* arg )
3754 {
3755 UInt i, res = 0;
3756 for (i = 0; i < 8; i++) {
3757 res |= ((arg->w16[i] == 0) ? 1 : 0) << i;
3758 }
3759 return res;
3760 }
3761
3762 /* Helps with PCMP{I,E}STR{I,M}.
3763
3764    CALLED FROM GENERATED CODE: DIRTY HELPER(s). (Not truly dirty:
3765    it could be a clean helper, except that we can't pass two V128s
3766    by value to a clean helper, nor have one returned.)
3767 Reads guest state, writes to guest state for the xSTRM cases, no
3768 accesses of memory, is a pure function.
3769
3770 opc_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so
3771 the callee knows which I/E and I/M variant it is dealing with and
3772 what the specific operation is. 4th byte of opcode is in the range
3773 0x60 to 0x63:
3774 istri 66 0F 3A 63
3775 istrm 66 0F 3A 62
3776 estri 66 0F 3A 61
3777 estrm 66 0F 3A 60
3778
3779 gstOffL and gstOffR are the guest state offsets for the two XMM
3780 register inputs. We never have to deal with the memory case since
3781 that is handled by pre-loading the relevant value into the fake
3782 XMM16 register.
3783
3784 For ESTRx variants, edxIN and eaxIN hold the values of those two
3785 registers.
3786
3787 In all cases, the bottom 16 bits of the result contain the new
3788 OSZACP %rflags values. For xSTRI variants, bits[31:16] of the
3789 result hold the new %ecx value. For xSTRM variants, the helper
3790 writes the result directly to the guest XMM0.
3791
3792 Declarable side effects: in all cases, reads guest state at
3793 [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes
3794 guest_XMM0.
3795
3796 Is expected to be called with opc_and_imm combinations which have
3797 actually been validated, and will assert if otherwise. The front
3798 end should ensure we're only called with verified values.
3799 */
3800 ULong amd64g_dirtyhelper_PCMPxSTRx (
3801 VexGuestAMD64State* gst,
3802 HWord opc4_and_imm,
3803 HWord gstOffL, HWord gstOffR,
3804 HWord edxIN, HWord eaxIN
3805 )
3806 {
3807 HWord opc4 = (opc4_and_imm >> 8) & 0xFF;
3808 HWord imm8 = opc4_and_imm & 0xFF;
3809 HWord isISTRx = opc4 & 2;
3810 HWord isxSTRM = (opc4 & 1) ^ 1;
3811 vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */
3812 HWord wide = (imm8 & 1);
3813
3814 // where the args are
3815 V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
3816 V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
3817
3818 /* Create the arg validity masks, either from the vectors
3819 themselves or from the supplied edx/eax values. */
3820 // FIXME: this is only right for the 8-bit data cases.
3821 // At least that is asserted above.
3822 UInt zmaskL, zmaskR;
3823
3824 // temp spot for the resulting flags and vector.
3825 V128 resV;
3826 UInt resOSZACP;
3827
3828 // for checking whether case was handled
3829 Bool ok = False;
3830
3831 if (wide) {
3832 if (isISTRx) {
3833 zmaskL = zmask_from_V128_wide(argL);
3834 zmaskR = zmask_from_V128_wide(argR);
3835 } else {
3836 Int tmp;
3837 tmp = edxIN & 0xFFFFFFFF;
3838 if (tmp < -8) tmp = -8;
3839 if (tmp > 8) tmp = 8;
3840 if (tmp < 0) tmp = -tmp;
3841 vassert(tmp >= 0 && tmp <= 8);
3842 zmaskL = (1 << tmp) & 0xFF;
3843 tmp = eaxIN & 0xFFFFFFFF;
3844 if (tmp < -8) tmp = -8;
3845 if (tmp > 8) tmp = 8;
3846 if (tmp < 0) tmp = -tmp;
3847 vassert(tmp >= 0 && tmp <= 8);
3848 zmaskR = (1 << tmp) & 0xFF;
3849 }
3850       // do the computation
3851 ok = compute_PCMPxSTRx_wide (
3852 &resV, &resOSZACP, argL, argR,
3853 zmaskL, zmaskR, imm8, (Bool)isxSTRM
3854 );
3855 } else {
3856 if (isISTRx) {
3857 zmaskL = zmask_from_V128(argL);
3858 zmaskR = zmask_from_V128(argR);
3859 } else {
3860 Int tmp;
3861 tmp = edxIN & 0xFFFFFFFF;
3862 if (tmp < -16) tmp = -16;
3863 if (tmp > 16) tmp = 16;
3864 if (tmp < 0) tmp = -tmp;
3865 vassert(tmp >= 0 && tmp <= 16);
3866 zmaskL = (1 << tmp) & 0xFFFF;
3867 tmp = eaxIN & 0xFFFFFFFF;
3868 if (tmp < -16) tmp = -16;
3869 if (tmp > 16) tmp = 16;
3870 if (tmp < 0) tmp = -tmp;
3871 vassert(tmp >= 0 && tmp <= 16);
3872 zmaskR = (1 << tmp) & 0xFFFF;
3873 }
3874       // do the computation
3875 ok = compute_PCMPxSTRx (
3876 &resV, &resOSZACP, argL, argR,
3877 zmaskL, zmaskR, imm8, (Bool)isxSTRM
3878 );
3879 }
3880
3881 // front end shouldn't pass us any imm8 variants we can't
3882 // handle. Hence:
3883 vassert(ok);
3884
3885 // So, finally we need to get the results back to the caller.
3886 // In all cases, the new OSZACP value is the lowest 16 of
3887 // the return value.
3888 if (isxSTRM) {
3889 gst->guest_YMM0[0] = resV.w32[0];
3890 gst->guest_YMM0[1] = resV.w32[1];
3891 gst->guest_YMM0[2] = resV.w32[2];
3892 gst->guest_YMM0[3] = resV.w32[3];
3893 return resOSZACP & 0x8D5;
3894 } else {
3895 UInt newECX = resV.w32[0] & 0xFFFF;
3896 return (newECX << 16) | (resOSZACP & 0x8D5);
3897 }
3898 }
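/* Illustrative sketch only (not part of VEX, not compiled): how the packed
   return value described in the comment above is taken apart for the xSTRI
   case.  The variable names are hypothetical; the real consumer is the IR
   built by the front end. */
#if 0
static void example_unpack_pcmpxstrx_result ( ULong packed )
{
   UInt newECX = (UInt)(packed >> 16) & 0xFFFF;
   UInt oszacp = (UInt)(packed & 0x8D5);  /* O,S,Z,A,C,P rflags positions */
   (void)newECX; (void)oszacp;
}
#endif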
3899
3900 /*---------------------------------------------------------------*/
3901 /*--- AES primitives and helpers ---*/
3902 /*---------------------------------------------------------------*/
3903 /* a 16 x 16 matrix */
3904 static const UChar sbox[256] = { // row nr
3905 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, // 1
3906 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
3907 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, // 2
3908 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
3909 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, // 3
3910 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
3911 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, // 4
3912 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
3913 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, // 5
3914 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
3915 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, // 6
3916 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
3917 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, // 7
3918 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
3919 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, // 8
3920 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
3921 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, // 9
3922 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
3923 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, //10
3924 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
3925 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, //11
3926 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
3927 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, //12
3928 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
3929 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, //13
3930 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
3931 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, //14
3932 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
3933 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, //15
3934 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
3935 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, //16
3936 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
3937 };
3938 static void SubBytes (V128* v)
3939 {
3940 V128 r;
3941 UInt i;
3942 for (i = 0; i < 16; i++)
3943 r.w8[i] = sbox[v->w8[i]];
3944 *v = r;
3945 }
3946
3947 /* a 16 x 16 matrix */
3948 static const UChar invsbox[256] = { // row nr
3949 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, // 1
3950 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
3951 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, // 2
3952 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
3953 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, // 3
3954 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
3955 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, // 4
3956 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
3957 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, // 5
3958 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
3959 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, // 6
3960 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
3961 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, // 7
3962 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
3963 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, // 8
3964 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
3965 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, // 9
3966 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
3967 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, //10
3968 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
3969 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, //11
3970 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
3971 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, //12
3972 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
3973 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, //13
3974 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
3975 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, //14
3976 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
3977 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, //15
3978 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
3979 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, //16
3980 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
3981 };
3982 static void InvSubBytes (V128* v)
3983 {
3984 V128 r;
3985 UInt i;
3986 for (i = 0; i < 16; i++)
3987 r.w8[i] = invsbox[v->w8[i]];
3988 *v = r;
3989 }
3990
3991 static const UChar ShiftRows_op[16] =
3992 {11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0};
3993 static void ShiftRows (V128* v)
3994 {
3995 V128 r;
3996 UInt i;
3997 for (i = 0; i < 16; i++)
3998 r.w8[i] = v->w8[ShiftRows_op[15-i]];
3999 *v = r;
4000 }
4001
4002 static const UChar InvShiftRows_op[16] =
4003 {3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0};
4004 static void InvShiftRows (V128* v)
4005 {
4006 V128 r;
4007 UInt i;
4008 for (i = 0; i < 16; i++)
4009 r.w8[i] = v->w8[InvShiftRows_op[15-i]];
4010 *v = r;
4011 }
4012
4013 /* Multiplication of the finite field elements of AES.
4014 See "A Specification for The AES Algorithm Rijndael
4015 (by Joan Daemen & Vincent Rijmen)"
4016 Dr. Brian Gladman, v3.1, 3rd March 2001. */
4017 /* N values so that (hex) xy = 0x03^N.
4018 0x00 cannot be used. We put 0xff for this value.*/
4019 /* a 16 x 16 matrix */
4020 static const UChar Nxy[256] = { // row nr
4021 0xff, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, // 1
4022 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
4023 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, // 2
4024 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
4025 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, // 3
4026 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
4027 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, // 4
4028 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
4029 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, // 5
4030 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
4031 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, // 6
4032 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
4033 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, // 7
4034 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
4035 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, // 8
4036 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
4037 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, // 9
4038 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
4039 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, //10
4040 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
4041 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, //11
4042 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
4043 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, //12
4044 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
4045 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, //13
4046 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
4047 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, //14
4048 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
4049 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, //15
4050 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
4051 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, //16
4052 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07
4053 };
4054
4055 /* E values so that E = 0x03^xy. */
4056 static const UChar Exy[256] = { // row nr
4057 0x01, 0x03, 0x05, 0x0f, 0x11, 0x33, 0x55, 0xff, // 1
4058 0x1a, 0x2e, 0x72, 0x96, 0xa1, 0xf8, 0x13, 0x35,
4059 0x5f, 0xe1, 0x38, 0x48, 0xd8, 0x73, 0x95, 0xa4, // 2
4060 0xf7, 0x02, 0x06, 0x0a, 0x1e, 0x22, 0x66, 0xaa,
4061 0xe5, 0x34, 0x5c, 0xe4, 0x37, 0x59, 0xeb, 0x26, // 3
4062 0x6a, 0xbe, 0xd9, 0x70, 0x90, 0xab, 0xe6, 0x31,
4063 0x53, 0xf5, 0x04, 0x0c, 0x14, 0x3c, 0x44, 0xcc, // 4
4064 0x4f, 0xd1, 0x68, 0xb8, 0xd3, 0x6e, 0xb2, 0xcd,
4065 0x4c, 0xd4, 0x67, 0xa9, 0xe0, 0x3b, 0x4d, 0xd7, // 5
4066 0x62, 0xa6, 0xf1, 0x08, 0x18, 0x28, 0x78, 0x88,
4067 0x83, 0x9e, 0xb9, 0xd0, 0x6b, 0xbd, 0xdc, 0x7f, // 6
4068 0x81, 0x98, 0xb3, 0xce, 0x49, 0xdb, 0x76, 0x9a,
4069 0xb5, 0xc4, 0x57, 0xf9, 0x10, 0x30, 0x50, 0xf0, // 7
4070 0x0b, 0x1d, 0x27, 0x69, 0xbb, 0xd6, 0x61, 0xa3,
4071 0xfe, 0x19, 0x2b, 0x7d, 0x87, 0x92, 0xad, 0xec, // 8
4072 0x2f, 0x71, 0x93, 0xae, 0xe9, 0x20, 0x60, 0xa0,
4073 0xfb, 0x16, 0x3a, 0x4e, 0xd2, 0x6d, 0xb7, 0xc2, // 9
4074 0x5d, 0xe7, 0x32, 0x56, 0xfa, 0x15, 0x3f, 0x41,
4075 0xc3, 0x5e, 0xe2, 0x3d, 0x47, 0xc9, 0x40, 0xc0, //10
4076 0x5b, 0xed, 0x2c, 0x74, 0x9c, 0xbf, 0xda, 0x75,
4077 0x9f, 0xba, 0xd5, 0x64, 0xac, 0xef, 0x2a, 0x7e, //11
4078 0x82, 0x9d, 0xbc, 0xdf, 0x7a, 0x8e, 0x89, 0x80,
4079 0x9b, 0xb6, 0xc1, 0x58, 0xe8, 0x23, 0x65, 0xaf, //12
4080 0xea, 0x25, 0x6f, 0xb1, 0xc8, 0x43, 0xc5, 0x54,
4081 0xfc, 0x1f, 0x21, 0x63, 0xa5, 0xf4, 0x07, 0x09, //13
4082 0x1b, 0x2d, 0x77, 0x99, 0xb0, 0xcb, 0x46, 0xca,
4083 0x45, 0xcf, 0x4a, 0xde, 0x79, 0x8b, 0x86, 0x91, //14
4084 0xa8, 0xe3, 0x3e, 0x42, 0xc6, 0x51, 0xf3, 0x0e,
4085 0x12, 0x36, 0x5a, 0xee, 0x29, 0x7b, 0x8d, 0x8c, //15
4086 0x8f, 0x8a, 0x85, 0x94, 0xa7, 0xf2, 0x0d, 0x17,
4087 0x39, 0x4b, 0xdd, 0x7c, 0x84, 0x97, 0xa2, 0xfd, //16
4088 0x1c, 0x24, 0x6c, 0xb4, 0xc7, 0x52, 0xf6, 0x01};
4089
4090 static inline UChar ff_mul(UChar u1, UChar u2)
4091 {
4092 if ((u1 > 0) && (u2 > 0)) {
4093 UInt ui = Nxy[u1] + Nxy[u2];
4094 if (ui >= 255)
4095 ui = ui - 255;
4096 return Exy[ui];
4097 } else {
4098 return 0;
4099 };
4100 }
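/* Worked example (illustrative only, not compiled): ff_mul(0x02, 0x87)
   looks up Nxy[0x02] = 0x19 and Nxy[0x87] = 0x74, sums them to 0x8D, and
   returns Exy[0x8D] = 0x15 -- the same result as the usual xtime step
   ((0x87 << 1) ^ 0x11B) in GF(2^8). */
#if 0
static void example_ff_mul ( void )
{
   vassert( ff_mul(0x02, 0x87) == 0x15 );
}
#endif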
4101
4102 static void MixColumns (V128* v)
4103 {
4104 V128 r;
4105 Int j;
4106 #define P(x,row,col) (x)->w8[((row)*4+(col))]
4107 for (j = 0; j < 4; j++) {
4108 P(&r,j,0) = ff_mul(0x02, P(v,j,0)) ^ ff_mul(0x03, P(v,j,1))
4109 ^ P(v,j,2) ^ P(v,j,3);
4110 P(&r,j,1) = P(v,j,0) ^ ff_mul( 0x02, P(v,j,1) )
4111 ^ ff_mul(0x03, P(v,j,2) ) ^ P(v,j,3);
4112 P(&r,j,2) = P(v,j,0) ^ P(v,j,1) ^ ff_mul( 0x02, P(v,j,2) )
4113 ^ ff_mul(0x03, P(v,j,3) );
4114 P(&r,j,3) = ff_mul(0x03, P(v,j,0) ) ^ P(v,j,1) ^ P(v,j,2)
4115 ^ ff_mul( 0x02, P(v,j,3) );
4116 }
4117 *v = r;
4118 #undef P
4119 }
4120
4121 static void InvMixColumns (V128* v)
4122 {
4123 V128 r;
4124 Int j;
4125 #define P(x,row,col) (x)->w8[((row)*4+(col))]
4126 for (j = 0; j < 4; j++) {
4127 P(&r,j,0) = ff_mul(0x0e, P(v,j,0) ) ^ ff_mul(0x0b, P(v,j,1) )
4128 ^ ff_mul(0x0d,P(v,j,2) ) ^ ff_mul(0x09, P(v,j,3) );
4129 P(&r,j,1) = ff_mul(0x09, P(v,j,0) ) ^ ff_mul(0x0e, P(v,j,1) )
4130 ^ ff_mul(0x0b,P(v,j,2) ) ^ ff_mul(0x0d, P(v,j,3) );
4131 P(&r,j,2) = ff_mul(0x0d, P(v,j,0) ) ^ ff_mul(0x09, P(v,j,1) )
4132 ^ ff_mul(0x0e,P(v,j,2) ) ^ ff_mul(0x0b, P(v,j,3) );
4133 P(&r,j,3) = ff_mul(0x0b, P(v,j,0) ) ^ ff_mul(0x0d, P(v,j,1) )
4134 ^ ff_mul(0x09,P(v,j,2) ) ^ ff_mul(0x0e, P(v,j,3) );
4135 }
4136 *v = r;
4137 #undef P
4138
4139 }
4140
4141 /* For description, see definition in guest_amd64_defs.h */
4142 void amd64g_dirtyhelper_AES (
4143 VexGuestAMD64State* gst,
4144 HWord opc4, HWord gstOffD,
4145 HWord gstOffL, HWord gstOffR
4146 )
4147 {
4148 // where the args are
4149 V128* argD = (V128*)( ((UChar*)gst) + gstOffD );
4150 V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
4151 V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
4152 V128 r;
4153
4154 switch (opc4) {
4155 case 0xDC: /* AESENC */
4156 case 0xDD: /* AESENCLAST */
4157 r = *argR;
4158 ShiftRows (&r);
4159 SubBytes (&r);
4160 if (opc4 == 0xDC)
4161 MixColumns (&r);
4162 argD->w64[0] = r.w64[0] ^ argL->w64[0];
4163 argD->w64[1] = r.w64[1] ^ argL->w64[1];
4164 break;
4165
4166 case 0xDE: /* AESDEC */
4167 case 0xDF: /* AESDECLAST */
4168 r = *argR;
4169 InvShiftRows (&r);
4170 InvSubBytes (&r);
4171 if (opc4 == 0xDE)
4172 InvMixColumns (&r);
4173 argD->w64[0] = r.w64[0] ^ argL->w64[0];
4174 argD->w64[1] = r.w64[1] ^ argL->w64[1];
4175 break;
4176
4177 case 0xDB: /* AESIMC */
4178 *argD = *argL;
4179 InvMixColumns (argD);
4180 break;
4181 default: vassert(0);
4182 }
4183 }
4184
4185 static inline UInt RotWord (UInt w32)
4186 {
4187 return ((w32 >> 8) | (w32 << 24));
4188 }
4189
4190 static inline UInt SubWord (UInt w32)
4191 {
4192 UChar *w8;
4193 UChar *r8;
4194 UInt res;
4195 w8 = (UChar*) &w32;
4196 r8 = (UChar*) &res;
4197 r8[0] = sbox[w8[0]];
4198 r8[1] = sbox[w8[1]];
4199 r8[2] = sbox[w8[2]];
4200 r8[3] = sbox[w8[3]];
4201 return res;
4202 }
4203
4204 /* For description, see definition in guest_amd64_defs.h */
4205 extern void amd64g_dirtyhelper_AESKEYGENASSIST (
4206 VexGuestAMD64State* gst,
4207 HWord imm8,
4208 HWord gstOffL, HWord gstOffR
4209 )
4210 {
4211 // where the args are
4212 V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
4213 V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
4214
4215 // We have to create the result in a temporary in the
4216 // case where the src and dst regs are the same. See #341698.
4217 V128 tmp;
4218
4219 tmp.w32[3] = RotWord (SubWord (argL->w32[3])) ^ imm8;
4220 tmp.w32[2] = SubWord (argL->w32[3]);
4221 tmp.w32[1] = RotWord (SubWord (argL->w32[1])) ^ imm8;
4222 tmp.w32[0] = SubWord (argL->w32[1]);
4223
4224 argR->w32[3] = tmp.w32[3];
4225 argR->w32[2] = tmp.w32[2];
4226 argR->w32[1] = tmp.w32[1];
4227 argR->w32[0] = tmp.w32[0];
4228 }
4229
4230
4231
4232 /*---------------------------------------------------------------*/
4233 /*--- Helpers for dealing with, and describing, ---*/
4234 /*--- guest state as a whole. ---*/
4235 /*---------------------------------------------------------------*/
4236
4237 /* Initialise the entire amd64 guest state. */
4238 /* VISIBLE TO LIBVEX CLIENT */
4239 void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state )
4240 {
4241 vex_state->host_EvC_FAILADDR = 0;
4242 vex_state->host_EvC_COUNTER = 0;
4243 vex_state->pad0 = 0;
4244
4245 vex_state->guest_RAX = 0;
4246 vex_state->guest_RCX = 0;
4247 vex_state->guest_RDX = 0;
4248 vex_state->guest_RBX = 0;
4249 vex_state->guest_RSP = 0;
4250 vex_state->guest_RBP = 0;
4251 vex_state->guest_RSI = 0;
4252 vex_state->guest_RDI = 0;
4253 vex_state->guest_R8 = 0;
4254 vex_state->guest_R9 = 0;
4255 vex_state->guest_R10 = 0;
4256 vex_state->guest_R11 = 0;
4257 vex_state->guest_R12 = 0;
4258 vex_state->guest_R13 = 0;
4259 vex_state->guest_R14 = 0;
4260 vex_state->guest_R15 = 0;
4261
4262 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
4263 vex_state->guest_CC_DEP1 = 0;
4264 vex_state->guest_CC_DEP2 = 0;
4265 vex_state->guest_CC_NDEP = 0;
4266
4267 vex_state->guest_DFLAG = 1; /* forwards */
4268 vex_state->guest_IDFLAG = 0;
4269 vex_state->guest_ACFLAG = 0;
4270
4271 /* HACK: represent the offset associated with a constant %fs.
4272    Typically, on Linux, this assumes that %fs is only ever zero (main
4273 thread) or 0x63. */
4274 vex_state->guest_FS_CONST = 0;
4275
4276 vex_state->guest_RIP = 0;
4277
4278 /* Initialise the simulated FPU */
4279 amd64g_dirtyhelper_FINIT( vex_state );
4280
4281 /* Initialise the AVX state. */
4282 # define AVXZERO(_ymm) \
4283 do { _ymm[0]=_ymm[1]=_ymm[2]=_ymm[3] = 0; \
4284 _ymm[4]=_ymm[5]=_ymm[6]=_ymm[7] = 0; \
4285 } while (0)
4286 vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST;
4287 AVXZERO(vex_state->guest_YMM0);
4288 AVXZERO(vex_state->guest_YMM1);
4289 AVXZERO(vex_state->guest_YMM2);
4290 AVXZERO(vex_state->guest_YMM3);
4291 AVXZERO(vex_state->guest_YMM4);
4292 AVXZERO(vex_state->guest_YMM5);
4293 AVXZERO(vex_state->guest_YMM6);
4294 AVXZERO(vex_state->guest_YMM7);
4295 AVXZERO(vex_state->guest_YMM8);
4296 AVXZERO(vex_state->guest_YMM9);
4297 AVXZERO(vex_state->guest_YMM10);
4298 AVXZERO(vex_state->guest_YMM11);
4299 AVXZERO(vex_state->guest_YMM12);
4300 AVXZERO(vex_state->guest_YMM13);
4301 AVXZERO(vex_state->guest_YMM14);
4302 AVXZERO(vex_state->guest_YMM15);
4303 AVXZERO(vex_state->guest_YMM16);
4304
4305 # undef AVXZERO
4306
4307 vex_state->guest_EMNOTE = EmNote_NONE;
4308
4309 /* These should not ever be either read or written, but we
4310 initialise them anyway. */
4311 vex_state->guest_CMSTART = 0;
4312 vex_state->guest_CMLEN = 0;
4313
4314 vex_state->guest_NRADDR = 0;
4315 vex_state->guest_SC_CLASS = 0;
4316 vex_state->guest_GS_CONST = 0;
4317
4318 vex_state->guest_IP_AT_SYSCALL = 0;
4319 vex_state->pad1 = 0;
4320 }
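/* Illustrative sketch only (not part of VEX, not compiled): a client
   typically calls the initialiser on a fresh state block and then fills in
   the registers it cares about.  The values shown are hypothetical. */
#if 0
static void example_client_setup ( VexGuestAMD64State* st )
{
   LibVEX_GuestAMD64_initialise( st );
   st->guest_RIP = 0x400000;      /* entry point */
   st->guest_RSP = 0x7FFF0000;    /* initial stack pointer */
}
#endif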
4321
4322
4323 /* Figure out if any part of the guest state contained in minoff
4324 .. maxoff requires precise memory exceptions. If in doubt return
4325 True (but this generates significantly slower code).
4326
4327 By default we enforce precise exns for guest %RSP, %RBP and %RIP
4328 only. These are the minimum needed to extract correct stack
4329 backtraces from amd64 code.
4330
4331 Only %RSP is needed in mode VexRegUpdSpAtMemAccess.
4332 */
4333 Bool guest_amd64_state_requires_precise_mem_exns (
4334 Int minoff, Int maxoff, VexRegisterUpdates pxControl
4335 )
4336 {
4337 Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP);
4338 Int rbp_max = rbp_min + 8 - 1;
4339 Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP);
4340 Int rsp_max = rsp_min + 8 - 1;
4341 Int rip_min = offsetof(VexGuestAMD64State, guest_RIP);
4342 Int rip_max = rip_min + 8 - 1;
4343
4344 if (maxoff < rsp_min || minoff > rsp_max) {
4345 /* no overlap with rsp */
4346 if (pxControl == VexRegUpdSpAtMemAccess)
4347 return False; // We only need to check stack pointer.
4348 } else {
4349 return True;
4350 }
4351
4352 if (maxoff < rbp_min || minoff > rbp_max) {
4353 /* no overlap with rbp */
4354 } else {
4355 return True;
4356 }
4357
4358 if (maxoff < rip_min || minoff > rip_max) {
4359       /* no overlap with rip */
4360 } else {
4361 return True;
4362 }
4363
4364 return False;
4365 }
4366
4367
4368 #define ALWAYSDEFD(field) \
4369 { offsetof(VexGuestAMD64State, field), \
4370 (sizeof ((VexGuestAMD64State*)0)->field) }
4371
4372 VexGuestLayout
4373 amd64guest_layout
4374 = {
4375 /* Total size of the guest state, in bytes. */
4376 .total_sizeB = sizeof(VexGuestAMD64State),
4377
4378 /* Describe the stack pointer. */
4379 .offset_SP = offsetof(VexGuestAMD64State,guest_RSP),
4380 .sizeof_SP = 8,
4381
4382 /* Describe the frame pointer. */
4383 .offset_FP = offsetof(VexGuestAMD64State,guest_RBP),
4384 .sizeof_FP = 8,
4385
4386 /* Describe the instruction pointer. */
4387 .offset_IP = offsetof(VexGuestAMD64State,guest_RIP),
4388 .sizeof_IP = 8,
4389
4390 /* Describe any sections to be regarded by Memcheck as
4391 'always-defined'. */
4392 .n_alwaysDefd = 16,
4393
4394 /* flags thunk: OP and NDEP are always defd, whereas DEP1
4395 and DEP2 have to be tracked. See detailed comment in
4396 gdefs.h on meaning of thunk fields. */
4397 .alwaysDefd
4398 = { /* 0 */ ALWAYSDEFD(guest_CC_OP),
4399 /* 1 */ ALWAYSDEFD(guest_CC_NDEP),
4400 /* 2 */ ALWAYSDEFD(guest_DFLAG),
4401 /* 3 */ ALWAYSDEFD(guest_IDFLAG),
4402 /* 4 */ ALWAYSDEFD(guest_RIP),
4403 /* 5 */ ALWAYSDEFD(guest_FS_CONST),
4404 /* 6 */ ALWAYSDEFD(guest_FTOP),
4405 /* 7 */ ALWAYSDEFD(guest_FPTAG),
4406 /* 8 */ ALWAYSDEFD(guest_FPROUND),
4407 /* 9 */ ALWAYSDEFD(guest_FC3210),
4408 // /* */ ALWAYSDEFD(guest_CS),
4409 // /* */ ALWAYSDEFD(guest_DS),
4410 // /* */ ALWAYSDEFD(guest_ES),
4411 // /* */ ALWAYSDEFD(guest_FS),
4412 // /* */ ALWAYSDEFD(guest_GS),
4413 // /* */ ALWAYSDEFD(guest_SS),
4414 // /* */ ALWAYSDEFD(guest_LDT),
4415 // /* */ ALWAYSDEFD(guest_GDT),
4416 /* 10 */ ALWAYSDEFD(guest_EMNOTE),
4417 /* 11 */ ALWAYSDEFD(guest_SSEROUND),
4418 /* 12 */ ALWAYSDEFD(guest_CMSTART),
4419 /* 13 */ ALWAYSDEFD(guest_CMLEN),
4420 /* 14 */ ALWAYSDEFD(guest_SC_CLASS),
4421 /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
4422 }
4423 };
4424
4425
4426 /*---------------------------------------------------------------*/
4427 /*--- end guest_amd64_helpers.c ---*/
4428 /*---------------------------------------------------------------*/
4429